Re: [PATCH] net: switchdev: don't abort unsupported operations

2015-07-11 Thread David Miller
From: Vivien Didelot 
Date: Fri, 10 Jul 2015 19:48:58 -0400

> There is no need to abort attribute setting or object addition, if the
> prepare phase returned operation not supported.
> 
> Thus, abort these two transactions only if the error is not -EOPNOTSUPP.
> 
> Signed-off-by: Vivien Didelot 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] mpt2sas: Refcount fw_events and fix unsafe list usage

2015-07-11 Thread Calvin Owens
The fw_event_work struct is concurrently referenced at shutdown, so
add a refcount to protect it, and refactor the code to use it.

Additionally, refactor _scsih_fw_event_cleanup_queue() such that it
no longer iterates over the list without holding the lock, since
_firmware_event_work() concurrently deletes items from the list.

Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Signed-off-by: Calvin Owens 
---
 drivers/scsi/mpt2sas/mpt2sas_scsih.c | 101 ---
 1 file changed, 81 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c 
b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index fad80ce..8b267af 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -176,9 +176,37 @@ struct fw_event_work {
u8  VP_ID;
u8  ignore;
u16 event;
+   struct kref refcount;
charevent_data[0] __aligned(4);
 };
 
+static void fw_event_work_free(struct kref *r)
+{
+   kfree(container_of(r, struct fw_event_work, refcount));
+}
+
+static void fw_event_work_get(struct fw_event_work *fw_work)
+{
+   kref_get(&fw_work->refcount);
+}
+
+static void fw_event_work_put(struct fw_event_work *fw_work)
+{
+   kref_put(&fw_work->refcount, fw_event_work_free);
+}
+
+static struct fw_event_work *alloc_fw_event_work(int len)
+{
+   struct fw_event_work *fw_event;
+
+   fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC);
+   if (!fw_event)
+   return NULL;
+
+   kref_init(&fw_event->refcount);
+   return fw_event;
+}
+
 /* raid transport support */
 static struct raid_template *mpt2sas_raid_template;
 
@@ -2844,36 +2872,39 @@ _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct 
fw_event_work *fw_event)
return;
 
	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+   fw_event_work_get(fw_event);
	list_add_tail(&fw_event->list, &ioc->fw_event_list);
	INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work);
+   fw_event_work_get(fw_event);
	queue_delayed_work(ioc->firmware_event_thread,
	    &fw_event->delayed_work, 0);
	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
 }
 
 /**
- * _scsih_fw_event_free - delete fw_event
+ * _scsih_fw_event_del_from_list - delete fw_event from the list
  * @ioc: per adapter object
  * @fw_event: object describing the event
  * Context: This function will acquire ioc->fw_event_lock.
  *
- * This removes firmware event object from link list, frees associated memory.
+ * If the fw_event is on the fw_event_list, remove it and do a put.
  *
  * Return nothing.
  */
 static void
-_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
+_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
 *fw_event)
 {
unsigned long flags;
 
	spin_lock_irqsave(&ioc->fw_event_lock, flags);
-   list_del(&fw_event->list);
-   kfree(fw_event);
+   if (!list_empty(&fw_event->list)) {
+   list_del_init(&fw_event->list);
+   fw_event_work_put(fw_event);
+   }
	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
 }
 
-
 /**
  * _scsih_error_recovery_delete_devices - remove devices not responding
  * @ioc: per adapter object
@@ -2888,13 +2919,14 @@ _scsih_error_recovery_delete_devices(struct 
MPT2SAS_ADAPTER *ioc)
if (ioc->is_driver_loading)
return;
 
-   fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+   fw_event = alloc_fw_event_work(0);
if (!fw_event)
return;
 
fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES;
fw_event->ioc = ioc;
_scsih_fw_event_add(ioc, fw_event);
+   fw_event_work_put(fw_event);
 }
 
 /**
@@ -2908,12 +2940,29 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER 
*ioc)
 {
struct fw_event_work *fw_event;
 
-   fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+   fw_event = alloc_fw_event_work(0);
if (!fw_event)
return;
fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE;
fw_event->ioc = ioc;
_scsih_fw_event_add(ioc, fw_event);
+   fw_event_work_put(fw_event);
+}
+
+static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc)
+{
+   unsigned long flags;
+   struct fw_event_work *fw_event = NULL;
+
+   spin_lock_irqsave(&ioc->fw_event_lock, flags);
+   if (!list_empty(&ioc->fw_event_list)) {
+   fw_event = list_first_entry(&ioc->fw_event_list,
+   struct fw_event_work, list);
+   list_del_init(&fw_event->list);
+   }
+   spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+   return fw_event;
 }
 
 /**
@@ -2928,17 +2977,25 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER 
*ioc)
 static void
 _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc)
 {
-   struct 

[PATCH 0/2 v2] Fixes for memory corruption in mpt2sas

2015-07-11 Thread Calvin Owens
Hello all,

This patchset attempts to address problems we've been having with
panics due to memory corruption from the mpt2sas driver.

Thanks,
Calvin

Patches in this series:
[PATCH 1/2] mpt2sas: Refcount sas_device objects and fix unsafe list usage
[PATCH 2/2] mpt2sas: Refcount fw_events and fix unsafe list usage

Changes since v1:
* Squished patches 1-3 and 4-6 into two patches
* s/BUG_ON(!spin_is_locked/assert_spin_locked/g
* Use more succinct function names
* Store a pointer to the sas_device object in ->hostdata to eliminate
  the need for several lookups on the lists.
* Remove the fw_event from fw_event_list at the start of
  _firmware_event_work()
* Explicitly separate fw_event_list removal from fw_event freeing

Total diffstat:

 drivers/scsi/mpt2sas/mpt2sas_base.h  |  22 +-
 drivers/scsi/mpt2sas/mpt2sas_scsih.c | 535 +--
 drivers/scsi/mpt2sas/mpt2sas_transport.c |  12 +-
 3 files changed, 396 insertions(+), 173 deletions(-)

Diff showing changes v1 => v2:
http://jcalvinowens.github.io/stuff/mpt2sas-patchset-v1v2.patch
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] mpt2sas: Refcount sas_device objects and fix unsafe list usage

2015-07-11 Thread Calvin Owens
These objects can be referenced concurrently throughout the driver, we
need a way to make sure threads can't delete them out from under each
other. This patch adds the refcount, and refactors the code to use it.

Additionally, we cannot iterate over the sas_device_list without
holding the lock, or we risk corrupting random memory if items are
added or deleted as we iterate. This patch refactors _scsih_probe_sas()
to use the sas_device_list in a safe way.

Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Signed-off-by: Calvin Owens 
---
 drivers/scsi/mpt2sas/mpt2sas_base.h  |  22 +-
 drivers/scsi/mpt2sas/mpt2sas_scsih.c | 434 ---
 drivers/scsi/mpt2sas/mpt2sas_transport.c |  12 +-
 3 files changed, 315 insertions(+), 153 deletions(-)

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h 
b/drivers/scsi/mpt2sas/mpt2sas_base.h
index caff8d1..78f41ac 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -238,6 +238,7 @@
  * @flags: MPT_TARGET_FLAGS_XXX flags
  * @deleted: target flaged for deletion
  * @tm_busy: target is busy with TM request.
+ * @sdev: The sas_device associated with this target
  */
 struct MPT2SAS_TARGET {
struct scsi_target *starget;
@@ -248,6 +249,7 @@ struct MPT2SAS_TARGET {
u32 flags;
u8  deleted;
u8  tm_busy;
+   struct _sas_device *sdev;
 };
 
 
@@ -376,8 +378,24 @@ struct _sas_device {
u8  phy;
u8  responding;
u8  pfa_led_on;
+   struct kref refcount;
 };
 
+static inline void sas_device_get(struct _sas_device *s)
+{
+   kref_get(&s->refcount);
+}
+
+static inline void sas_device_free(struct kref *r)
+{
+   kfree(container_of(r, struct _sas_device, refcount));
+}
+
+static inline void sas_device_put(struct _sas_device *s)
+{
+   kref_put(&s->refcount, sas_device_free);
+}
+
 /**
  * struct _raid_device - raid volume link list
  * @list: sas device list
@@ -1095,7 +1113,9 @@ struct _sas_node 
*mpt2sas_scsih_expander_find_by_handle(struct MPT2SAS_ADAPTER *
 u16 handle);
 struct _sas_node *mpt2sas_scsih_expander_find_by_sas_address(struct 
MPT2SAS_ADAPTER
 *ioc, u64 sas_address);
-struct _sas_device *mpt2sas_scsih_sas_device_find_by_sas_address(
+struct _sas_device *mpt2sas_get_sdev_by_addr(
+struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
+struct _sas_device *__mpt2sas_get_sdev_by_addr(
 struct MPT2SAS_ADAPTER *ioc, u64 sas_address);
 
 void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c 
b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 3f26147..fad80ce 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -526,8 +526,43 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc,
}
 }
 
+struct _sas_device *
+__mpt2sas_get_sdev_from_target(struct MPT2SAS_TARGET *tgt_priv)
+{
+   struct _sas_device *ret;
+
+   ret = tgt_priv->sdev;
+   if (ret)
+   sas_device_get(ret);
+
+   return ret;
+}
+
+struct _sas_device *
+__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
+u64 sas_address)
+{
+   struct _sas_device *sas_device;
+
+   assert_spin_locked(&ioc->sas_device_lock);
+
+   list_for_each_entry(sas_device, &ioc->sas_device_list, list)
+   if (sas_device->sas_address == sas_address)
+   goto found_device;
+
+   list_for_each_entry(sas_device, &ioc->sas_device_init_list, list)
+   if (sas_device->sas_address == sas_address)
+   goto found_device;
+
+   return NULL;
+
+found_device:
+   sas_device_get(sas_device);
+   return sas_device;
+}
+
 /**
- * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search
+ * mpt2sas_get_sdev_by_addr - sas device search
  * @ioc: per adapter object
  * @sas_address: sas address
  * Context: Calling function should acquire ioc->sas_device_lock
@@ -536,24 +571,44 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc,
  * object.
  */
 struct _sas_device *
-mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc,
+mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc,
 u64 sas_address)
 {
struct _sas_device *sas_device;
+   unsigned long flags;
+
+   spin_lock_irqsave(&ioc->sas_device_lock, flags);
+   sas_device = __mpt2sas_get_sdev_by_addr(ioc,
+   sas_address);
+   spin_unlock_irqrestore(&ioc->sas_device_lock, flags);
+
+   return sas_device;
+}
+
+static struct _sas_device *
+__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
+{
+   struct _sas_device *sas_device;
+
+   assert_spin_locked(&ioc->sas_device_lock);
 
 	list_for_each_entry(sas_device, &ioc->sas_device_list, list)
-   if (sas_device->sas_address == sas_address)
-   return sas_device;
+   if (sas_device->handle == handle)
+   goto found_device;
 
 

Re: [PATCH 6/6] Fix unsafe fw_event_list usage

2015-07-11 Thread Calvin Owens
On Friday 07/03 at 09:02 -0700, Christoph Hellwig wrote:
> On Mon, Jun 08, 2015 at 08:50:56PM -0700, Calvin Owens wrote:
> > Since the fw_event deletes itself from the list, cleanup_queue() can
> > walk onto garbage pointers or walk off into freed memory.
> > 
> > This refactors the code in _scsih_fw_event_cleanup_queue() to not
> > iterate over the fw_event_list without a lock. 
> 
> I think this really should be folded into the previous one, with the
> fixes in this one the other refcounting change don't make a whole lot
> sense.
> 
> > +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER 
> > *ioc)
> > +{
> > +   unsigned long flags;
> > +   struct fw_event_work *fw_event = NULL;
> > +
> > +   spin_lock_irqsave(&ioc->fw_event_lock, flags);
> > +   if (!list_empty(&ioc->fw_event_list)) {
> > +   fw_event = list_first_entry(&ioc->fw_event_list,
> > +   struct fw_event_work, list);
> > +   list_del_init(&fw_event->list);
> > +   fw_event_work_get(fw_event);
> > +   }
> > +   spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
> > +
> > +   return fw_event;
> 
> Shouldn't we have a reference for each item on the list that gets
> transfer to whomever removes it from the list?

Yes, this was a bit weird the way I did it. I redid this in v2, hopefully
it's clearer.

> Additionally _firmware_event_work should call dequeue_next_fw_event
> first in the function so that item is off the list before we process
> it, and can then just drop the reference once it's done.

That works: cleanup_queue() won't wait on some already-running events, but
destroy_workqueue() drains the wq, so we won't run ahead and free things
from under the fw_event when unwinding.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/6] Refactor code to use new sas_device refcount

2015-07-11 Thread Calvin Owens
On Friday 07/03 at 08:38 -0700, Christoph Hellwig wrote:
> >  
> > +struct _sas_device *
> > +mpt2sas_scsih_sas_device_get_by_sas_address_nolock(struct MPT2SAS_ADAPTER 
> > *ioc,
> > +u64 sas_address)
> 
> Any chance to use a shorter name for this function? E.g.
> __mpt2sas_get_sdev_by_addr ?

Will do.

> > +{
> > +   struct _sas_device *sas_device;
> > +
> > +   BUG_ON(!spin_is_locked(>sas_device_lock));
> 
> This will blow on UP builds.  Please use assert_spin_locked or
> lockdep_assert_held instead.  And don't ask me which of the two,
> that's a mystery I don't understand myself either.

Will do.

> >  struct _sas_device *
> > -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc,
> > +mpt2sas_scsih_sas_device_get_by_sas_address(struct MPT2SAS_ADAPTER *ioc,
> >  u64 sas_address)
> >  {
> 
> > +static struct _sas_device *
> > +_scsih_sas_device_get_by_handle_nolock(struct MPT2SAS_ADAPTER *ioc, u16 
> > handle)
> 
> >  static struct _sas_device *
> > -_scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
> > +_scsih_sas_device_get_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle)
> 
> Same comments about the function names as above.
> 
> > +   struct _sas_device *sas_device;
> > +
> > +   BUG_ON(!spin_is_locked(>sas_device_lock));
> 
> Same comment about the right assert helpers as above.
> 
> > @@ -594,9 +634,15 @@ _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc,
> > if (!sas_device)
> > return;
> >  
> > +   /*
> > +* The lock serializes access to the list, but we still need to verify
> > +* that nobody removed the entry while we were waiting on the lock.
> > +*/
> > spin_lock_irqsave(>sas_device_lock, flags);
> > -   list_del(_device->list);
> > -   kfree(sas_device);
> > +   if (!list_empty(_device->list)) {
> > +   list_del_init(_device->list);
> > +   sas_device_put(sas_device);
> > +   }
> > spin_unlock_irqrestore(>sas_device_lock, flags);
> 
> This looks odd to me.  Normally you'd have the lock from the list
> iteration that finds the device.  From looking at the code it seems
> like this only called from probe failure paths, though.  It seems like
> for this case the device simplify shouldn't be added until the probe
> succeeds and this function should go away?

There's a horrible maze of dependencies on things being on the lists
while being added that make this impossible: I spent some time trying
to get this to work, but I always end up with no drives. :(

(The path through _scsih_probe_sas() seems not to care)

I was hopeful your suggestion below about putting the sas_device
pointer in ->hostdata would eliminate the need for all the find_by_X()
lookups, but some won't go away.

> > @@ -1208,12 +1256,15 @@ _scsih_change_queue_depth(struct scsi_device *sdev, 
> > int qdepth)
> > goto not_sata;
> > if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))
> > goto not_sata;
> > +
> > spin_lock_irqsave(>sas_device_lock, flags);
> > -   sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
> > +   sas_device = mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc,
> >sas_device_priv_data->sas_target->sas_address);
> > -   if (sas_device && sas_device->device_info &
> > -   MPI2_SAS_DEVICE_INFO_SATA_DEVICE)
> > +   if (sas_device && sas_device->device_info
> > +   & MPI2_SAS_DEVICE_INFO_SATA_DEVICE) {
> > max_depth = MPT2SAS_SATA_QUEUE_DEPTH;
> > +   sas_device_put(sas_device);
> > +   }
> 
> Please store a pointer to the sas_device in struct scsi_target ->hostdata
> in _scsih_target_alloc and avoid the need for this and other runtime
> lookups where we have a scsi_device or scsi_target structure available.

Will do.

> > @@ -1324,13 +1377,15 @@ _scsih_target_destroy(struct scsi_target *starget)
> >  
> > spin_lock_irqsave(>sas_device_lock, flags);
> > rphy = dev_to_rphy(starget->dev.parent);
> > -   sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
> > +   sas_device = mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc,
> >rphy->identify.sas_address);
> > if (sas_device && (sas_device->starget == starget) &&
> > (sas_device->id == starget->id) &&
> > (sas_device->channel == starget->channel))
> > sas_device->starget = NULL;
> >  
> > +   if (sas_device)
> > +   sas_device_put(sas_device);
> > spin_unlock_irqrestore(>sas_device_lock, flags);
> 
> .. like this one.
> 
> >   out:
> > @@ -1386,7 +1441,7 @@ _scsih_slave_alloc(struct scsi_device *sdev)
> >  
> > if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) {
> > spin_lock_irqsave(>sas_device_lock, flags);
> > -   sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc,
> > +   sas_device = 
> > mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc,
> > sas_target_priv_data->sas_address);
> >  

Re: [PATCH TRIVIAL] README: GTK+ is a acronym

2015-07-11 Thread Diego Viola
Thanks, that should have been a "an acronym" instead of "a acronym"

Typo in my commit message. :-(

Diego

On Fri, Jul 10, 2015 at 6:17 PM, Jonathan Corbet  wrote:
> On Mon,  6 Jul 2015 14:33:21 -0300
> Diego Viola  wrote:
>
>> - "make gconfig" X windows (Gtk) based configuration tool.
>> + "make gconfig" X windows (GTK+) based configuration tool.
>
> Applied to the docs tree, thanks.
>
> jon
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] Refactor code to use new fw_event refcount

2015-07-11 Thread Calvin Owens
Thanks for this, I'm sending a v2 shortly.

On Friday 07/03 at 09:00 -0700, Christoph Hellwig wrote:
> On Mon, Jun 08, 2015 at 08:50:55PM -0700, Calvin Owens wrote:
> > This refactors the fw_event code to use the new refcount.
> 
> I spent some time looking over this code because it's so convoluted.
> In general I think code should either embeed one work_struct (and it
> really doesn't seem to need a delayed work here!) or if needed a list
> and not both like this one.  But it's probably too much work to sort
> all this out, so let's go with your version.

Yeah, I tried to get rid of fw_event_list altogether, since I think what
cleanup_queue() does could be simplified to calling flush_workqueue().

The problem is _scsih_check_topo_delete_events(), which looks at the
list and sometimes marks fw_events as "ignored" so they aren't executed.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] locking/pvqspinlock: Fix kernel panic in locking-selftest

2015-07-11 Thread Masami Hiramatsu
On 2015/07/12 10:19, Waiman Long wrote:
> Enabling locking-selftest in a VM guest may cause the following
> kernel panic:
> 
> kernel BUG at .../kernel/locking/qspinlock_paravirt.h:137!
> 
> This is due to the fact that the pvqspinlock unlock function is
> expecting either a _Q_LOCKED_VAL or _Q_SLOW_VAL in the lock byte. This
> patch prevents that bug report by ignoring it when debug_locks_silent
> is set. Otherwise, a warning will be printed if it contains an
> unexpected value.
> 
> With this patch applied, the kernel locking-selftest completed without
> any noise.
> 

OK, I've tested this with make allmodconfig && make localmodconfig kernel.
(I've hit another issue to boot, but it seems not related to this issue)

Tested-by: Masami Hiramatsu 

Thank you!



> Signed-off-by: Waiman Long 
> ---
>  kernel/locking/qspinlock_paravirt.h |   12 +++-
>  1 files changed, 11 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/locking/qspinlock_paravirt.h 
> b/kernel/locking/qspinlock_paravirt.h
> index 04ab181..15d3733 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -4,6 +4,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Implement paravirt qspinlocks; the general idea is to halt the vcpus 
> instead
> @@ -286,15 +287,24 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
> *lock)
>  {
>   struct __qspinlock *l = (void *)lock;
>   struct pv_node *node;
> + u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
>  
>   /*
>* We must not unlock if SLOW, because in that case we must first
>* unhash. Otherwise it would be possible to have multiple @lock
>* entries, which would be BAD.
>*/
> - if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
> + if (likely(lockval == _Q_LOCKED_VAL))
>   return;
>  
> + if (unlikely(lockval != _Q_SLOW_VAL)) {
> + if (debug_locks_silent)
> + return;
> + WARN(1, "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
> + (unsigned long)lock, atomic_read(&l->val));
> + return;
> + }
> +
>   /*
>* Since the above failed to release, this must be the SLOW path.
>* Therefore start by looking up the blocked node and unhashing it.
> 


-- 
Masami HIRAMATSU
Linux Technology Research Center, System Productivity Research Dept.
Center for Technology Innovation - Systems Engineering
Hitachi, Ltd., Research & Development Group
E-mail: masami.hiramatsu...@hitachi.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG][tip/master] kernel panic while locking selftest at qspinlock_paravirt.h:137!

2015-07-11 Thread Waiman Long

On 07/11/2015 01:05 AM, Masami Hiramatsu wrote:

On 2015/07/11 10:27, Waiman Long wrote:

On 07/10/2015 08:32 PM, Masami Hiramatsu wrote:

On 2015/07/10 23:28, Peter Zijlstra wrote:

On Fri, Jul 10, 2015 at 03:57:46PM +0200, Ingo Molnar wrote:

* Peter Zijlstra   wrote:

Do we want to make double unlock non-fatal unconditionally?

No, just don't BUG() out, don't crash the system - generate a warning?

So that would be a yes..

Something like so then? Won't this generate a splat on that locking self
test then? And upset people?

Hmm, yes, this still noisy...
Can't we avoid double-unlock completely? it seems that this warning can
happen randomly, which means pv-spinlock randomly broken, doesn't it?

It shouldn't randomly happen. The message should be printed at the first
instance of double-unlock. If that is not case, there may be some
problem in the code.

Ah, OK. That comes from locking selftest. In that case, do we really
need the warning while selftest, since we know it always fails ?


Anyway, I have an alternative fix that should better capture the problem:

Do we need both Peter's BUG() removing patch and this?



No, you can choose either one. They are just different ways to solve the 
same BUG() problem.


Cheers,
Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] mm/shrinker: make unregister_shrinker() less fragile

2015-07-11 Thread Sergey Senozhatsky
Hello Christoph,

On (07/11/15 03:02), Christoph Hellwig wrote:
> > Shrinker API does not handle nicely unregister_shrinker() on a 
> > not-registered
> > ->shrinker. Looking at shrinker users, they all have to
> > (a) carry on some sort of a flag to make sure that "unregister_shrinker()"
> > will not blow up later
> > (b) be fishy (potentially can Oops)
> > (c) access private members `struct shrinker' (e.g. `shrink.list.next')
> 
> Ayone who does that is broken.  You just need to have clear init (with
> proper unwinding) and exit functions and order things properly.  It
> works like most register/unregister calls and should stay that way.
> 
> Maye you you should ty to explain what practical problem you're seeing
> to start with.

Yes, but the main difference here is that it seems that shrinker users
don't tend to treat shrinker registration failures as fatal errors and
just continue with shrinker functionality disabled. And it makes sense.

(copy paste from https://lkml.org/lkml/2015/7/9/751)

> Ayone who does that is broken

I'm afraid, in that case we almost don't have not-broken shrinker users.


-- ignoring register_shrinker() error

: int ldlm_pools_init(void)
: {
: int rc;
:
: rc = ldlm_pools_thread_start();
: if (rc == 0) {
: register_shrinker(&ldlm_pools_srv_shrinker);
: register_shrinker(&ldlm_pools_cli_shrinker);
: }
: return rc;
: }
: EXPORT_SYMBOL(ldlm_pools_init);
:
: void ldlm_pools_fini(void)
: {
: unregister_shrinker(&ldlm_pools_srv_shrinker);
: unregister_shrinker(&ldlm_pools_cli_shrinker);
: ldlm_pools_thread_stop();
: }
: EXPORT_SYMBOL(ldlm_pools_fini);


-- and here

:void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
:{
:dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
:dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
:dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
:register_shrinker(&dev_priv->mm.shrinker);
:
:dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
:register_oom_notifier(&dev_priv->mm.oom_notifier);
:}


-- and here

:int __init gfs2_glock_init(void)
:{
:unsigned i;
...
:register_shrinker(&glock_shrinker);
:
:return 0;
:}
:
:void gfs2_glock_exit(void)
:{
:unregister_shrinker(&glock_shrinker);
:destroy_workqueue(glock_workqueue);
:destroy_workqueue(gfs2_delete_workqueue);
:}


-- and here

:static int __init lowmem_init(void)
:{
:register_shrinker(&lowmem_shrinker);
:return 0;
:}
:
:static void __exit lowmem_exit(void)
:{
:unregister_shrinker(&lowmem_shrinker);
:}



-- accessing private member 'c->shrink.list.next' to distinguish between
'register_shrinker() was successful and need to unregister it' and
'register_shrinker() failed, don't unregister_shrinker() because it
may Oops'

:struct cache_set {
: ...
:   struct shrinker shrink;
: ...
:};
:
: ...
:
: void bch_btree_cache_free(struct cache_set *c)
: {
: struct btree *b;
: struct closure cl;
: closure_init_stack(&cl);
:
: if (c->shrink.list.next)
: unregister_shrinker(&c->shrink);


-- and here
:int bch_btree_cache_alloc(struct cache_set *c)
:{
...
:register_shrinker(&c->shrink);
:
:
...
:
:void bch_btree_cache_free(struct cache_set *c)
:{
:struct btree *b;
:struct closure cl;
:closure_init_stack(&cl);
:
:if (c->shrink.list.next)
:unregister_shrinker(&c->shrink);
:


And so on and on.

In fact, 'git grep = register_shrinker' gives only

$ git grep '= register_shrinker'
fs/ext4/extents_status.c:   err = register_shrinker(>s_es_shrinker);
fs/nfsd/nfscache.c: status = register_shrinker(_reply_cache_shrinker);
fs/ubifs/super.c:   err = register_shrinker(_shrinker_info);
mm/huge_memory.c:   err = register_shrinker(_zero_page_shrinker);
mm/workingset.c:ret = register_shrinker(_shadow_shrinker);


The rest is 'broken'.

-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 19/45] clk: mxs: Include clk.h in C files that use it

2015-07-11 Thread Shawn Guo
On Sat, Jul 11, 2015 at 7:33 AM, Stephen Boyd  wrote:
> Clock provider drivers generally shouldn't include clk.h because
> it's the consumer API. The clk.h include is being included in all
> mxs files because it's part of mxs/clk.h even though nothing
> actually requires it in that file. Move the clk.h include to the
> C files that are actually using it and remove the clk.h include
> from the header file. The clkdev.h include isn't used either, so
> drop it too.
>
> Cc: Shawn Guo 
> Signed-off-by: Stephen Boyd 

Acked-by: Shawn Guo 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Subject: [PATCH 1/1] Fix redundant check against unsigned int in broken audit test fix for exec arg len

2015-07-11 Thread Mustapha Abiola
>From 55fae099d46749b73895934aab8c2823c5a23abe Mon Sep 17 00:00:00 2001

From: Mustapha Abiola 

Date: Sat, 11 Jul 2015 17:01:04 +

Subject: [PATCH 1/1] Fix redundant check against unsigned int in broken audit
test fix for exec arg len


Quick patch to fix the needless check of `len` being < 0 as its an

unsigned int.


Signed-off-by: Mustapha Abiola 

---

 kernel/auditsc.c | 2 +-

 1 file changed, 1 insertion(+), 1 deletion(-)


diff --git a/kernel/auditsc.c b/kernel/auditsc.c

index e85bdfd..0012476 100644

--- a/kernel/auditsc.c

+++ b/kernel/auditsc.c

@@ -1021,7 +1021,7 @@ static int audit_log_single_execve_arg(struct
audit_context *context,

  * for strings that are too long, we should not have created

  * any.

  */

- if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) {

+ if (WARN_ON_ONCE(len > MAX_ARG_STRLEN - 1)) {

  send_sig(SIGKILL, current, 0);

  return -1;

  }

-- 

1.9.1


0001-Fix-redundant-check-against-unsigned-int-in-broken-a.patch
Description: Binary data


Re: [PATCH v2] net: dsa: mv88e6xxx: add write access to debugfs regs file

2015-07-11 Thread David Miller
From: Vivien Didelot 
Date: Sat, 11 Jul 2015 14:36:12 -0400 (EDT)

> In the meantime, this is really useful for development. i.e. ensuring a good
> switchdev/DSA interaction without being able to read and write directly the
> hardware VLAN table, is a bit a PITA. A dynamic debugfs looked appropriate.

For "development" you can hack the driver, add tracepoints, or use
another mechanism anyone hacking the kernel (which by definition
someone doing "development" is doing) can do.

I do not buy any of your arguments, and you really miss the grand
opportunity to export the knobs and values in a way which are going
to:

1) Be useful to users

2) Be usable by any similar DSA driver, not just _yours_

So please stop this myopic narrow thinking when you add facilities for
development or export values.  Think of the big picture and long term,
not just your personal perceived immediate needs of today.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] tty: add missing rcu_read_lock for task_pgrp

2015-07-11 Thread Peter Hurley
On 06/29/2015 07:59 PM, Patrick Donnelly wrote:
> task_pgrp requires an rcu or tasklist lock to be obtained if the returned pid
> is to be dereferenced, which kill_pgrp does. Obtain an RCU lock for the
> duration of use.
> 
> Signed-off-by: Patrick Donnelly 
> ---
>  drivers/tty/n_tty.c  | 12 ++--
>  drivers/tty/tty_io.c | 17 -
>  2 files changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
> index c9c27f6..0d631f8 100644
> --- a/drivers/tty/n_tty.c
> +++ b/drivers/tty/n_tty.c
> @@ -2137,6 +2137,8 @@ extern ssize_t redirected_tty_write(struct file *, 
> const char __user *,
>  
>  static int job_control(struct tty_struct *tty, struct file *file)
>  {
> + struct pid *pgrp;
> +
>   /* Job control check -- must be done at start and after
>  every sleep (POSIX.1 7.1.1.4). */
>   /* NOTE: not yet done after every sleep pending a thorough
> @@ -2146,18 +2148,24 @@ static int job_control(struct tty_struct *tty, struct 
> file *file)
>   current->signal->tty != tty)
>   return 0;
>  
> + rcu_read_lock();
> + pgrp = task_pgrp(current);
> +
>   spin_lock_irq(>ctrl_lock);
> +
>   if (!tty->pgrp)
>   printk(KERN_ERR "n_tty_read: no tty->pgrp!\n");
> - else if (task_pgrp(current) != tty->pgrp) {
> + else if (pgrp != tty->pgrp) {
>   spin_unlock_irq(>ctrl_lock);
>   if (is_ignored(SIGTTIN) || is_current_pgrp_orphaned())
>   return -EIO;

I just realized there's a missing rcu_read_unlock() from this early return.

Regards,
Peter Hurley


> - kill_pgrp(task_pgrp(current), SIGTTIN, 1);
> + kill_pgrp(pgrp, SIGTTIN, 1);
> + rcu_read_unlock();
>   set_thread_flag(TIF_SIGPENDING);
>   return -ERESTARTSYS;
>   }
>   spin_unlock_irq(>ctrl_lock);
> + rcu_read_unlock();
>   return 0;
>  }
>  
> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index 57fc6ee..6bdfb98 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -388,33 +388,40 @@ EXPORT_SYMBOL_GPL(tty_find_polling_driver);
>  int tty_check_change(struct tty_struct *tty)
>  {
>   unsigned long flags;
> + struct pid *pgrp;
>   int ret = 0;
>  
>   if (current->signal->tty != tty)
>   return 0;
>  
> + rcu_read_lock();
> + pgrp = task_pgrp(current);
> +
>   spin_lock_irqsave(>ctrl_lock, flags);
>  
>   if (!tty->pgrp) {
>   printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n");
>   goto out_unlock;
>   }
> - if (task_pgrp(current) == tty->pgrp)
> + if (pgrp == tty->pgrp)
>   goto out_unlock;
>   spin_unlock_irqrestore(>ctrl_lock, flags);
> +
>   if (is_ignored(SIGTTOU))
> - goto out;
> + goto out_rcuunlock;
>   if (is_current_pgrp_orphaned()) {
>   ret = -EIO;
> - goto out;
> + goto out_rcuunlock;
>   }
> - kill_pgrp(task_pgrp(current), SIGTTOU, 1);
> + kill_pgrp(pgrp, SIGTTOU, 1);
> + rcu_read_unlock();
>   set_thread_flag(TIF_SIGPENDING);
>   ret = -ERESTARTSYS;
> -out:
>   return ret;
>  out_unlock:
>   spin_unlock_irqrestore(>ctrl_lock, flags);
> +out_rcuunlock:
> + rcu_read_unlock();
>   return ret;
>  }
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] xen/blkfront: convert to blk-mq APIs

2015-07-11 Thread Bob Liu

On 07/12/2015 02:14 AM, Jens Axboe wrote:
> On 07/11/2015 07:30 AM, Bob Liu wrote:
>> Note: This patch is based on original work of Arianna's internship for
>> GNOME's Outreach Program for Women.
> 
> Great to see this finally get prepped to go in!
> 
>> Only one hardware queue is used now, so there is no performance change.
> 
> I would hope that the blk-mq path, even with one queue, is a perf win over 
> the old interface. So I'm not sure that is correct. But the bigger win will 
> be with more queues, of course.
> 

Right, but there are memory consumption and migration issues while using more 
hardware queues.
So I separated this patch from that big patchset and hope it can be merged first.

>> The legacy non-mq code is deleted completely which is the same as other
>> drivers like virtio, mtip, and nvme.
>>
>> Also dropped one unnecessary holding of info->io_lock when calling
>> blk_mq_stop_hw_queues().
>>
>> Changes in v2:
>>   - Reorganized blk_mq_queue_rq()
>>   - Restored most io_locks in place
> 
> Looks good to me. The most common error case is the busy-out not stopping 
> queues, or not restarting them at completion. But that all looks fine.
> 
> I would, however, rename blk_mq_queue_rq(). It sounds like a core function. 
> blkif_queue_rq() would be more appropriate.
> 

Will send v3.

>> Signed-off-by: Arianna Avanzini 
>> Signed-off-by: Bob Liu 
> 
> Acked-by: Jens Axboe 
> 

Thank you!

-- 
Regards,
-Bob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Darlehen.

2015-07-11 Thread francisco . andy
Haben Sie sich für ein Darlehen oder Hypotheken und ständig abgelehnt suchen 
die von Finanzinstituten ist Mr.James Rodriguez ein Gläubiger bietet Darlehen 
zu einem Zinssatz von 3% Personen / Unternehmensverbände, Unternehmen, 
Betrieben, Schulen, Kirchen, usw., die in der sind Bedarf an Geld in einer 
Amortisationszeit von 1 bis 30 years.We Angebot 5.000,00 Euro auf 50 Mio. Euro 
bis zu einschließlich 18 und älter gelten müssen Sie sind. Wir sind 
vertrauenswürdig, zuverlässig und dynamisch. kontaktieren Sie uns jetzt: 
jr9304...@gmail.com

Ihre Namen ..
Menge benötigt werden ..
Dauer: ..
Dein Land ...
Deine Adresse ..
Telefon ...
Monatliches Einkommen ...
Sex  ...
Dein Alter ...

(jr9304...@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/2] kconfig: warn of unhandled characters in Kconfig commands

2015-07-11 Thread Ulf Magnusson
One more nitpick (should be the last one):

On Fri, Jul 10, 2015 at 10:25:32AM +0200, Andreas Ruprecht wrote:
> In Kconfig, definitions of options take the following form:
> "   ...". COMMANDs and PARAMs are treated
> slightly different by the underlying parser.
> 
> While commit 2e0d737fc76f ("kconfig: don't silently ignore unhandled
> characters") introduced a warning for unsupported characters around
> PARAMs, it does not cover situations where a COMMAND has additional
> characters before it.
> 
> This change makes Kconfig emit a warning if superfluous characters
> are found before COMMANDs. As the 'help' statement sometimes is
> written as '---help---', the '-' character would now also be regarded
> as unhandled and generate a warning. To avoid that, '-' is added to
> the list of allowed characters, and the token '---help---' is included
> in the zconf.gperf file.
> 
> Reported-by: Valentin Rothberg 
> Signed-off-by: Andreas Ruprecht 
> ---
> Changes to v1:
>   - add '---help---' in zconf.gperf instead of special casing
> it in zconf.l
> 
> Changes to v2:
>   - Do no constify char parameter to warn_ignored_character
>   - Shorten rule definitions for '.'
> 
>  scripts/kconfig/zconf.gperf |  1 +
>  scripts/kconfig/zconf.l | 20 +++-
>  2 files changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/scripts/kconfig/zconf.gperf b/scripts/kconfig/zconf.gperf
> index b6ac02d..7aceb7b 100644
> --- a/scripts/kconfig/zconf.gperf
> +++ b/scripts/kconfig/zconf.gperf
> @@ -22,6 +22,7 @@ comment,T_COMMENT,  TF_COMMAND
>  config,  T_CONFIG,   TF_COMMAND
>  menuconfig,  T_MENUCONFIG,   TF_COMMAND
>  help,T_HELP, TF_COMMAND
> +"---help---",   T_HELP, TF_COMMAND

I think the quotes are redundant here, and tabs instead of spaces would
be consistent with the other entries. Make sure everything lines up with
8-space tabstops.

>  if,  T_IF,   TF_COMMAND|TF_PARAM
>  endif,   T_ENDIF,TF_COMMAND
>  depends, T_DEPENDS,  TF_COMMAND
> diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
> index 200a3fe..c410d25 100644
> --- a/scripts/kconfig/zconf.l
> +++ b/scripts/kconfig/zconf.l
> @@ -66,9 +66,16 @@ static void alloc_string(const char *str, int size)
>   memcpy(text, str, size);
>   text[size] = 0;
>  }
> +
> +static void warn_ignored_character(char chr)
> +{
> + fprintf(stderr,
> + "%s:%d:warning: ignoring unsupported character '%c'\n",
> + zconf_curname(), zconf_lineno(), chr);
> +}
>  %}
>  
> -n[A-Za-z0-9_]
> +n[A-Za-z0-9_-]
>  
>  %%
>   int str = 0;
> @@ -106,7 +113,7 @@ n [A-Za-z0-9_]
>   zconflval.string = text;
>   return T_WORD;
>   }
> - .
> + .   warn_ignored_character(*yytext);
>   \n  {
>   BEGIN(INITIAL);
>   current_file->lineno++;
> @@ -132,8 +139,7 @@ n [A-Za-z0-9_]
>   BEGIN(STRING);
>   }
>   \n  BEGIN(INITIAL); current_file->lineno++; return T_EOL;
> - --- /* ignore */
> - ({n}|[-/.])+{
> + ({n}|[/.])+ {
>   const struct kconf_id *id = kconf_id_lookup(yytext, yyleng);
>   if (id && id->flags & TF_PARAM) {
>   zconflval.id = id;
> @@ -146,11 +152,7 @@ n[A-Za-z0-9_]
>   #.* /* comment */
>   \\\ncurrent_file->lineno++;
>   [[:blank:]]+
> - .   {
> - fprintf(stderr,
> - "%s:%d:warning: ignoring unsupported character '%c'\n",
> - zconf_curname(), zconf_lineno(), *yytext);
> - }
> + .   warn_ignored_character(*yytext);
>   <> {
>   BEGIN(INITIAL);
>   }
> -- 
> 1.9.1
> 

Cheers,
Ulf
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] locking/pvqspinlock: Fix kernel panic in locking-selftest

2015-07-11 Thread Waiman Long
Enabling locking-selftest in a VM guest may cause the following
kernel panic:

kernel BUG at .../kernel/locking/qspinlock_paravirt.h:137!

This is due to the fact that the pvqspinlock unlock function is
expecting either a _Q_LOCKED_VAL or _Q_SLOW_VAL in the lock byte. This
patch prevents that bug report by ignoring it when debug_locks_silent
is set. Otherwise, a warning will be printed if it contains an
unexpected value.

With this patch applied, the kernel locking-selftest completed without
any noise.

Signed-off-by: Waiman Long 
---
 kernel/locking/qspinlock_paravirt.h |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index 04ab181..15d3733 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -4,6 +4,7 @@
 
 #include 
 #include 
+#include 
 
 /*
  * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
@@ -286,15 +287,24 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
*lock)
 {
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
+   u8 lockval = cmpxchg(>locked, _Q_LOCKED_VAL, 0);
 
/*
 * We must not unlock if SLOW, because in that case we must first
 * unhash. Otherwise it would be possible to have multiple @lock
 * entries, which would be BAD.
 */
-   if (likely(cmpxchg(>locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
+   if (likely(lockval == _Q_LOCKED_VAL))
return;
 
+   if (unlikely(lockval != _Q_SLOW_VAL)) {
+   if (debug_locks_silent)
+   return;
+   WARN(1, "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+   (unsigned long)lock, atomic_read(>val));
+   return;
+   }
+
/*
 * Since the above failed to release, this must be the SLOW path.
 * Therefore start by looking up the blocked node and unhashing it.
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] libnvdimm fixes for 4.2-rc2

2015-07-11 Thread Williams, Dan J
Hi Linus, please pull from:

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm libnvdimm-fixes

...to receive:

1/ Fixes for a handful of smatch reports (Thanks Dan C.!) and minor
   bug fixes (patches 1-6)

2/ Correctness fixes to the BLK-mode nvdimm driver (patches 7-10).
   Granted these are slightly large for a -rc update.  They have been
   out for review in one form or another since the end of May and were
   deferred from the merge window while we settled on the "PMEM API" for
   the PMEM-mode nvdimm driver (i.e. memremap_pmem, memcpy_to_pmem, and
   wmb_pmem).  Now that those apis are merged we implement them in the
   BLK driver to guarantee that mmio aperture moves stay ordered with
   respect to incoming read/write requests, and that writes are flushed
   through those mmio-windows and platform-buffers to be persistent on
   media.

These pass the sub-system unit tests with the updates to
tools/testing/nvdimm, and have received a successful build-report from
the kbuild robot (468 configs).  Full git log below with acks from
Rafael for the touches to drivers/acpi/.

[PATCH 01/10] sparse: fix misplaced __pmem definition
[PATCH 02/10] libnvdimm: smatch cleanups in __nd_ioctl
[PATCH 03/10] nvdimm: Fix return value of nvdimm_bus_init() if class_create() 
fails
[PATCH 04/10] nfit: fix smatch "use after null check" report
[PATCH 05/10] pmem: add maintainer for include/linux/pmem.h
[PATCH 06/10] tools/testing/nvdimm: mock ioremap_wt
[PATCH 07/10] tools/testing/nvdimm: fix return code for unimplemented commands
[PATCH 08/10] tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to 
nfit_test
[PATCH 09/10] nfit: update block I/O path to use PMEM API
[PATCH 10/10] nfit: add support for NVDIMM "latch" flag

The following changes since commit 88793e5c774ec69351ef6b5200bb59f532e41bca:

  Merge tag 'libnvdimm-for-4.2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm (2015-06-29 10:34:42 
-0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm libnvdimm-fixes

for you to fetch changes up to f0f2c072cf530d5b8890be5051cc8b36b0c54cce:

  nfit: add support for NVDIMM "latch" flag (2015-07-10 14:43:50 -0400)


Axel Lin (1):
  nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails

Dan Williams (6):
  sparse: fix misplaced __pmem definition
  libnvdimm: smatch cleanups in __nd_ioctl
  nfit: fix smatch "use after null check" report
  tools/testing/nvdimm: mock ioremap_wt
  tools/testing/nvdimm: fix return code for unimplemented commands
  tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to 
nfit_test

Ross Zwisler (3):
  pmem: add maintainer for include/linux/pmem.h
  nfit: update block I/O path to use PMEM API
  nfit: add support for NVDIMM "latch" flag

 MAINTAINERS   |   1 +
 drivers/acpi/nfit.c   | 134 ++
 drivers/acpi/nfit.h   |  20 +-
 drivers/nvdimm/bus.c  |  11 +---
 include/linux/compiler.h  |   2 +-
 tools/testing/nvdimm/Kbuild   |   3 +
 tools/testing/nvdimm/test/iomap.c |  27 
 tools/testing/nvdimm/test/nfit.c  |  52 ++-
 8 files changed, 223 insertions(+), 27 deletions(-)


commit 31f02455455d405320e2f749696bef4e02903b35
Author: Dan Williams 
Date:   Tue Jun 30 12:07:17 2015 -0400

sparse: fix misplaced __pmem definition

Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix:

drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function 
__builtin_expect
drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration
drivers/nvdimm/pmem.c:48:21: sparse: void declaration

...due to __pmem failing to be defined in some configurations when
CONFIG_SPARSE_RCU_POINTER=y.

Reported-by: kbuild test robot 
Reported-by: Dan Carpenter 
Signed-off-by: Dan Williams 

commit af834d457d9ed69e14836b63d0da198fdd2ec706
Author: Dan Williams 
Date:   Tue Jun 30 14:10:09 2015 -0400

libnvdimm: smatch cleanups in __nd_ioctl

Drop use of access_ok() since we are already using copy_{to|from}_user()
which do their own access_ok().

Reported-by: Dan Carpenter 
Signed-off-by: Dan Williams 

commit daa1dee405d7d3d3e816b84a692e838a5647a02a
Author: Axel Lin 
Date:   Sun Jun 28 17:00:57 2015 +0800

nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails

Return proper error if class_create() fails.

Signed-off-by: Axel Lin 
Signed-off-by: Dan Williams 

commit 193ccca43850d2355e7690a93ab9d7d78d38f905
Author: Dan Williams 
Date:   Tue Jun 30 16:09:39 2015 -0400

nfit: fix smatch "use after null check" report

drivers/acpi/nfit.c:1224 acpi_nfit_blk_region_enable()
 error: we previously assumed 'nfit_mem' could be null 

Re: [PATCH 0/7] Add rcu_sync infrastructure to avoid _expedited() in percpu-rwsem

2015-07-11 Thread Linus Torvalds
On Sat, Jul 11, 2015 at 4:35 PM, Oleg Nesterov  wrote:
>
> Linus, I am mostly trying to convince you. Nobody else objected so far.
> Could you please comment?

I don't mind this part of the series.

It's the whole "do we really want to put the effort into percpu-rwsem"
thing I worry about, as there just aren't that many users.

The conversions made that "too damn special" thing go away, but the
conversions (particularly the big _real_ user, namely fs/locks.c) seem
to have serious performance problems that are quite possibly not
fixable.

So my objection isn't to your change, my objection is to the whole
"right now there are two users, and they both use a global lock, so
*of course* they scale like shit, and this is all just papering over
that much more fundamental problem".

I hate one-off locking. One-off locking with one global lock? Yeah,
that just smells.

  Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 0/3] special_mapping_fault() is broken

2015-07-11 Thread Oleg Nesterov
On 07/10, Andrew Morton wrote:
>
> On Fri, 10 Jul 2015 18:51:21 +0200 Oleg Nesterov  wrote:
>
> > special_mapping_fault() is absolutely broken. It seems it was always
> > wrong, but this didn't matter until vdso/vvar started to use more than
> > one page.
> >
> > The patches are the same, just 1/3 was re-diffed on top of the recent
> > 6b7339f4c31ad "mm: avoid setting up anonymous pages into file mapping"
> > from Kirill.
> >
> > And after this change vma_is_anonymous() becomes really trivial, it
> > simply checks vm_ops == NULL. However, I do think the helper makes
> > sense. There are a lot of ->vm_ops != NULL checks, the helper makes
> > the caller's code more understandable (self-documented) and this is
> > more grep-friendly.
>
> I'm trying to work out which kernel version(s) this should go into,
> without a lot of success.
>
> What do we think the worst-case effects of the bug?

Ah, I should have mentioned this. And when I re-read my messages I see
that "absolutely broken" looks like "should be fixed asap". Sorry for
confusion.

No, this bug is not serious. Nothing bad can happen from the kernel
perspective. And I doubt that some application will ever unmap/remap
the part of vdso or any other install_special_mapping() user. So this
is just a correctness fix. In fact, to me the main problem is that I
was totally confused when I tried to read/understand this code ;)

Oleg.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/7] percpu-rwsem: change it to rely on rss_sync infrastructure

2015-07-11 Thread Oleg Nesterov
Currently down_write/up_write calls synchronize_sched_expedited()
twice which is evil. Change this code to rely on rcu-sync primitives.
This avoids the _expedited "big hammer", and this can be faster in
the contended case or even in the case when a single thread does
down_write/up_write in a loop.

Of course, a single down_write() will take more time, but otoh it
will be much more friendly to the whole system.

To simplify the review this patch doesn't update the comments, fixed
by the next change.

Signed-off-by: Oleg Nesterov 
---
 include/linux/percpu-rwsem.h  |3 ++-
 kernel/locking/percpu-rwsem.c |   18 +++---
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 3e88c9a..3e58226 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,11 +5,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct percpu_rw_semaphore {
+   struct rcu_sync_struct  rss;
unsigned int __percpu   *fast_read_ctr;
-   atomic_twrite_ctr;
struct rw_semaphore rw_sem;
atomic_tslow_read_ctr;
wait_queue_head_t   write_waitq;
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 652a8ee..69a7314 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
__init_rwsem(>rw_sem, name, rwsem_key);
-   atomic_set(>write_ctr, 0);
+   rcu_sync_init(>rss, RCU_SCHED_SYNC);
atomic_set(>slow_read_ctr, 0);
init_waitqueue_head(>write_waitq);
return 0;
@@ -25,6 +25,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
 
 void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 {
+   rcu_sync_dtor(>rss);
free_percpu(brw->fast_read_ctr);
brw->fast_read_ctr = NULL; /* catch use after free bugs */
 }
@@ -54,13 +55,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
-   bool success = false;
+   bool success;
 
preempt_disable();
-   if (likely(!atomic_read(>write_ctr))) {
+   success = rcu_sync_is_idle(>rss);
+   if (likely(success))
__this_cpu_add(*brw->fast_read_ctr, val);
-   success = true;
-   }
preempt_enable();
 
return success;
@@ -126,8 +126,6 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-   /* tell update_fast_ctr() there is a pending writer */
-   atomic_inc(>write_ctr);
/*
 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 *so that update_fast_ctr() can't succeed.
@@ -139,7 +137,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 *fast-path, it executes a full memory barrier before we return.
 *See R_W case in the comment above update_fast_ctr().
 */
-   synchronize_sched_expedited();
+   rcu_sync_enter(>rss);
 
/* exclude other writers, and block the new readers completely */
down_write(>rw_sem);
@@ -159,7 +157,5 @@ void percpu_up_write(struct percpu_rw_semaphore *brw)
 * Insert the barrier before the next fast-path in down_read,
 * see W_R case in the comment above update_fast_ctr().
 */
-   synchronize_sched_expedited();
-   /* the last writer unblocks update_fast_ctr() */
-   atomic_dec(>write_ctr);
+   rcu_sync_exit(>rss);
 }
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/7] rcusync: Introduce struct rcu_sync_ops

2015-07-11 Thread Oleg Nesterov
Add the new struct rcu_sync_ops which holds sync/call methods, and
turn the function pointers in rcu_sync_struct into an array of struct
rcu_sync_ops.

This simplifies the "init" helpers, and this way it is simpler to add
the new methods we need, especially ifdef'ed.

Reviewed-by: Paul E. McKenney 
Signed-off-by: Oleg Nesterov 
Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/rcusync.h |   60 ++-
 kernel/rcu/sync.c   |   43 +
 2 files changed, 45 insertions(+), 58 deletions(-)

diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h
index 7858491..988ec33 100644
--- a/include/linux/rcusync.h
+++ b/include/linux/rcusync.h
@@ -4,6 +4,8 @@
 #include 
 #include 
 
+enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
+
 struct rcu_sync_struct {
int gp_state;
int gp_count;
@@ -12,53 +14,37 @@ struct rcu_sync_struct {
int cb_state;
struct rcu_head cb_head;
 
-   void (*sync)(void);
-   void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+   enum rcu_sync_type  gp_type;
 };
 
-#define ___RCU_SYNC_INIT(name) \
-   .gp_state = 0,  \
-   .gp_count = 0,  \
-   .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
-   .cb_state = 0
-
-#define __RCU_SCHED_SYNC_INIT(name) {  \
-   ___RCU_SYNC_INIT(name), \
-   .sync = synchronize_sched,  \
-   .call = call_rcu_sched, \
-}
-
-#define __RCU_BH_SYNC_INIT(name) { \
-   ___RCU_SYNC_INIT(name), \
-   .sync = synchronize_rcu_bh, \
-   .call = call_rcu_bh,\
-}
-
-#define __RCU_SYNC_INIT(name) {
\
-   ___RCU_SYNC_INIT(name), \
-   .sync = synchronize_rcu,\
-   .call = call_rcu,   \
-}
-
-#define DEFINE_RCU_SCHED_SYNC(name)\
-   struct rcu_sync_struct name = __RCU_SCHED_SYNC_INIT(name)
-
-#define DEFINE_RCU_BH_SYNC(name)   \
-   struct rcu_sync_struct name = __RCU_BH_SYNC_INIT(name)
-
-#define DEFINE_RCU_SYNC(name)  \
-   struct rcu_sync_struct name = __RCU_SYNC_INIT(name)
-
 static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
 {
return !rss->gp_state; /* GP_IDLE */
 }
 
-enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
-
 extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
 extern void rcu_sync_enter(struct rcu_sync_struct *);
 extern void rcu_sync_exit(struct rcu_sync_struct *);
 
+#define __RCU_SYNC_INITIALIZER(name, type) {   \
+   .gp_state = 0,  \
+   .gp_count = 0,  \
+   .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
+   .cb_state = 0,  \
+   .gp_type = type,\
+   }
+
+#define__DEFINE_RCU_SYNC(name, type)   \
+   struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type)
+
+#define DEFINE_RCU_SYNC(name)  \
+   __DEFINE_RCU_SYNC(name, RCU_SYNC)
+
+#define DEFINE_RCU_SCHED_SYNC(name)\
+   __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
+
+#define DEFINE_RCU_BH_SYNC(name)   \
+   __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+
 #endif /* _LINUX_RCUSYNC_H_ */
 
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index f84176a..99051b7 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -1,7 +1,24 @@
-
 #include 
 #include 
 
+static const struct {
+   void (*sync)(void);
+   void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+} gp_ops[] = {
+   [RCU_SYNC] = {
+   .sync = synchronize_rcu,
+   .call = call_rcu,
+   },
+   [RCU_SCHED_SYNC] = {
+   .sync = synchronize_sched,
+   .call = call_rcu_sched,
+   },
+   [RCU_BH_SYNC] = {
+   .sync = synchronize_rcu_bh,
+   .call = call_rcu_bh,
+   },
+};
+
 enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
 enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
@@ -11,23 +28,7 @@ void rcu_sync_init(struct rcu_sync_struct *rss, enum 
rcu_sync_type type)
 {
memset(rss, 0, 

[PATCH 7/7] percpu-rwsem: cleanup the lockdep annotations in percpu_down_read()

2015-07-11 Thread Oleg Nesterov
Stolen from Peter's patch.

Change percpu_down_read() to use __down_read(), this way we can
do rwsem_acquire_read() unconditionally at the start to make this
code more symmetric and clean.

Signed-off-by: Oleg Nesterov 
---
 kernel/locking/percpu-rwsem.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 705aefd..2c54c64 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -62,14 +62,14 @@ static bool update_fast_ctr(struct percpu_rw_semaphore 
*brw, unsigned int val)
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
might_sleep();
-   if (likely(update_fast_ctr(brw, +1))) {
-   rwsem_acquire_read(>rw_sem.dep_map, 0, 0, _RET_IP_);
+   rwsem_acquire_read(>rw_sem.dep_map, 0, 0, _RET_IP_);
+
+   if (likely(update_fast_ctr(brw, +1)))
return;
-   }
 
-   down_read(>rw_sem);
+   /* Avoid rwsem_acquire_read() and rwsem_release() */
+   __down_read(>rw_sem);
atomic_inc(>slow_read_ctr);
-   /* avoid up_read()->rwsem_release() */
__up_read(>rw_sem);
 }
 
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/7] percpu-rwsem: fix the comments outdated by rcu_sync

2015-07-11 Thread Oleg Nesterov
Update the comments broken by the previous change.

Signed-off-by: Oleg Nesterov 
---
 kernel/locking/percpu-rwsem.c |   50 +
 1 files changed, 11 insertions(+), 39 deletions(-)

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 69a7314..705aefd 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -31,27 +31,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
- * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
- * serialize with the preempt-disabled section below.
- *
- * The nontrivial part is that we should guarantee acquire/release semantics
- * in case when
- *
- * R_W: down_write() comes after up_read(), the writer should see all
- *  changes done by the reader
- * or
- * W_R: down_read() comes after up_write(), the reader should see all
- *  changes done by the writer
+ * This is the fast-path for down_read/up_read. If it succeeds we rely
+ * on the barriers provided by rcu_sync_enter/exit; see the comments in
+ * percpu_down_write() and percpu_up_write().
  *
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
- *
- * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
- * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
- * reader inside the critical section. See the comments in down_write and
- * up_write below.
  */
 static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 {
@@ -113,29 +98,15 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
return sum;
 }
 
-/*
- * A writer increments ->write_ctr to force the readers to switch to the
- * slow mode, note the atomic_read() check in update_fast_ctr().
- *
- * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
- * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
- * counter it represents the number of active readers.
- *
- * Finally the writer takes ->rw_sem for writing and blocks the new readers,
- * then waits until the slow counter becomes zero.
- */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
/*
-* 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
-*so that update_fast_ctr() can't succeed.
-*
-* 2. Ensures we see the result of every previous this_cpu_add() in
-*update_fast_ctr().
+* Make rcu_sync_is_idle() == F and thus disable the fast-path in
+* percpu_down_read() and percpu_up_read(), and wait for gp pass.
 *
-* 3. Ensures that if any reader has exited its critical section via
-*fast-path, it executes a full memory barrier before we return.
-*See R_W case in the comment above update_fast_ctr().
+* The latter synchronises us with the preceeding readers which used
+* the fast-past, so we can not miss the result of __this_cpu_add()
+* or anything else inside their criticial sections.
 */
rcu_sync_enter(>rss);
 
@@ -154,8 +125,9 @@ void percpu_up_write(struct percpu_rw_semaphore *brw)
/* release the lock, but the readers can't use the fast-path */
up_write(>rw_sem);
/*
-* Insert the barrier before the next fast-path in down_read,
-* see W_R case in the comment above update_fast_ctr().
+* Enable the fast-path in percpu_down_read() and percpu_up_read()
+* but only after another gp pass; this adds the necessary barrier
+* to ensure the reader can't miss the changes done by us.
 */
rcu_sync_exit(>rss);
 }
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/7] rcusync: Introduce rcu_sync_dtor()

2015-07-11 Thread Oleg Nesterov
Add the new rcu_sync_ops->wait() method and the new helper,
rcu_sync_dtor().

It is needed if you are going to, say, kfree(rcu_sync_object).
It simply calls ops->wait() to "flush" the potentially pending
rcu callback.

Reviewed-by: Paul E. McKenney 
Signed-off-by: Oleg Nesterov 
Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/rcusync.h |1 +
 kernel/rcu/sync.c   |   22 ++
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h
index a51e5c7..0135838 100644
--- a/include/linux/rcusync.h
+++ b/include/linux/rcusync.h
@@ -31,6 +31,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync_struct 
*rss)
 extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
 extern void rcu_sync_enter(struct rcu_sync_struct *);
 extern void rcu_sync_exit(struct rcu_sync_struct *);
+extern void rcu_sync_dtor(struct rcu_sync_struct *);
 
 #define __RCU_SYNC_INITIALIZER(name, type) {   \
.gp_state = 0,  \
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 32cdbb8..8835ad1 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,6 +10,7 @@
 static const struct {
void (*sync)(void);
void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+   void (*wait)(void);
 #ifdef CONFIG_PROVE_RCU
int  (*held)(void);
 #endif
@@ -17,16 +18,19 @@ static const struct {
[RCU_SYNC] = {
.sync = synchronize_rcu,
.call = call_rcu,
+   .wait = rcu_barrier,
__INIT_HELD(rcu_read_lock_held)
},
[RCU_SCHED_SYNC] = {
.sync = synchronize_sched,
.call = call_rcu_sched,
+   .wait = rcu_barrier_sched,
__INIT_HELD(rcu_read_lock_sched_held)
},
[RCU_BH_SYNC] = {
.sync = synchronize_rcu_bh,
.call = call_rcu_bh,
+   .wait = rcu_barrier_bh,
__INIT_HELD(rcu_read_lock_bh_held)
},
 };
@@ -128,3 +132,21 @@ void rcu_sync_exit(struct rcu_sync_struct *rss)
}
spin_unlock_irq(>rss_lock);
 }
+
+void rcu_sync_dtor(struct rcu_sync_struct *rss)
+{
+   int cb_state;
+
+   BUG_ON(rss->gp_count);
+
+   spin_lock_irq(>rss_lock);
+   if (rss->cb_state == CB_REPLAY)
+   rss->cb_state = CB_PENDING;
+   cb_state = rss->cb_state;
+   spin_unlock_irq(>rss_lock);
+
+   if (cb_state != CB_IDLE) {
+   gp_ops[rss->gp_type].wait();
+   BUG_ON(rss->cb_state != CB_IDLE);
+   }
+}
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/7] rcusync: Add the CONFIG_PROVE_RCU checks

2015-07-11 Thread Oleg Nesterov
It would be nice to validate that the caller of rcu_sync_is_idle()
holds the corresponding type of RCU read-side lock. Add the new
rcu_sync_ops->held() method and change rcu_sync_is_idle() to
WARN() if it returns false.

This obviously penalizes the readers (fast-path), but only if
CONFIG_PROVE_RCU.

Reviewed-by: Paul E. McKenney 
Suggested-by: "Paul E. McKenney" 
Signed-off-by: Oleg Nesterov 
Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/rcusync.h |6 ++
 kernel/rcu/sync.c   |   21 +
 2 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h
index 988ec33..a51e5c7 100644
--- a/include/linux/rcusync.h
+++ b/include/linux/rcusync.h
@@ -17,9 +17,15 @@ struct rcu_sync_struct {
enum rcu_sync_type  gp_type;
 };
 
+extern bool __rcu_sync_is_idle(struct rcu_sync_struct *);
+
 static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
 {
+#ifdef CONFIG_PROVE_RCU
+   return __rcu_sync_is_idle(rss);
+#else
return !rss->gp_state; /* GP_IDLE */
+#endif
 }
 
 extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 99051b7..32cdbb8 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -1,21 +1,33 @@
 #include 
 #include 
 
+#ifdef CONFIG_PROVE_RCU
+#define __INIT_HELD(func)  .held = func,
+#else
+#define __INIT_HELD(func)
+#endif
+
 static const struct {
void (*sync)(void);
void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+#ifdef CONFIG_PROVE_RCU
+   int  (*held)(void);
+#endif
 } gp_ops[] = {
[RCU_SYNC] = {
.sync = synchronize_rcu,
.call = call_rcu,
+   __INIT_HELD(rcu_read_lock_held)
},
[RCU_SCHED_SYNC] = {
.sync = synchronize_sched,
.call = call_rcu_sched,
+   __INIT_HELD(rcu_read_lock_sched_held)
},
[RCU_BH_SYNC] = {
.sync = synchronize_rcu_bh,
.call = call_rcu_bh,
+   __INIT_HELD(rcu_read_lock_bh_held)
},
 };
 
@@ -24,6 +36,15 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
 
 #definerss_lockgp_wait.lock
 
+#ifdef CONFIG_PROVE_RCU
+bool __rcu_sync_is_idle(struct rcu_sync_struct *rss)
+{
+   WARN_ON(!gp_ops[rss->gp_type].held());
+   return rss->gp_state == GP_IDLE;
+}
+EXPORT_SYMBOL_GPL(__rcu_sync_is_idle);
+#endif
+
 void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
 {
memset(rss, 0, sizeof(*rss));
-- 
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/7] rcu: Create rcu_sync infrastructure

2015-07-11 Thread Oleg Nesterov
It is functionally equivalent to

struct rcu_sync_struct {
atomic_t counter;
};

static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
{
return atomic_read(>counter) == 0;
}

static inline void rcu_sync_enter(struct rcu_sync_struct *rss)
{
atomic_inc(>counter);
synchronize_sched();
}

static inline void rcu_sync_exit(struct rcu_sync_struct *rss)
{
synchronize_sched();
atomic_dec(>counter);
}

except: it records the state and synchronize_sched() is only called by
rcu_sync_enter() and only if necessary.

Reviewed-by: Paul E. McKenney 
Signed-off-by: Oleg Nesterov 
Signed-off-by: Peter Zijlstra (Intel) 
---
 include/linux/rcusync.h |   64 
 kernel/rcu/Makefile |2 +-
 kernel/rcu/sync.c   |  108 +++
 3 files changed, 173 insertions(+), 1 deletions(-)
 create mode 100644 include/linux/rcusync.h
 create mode 100644 kernel/rcu/sync.c

diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h
new file mode 100644
index 000..7858491
--- /dev/null
+++ b/include/linux/rcusync.h
@@ -0,0 +1,64 @@
+#ifndef _LINUX_RCUSYNC_H_
+#define _LINUX_RCUSYNC_H_
+
+#include 
+#include 
+
+struct rcu_sync_struct {
+   int gp_state;
+   int gp_count;
+   wait_queue_head_t   gp_wait;
+
+   int cb_state;
+   struct rcu_head cb_head;
+
+   void (*sync)(void);
+   void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
+};
+
+#define ___RCU_SYNC_INIT(name) \
+   .gp_state = 0,  \
+   .gp_count = 0,  \
+   .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
+   .cb_state = 0
+
+#define __RCU_SCHED_SYNC_INIT(name) {  \
+   ___RCU_SYNC_INIT(name), \
+   .sync = synchronize_sched,  \
+   .call = call_rcu_sched, \
+}
+
+#define __RCU_BH_SYNC_INIT(name) { \
+   ___RCU_SYNC_INIT(name), \
+   .sync = synchronize_rcu_bh, \
+   .call = call_rcu_bh,\
+}
+
+#define __RCU_SYNC_INIT(name) {
\
+   ___RCU_SYNC_INIT(name), \
+   .sync = synchronize_rcu,\
+   .call = call_rcu,   \
+}
+
+#define DEFINE_RCU_SCHED_SYNC(name)\
+   struct rcu_sync_struct name = __RCU_SCHED_SYNC_INIT(name)
+
+#define DEFINE_RCU_BH_SYNC(name)   \
+   struct rcu_sync_struct name = __RCU_BH_SYNC_INIT(name)
+
+#define DEFINE_RCU_SYNC(name)  \
+   struct rcu_sync_struct name = __RCU_SYNC_INIT(name)
+
+static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
+{
+   return !rss->gp_state; /* GP_IDLE */
+}
+
+enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
+
+extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type);
+extern void rcu_sync_enter(struct rcu_sync_struct *);
+extern void rcu_sync_exit(struct rcu_sync_struct *);
+
+#endif /* _LINUX_RCUSYNC_H_ */
+
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 50a8084..61a1656 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,4 +1,4 @@
-obj-y += update.o
+obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += tree.o
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
new file mode 100644
index 000..f84176a
--- /dev/null
+++ b/kernel/rcu/sync.c
@@ -0,0 +1,108 @@
+
+#include 
+#include 
+
+enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
+enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
+
+#definerss_lockgp_wait.lock
+
+void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type)
+{
+   memset(rss, 0, sizeof(*rss));
+   init_waitqueue_head(>gp_wait);
+
+   switch (type) {
+   case RCU_SYNC:
+   rss->sync = synchronize_rcu;
+   rss->call = call_rcu;
+   break;
+
+   case RCU_SCHED_SYNC:
+   rss->sync = synchronize_sched;
+   rss->call = call_rcu_sched;
+   break;
+
+   case RCU_BH_SYNC:
+   rss->sync = synchronize_rcu_bh;
+   rss->call = call_rcu_bh;
+   break;
+   }
+}
+

[PATCH 0/7] Add rcu_sync infrastructure to avoid _expedited() in percpu-rwsem

2015-07-11 Thread Oleg Nesterov
Hello,

Let me make another attempt to push rcu_sync and add a _simple_
improvement into percpu-rwsem. It already has another user (cgroups)
and I think it can have more. Peter has some use-cases. sb->s_writers
(which afaics is buggy btw) can be turned into percpu-rwsem too I think.

Linus, I am mostly trying to convince you. Nobody else objected so far.
Could you please comment?

Peter, if you agree with 5-7, can I add your Signed-off-by's ?

To me, the most annoying problem with percpu_rw_semaphore is
synchronize_sched_expedited() which is called twice by every
down_write/up_write. I think it would be really nice to avoid it.

Let's start with the simple test-case,

#!/bin/bash

perf probe -x /lib/libc.so.6 syscall

for i in {1..1000}; do
echo 1 >| 
/sys/kernel/debug/tracing/events/probe_libc/syscall/enable
echo 0 >| 
/sys/kernel/debug/tracing/events/probe_libc/syscall/enable
done

It needs ~ 13.5 seconds (2 CPUs, KVM). If we simply replace
synchronize_sched_expedited() with synchronize_sched() it takes
~ 67.5 seconds. This is not good.

With these patches it takes around 13.3 seconds again (a little
bit faster), and it doesn't use _expedited. synchronize_sched()
is called 1-2 (max 3) times in average. And now it does not
disturb the whole system.

And just in case, I also measured

for (i = 0; i < 100; ++i) {
percpu_down_write(_mmap_sem);
percpu_up_write(_mmap_sem);
}

and it runs more than 1.5 times faster (to remind, only 2 CPUs),
but this is not that interesting, I agree.

And note that the actual change in percpu-rwsem is really simple,
and imo it even makes the code simpler. (the last patch is off-
topic cleanup).

So the only complication is rcu_sync itself. But, rightly or not (I
am obviously biased), I believe this new rcu infrastructure is natural
and useful, and I think it can have more users too.

And. We can do more improvements in rcu_sync and percpu-rwsem, and
I don't only mean other optimizations from Peter. In particular, we
can extract the "wait for gp pass" from rcu_sync_enter() into another
helper, we can teach percpu_down_write() to allow multiple writers,
and more.

Oleg.

 include/linux/percpu-rwsem.h  |3 +-
 include/linux/rcusync.h   |   57 +++
 kernel/locking/percpu-rwsem.c |   78 ++---
 kernel/rcu/Makefile   |2 +-
 kernel/rcu/sync.c |  152 +
 5 files changed, 235 insertions(+), 57 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ARM: pxa: fix dm9000 platform data regression

2015-07-11 Thread Robert Jarzmik
Since dm9000 driver added support for a vcc regulator, platform data
based platforms have their ethernet broken, as the regulator claiming
returns -EPROBE_DEFER and prevents dm9000 loading.

This patch fixes this for all pxa boards using dm9000, by using the
specific regulator_has_full_constraints() function.

This was discovered and tested on the cm-x300 board.

Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000")
Signed-off-by: Robert Jarzmik 
---
 arch/arm/mach-pxa/capc7117.c   | 3 +++
 arch/arm/mach-pxa/cm-x2xx.c| 3 +++
 arch/arm/mach-pxa/cm-x300.c| 2 ++
 arch/arm/mach-pxa/colibri-pxa270.c | 3 +++
 arch/arm/mach-pxa/em-x270.c| 2 ++
 arch/arm/mach-pxa/icontrol.c   | 3 +++
 arch/arm/mach-pxa/trizeps4.c   | 3 +++
 arch/arm/mach-pxa/vpac270.c| 3 +++
 arch/arm/mach-pxa/zeus.c   | 2 ++
 9 files changed, 24 insertions(+)

diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c
index c092730749b9..bf366b39fa61 100644
--- a/arch/arm/mach-pxa/capc7117.c
+++ b/arch/arm/mach-pxa/capc7117.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -144,6 +145,8 @@ static void __init capc7117_init(void)
 
capc7117_uarts_init();
capc7117_ide_init();
+
+   regulator_has_full_constraints();
 }
 
 MACHINE_START(CAPC7117,
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index bb99f59a36d8..a17a91eb8e9a 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -466,6 +467,8 @@ static void __init cmx2xx_init(void)
cmx2xx_init_ac97();
cmx2xx_init_touchscreen();
cmx2xx_init_leds();
+
+   regulator_has_full_constraints();
 }
 
 static void __init cmx2xx_init_irq(void)
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 4d3588d26c2a..5851f4c254c1 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -835,6 +835,8 @@ static void __init cm_x300_init(void)
cm_x300_init_ac97();
cm_x300_init_wi2wi();
cm_x300_init_bl();
+
+   regulator_has_full_constraints();
 }
 
 static void __init cm_x300_fixup(struct tag *tags, char **cmdline)
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c 
b/arch/arm/mach-pxa/colibri-pxa270.c
index 5f9d9303b346..3503826333c7 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void)
printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n",
colibri_pxa270_baseboard);
}
+
+   regulator_has_full_constraints();
 }
 
 /* The "Income s.r.o. SH-Dmaster PXA270 SBC" board can be booted either
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 51531ecffca8..9d7072b04045 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1306,6 +1306,8 @@ static void __init em_x270_init(void)
em_x270_init_i2c();
em_x270_init_camera();
em_x270_userspace_consumers_init();
+
+   regulator_has_full_constraints();
 }
 
 MACHINE_START(EM_X270, "Compulab EM-X270")
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index c98511c5abd1..9b0eb0252af6 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "generic.h"
 
@@ -185,6 +186,8 @@ static void __init icontrol_init(void)
mxm_8x10_mmc_init();
 
icontrol_can_init();
+
+   regulator_has_full_constraints();
 }
 
 MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM")
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index 872dcb20e757..066e3a250ee0 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -534,6 +535,8 @@ static void __init trizeps4_init(void)
 
BCR_writew(trizeps_conxs_bcr);
board_backlight_power(1);
+
+   regulator_has_full_constraints();
 }
 
 static void __init trizeps4_map_io(void)
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index aa89488f961e..54122a983ae3 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -711,6 +712,8 @@ static void __init vpac270_init(void)
vpac270_ts_init();
vpac270_rtc_init();
vpac270_ide_init();
+
+   regulator_has_full_constraints();
 }
 
 MACHINE_START(VPAC270, "Voipac PXA270")
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index ac2ae5c71ab4..6158566fa0f7 100644
--- 

Loan Offer At 3% Interest Rate

2015-07-11 Thread John Matthew
Good Day Sir/Madam.

I am Mr John Matthew, A private Money lender. I am Writing you to
introduce a small and large business money lending service to you.  I
can service your financial need with less payback problem that is why
we fund you for just 3%. Need business or a personal loan Fill the
Short application below.Contact us today for that loan you need with
this email address :johnmatthewloanlen...@gmail.com

NAME.
COUNTRY.
STATE.
ADDRESS...
PHONE NUMBER...
AMOUNT NEEDED AS LOAN..
LOAN DURATION..
MONTHLY INCOME.


Your's Faithfully:
Mr John Matthew:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Lånetilbud Ved 3% rente

2015-07-11 Thread John Matthew
Good Day Sir / Fru.

Jeg er John Matthew, En privat Money långiver. Jeg skriver dig til at
introducere en lille og store virksomheder penge udlån service til
dig. Jeg kan servicere dine finansielle behov med mindre
tilbagebetalingstid problem, der er derfor, vi finansierer dig for kun
3%. Brug forretning eller et personligt lån Fyld Short ansøgning
below.Contact os i dag for dette lån, du har brug for med denne
e-mail-adresse: johnmatthewloanlen...@gmail.com

NAME .
LAND .
STATE .
ADRESSE ...
TELEFONNUMMER ...
Nødvendige beløb AS LÅN ..
LÅN VARIGHED ..
Månedlige indkomst .


Dine er trofast:
John Matthew:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/3] arm64, mm: Use IPIs for TLB invalidation.

2015-07-11 Thread Sergei Shtylyov

Hello.

On 07/11/2015 11:25 PM, David Daney wrote:


From: David Daney 



Most broadcast TLB invalidations are unnecessary.  So when
invalidating for a given mm/vma target the only the needed CPUs via


   The only the needed?


and IPI.



For global TLB invalidations, also use IPI.



Tested on Cavium ThunderX.



This change reduces 'time make -j48' on kernel from 139s to 116s (83%
as long).



The patch is needed because of a ThunderX Pass1 erratum: Exclusive
store operations unreliable in the presence of broadcast TLB
invalidations.  The performance improvements shown make it compelling
even without the erratum workaround need.



Signed-off-by: David Daney 


WBR, Sergei

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] irqchip: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  5b29264c659c31bada65582005d99adb3bb41fea
Gitweb: http://git.kernel.org/tip/5b29264c659c31bada65582005d99adb3bb41fea
Author: Jiang Liu 
AuthorDate: Thu, 4 Jun 2015 12:13:20 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:27 +0200

irqchip: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc

Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc while we
already have a pointer to corresponding irq_desc.

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: linux-arm-ker...@lists.infradead.org
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Cc: Jason Cooper 
Cc: Kukjin Kim 
Cc: Krzysztof Kozlowski 
Cc: Maxime Ripard 
Link: 
http://lkml.kernel.org/r/1433391238-19471-11-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 drivers/irqchip/exynos-combiner.c   | 4 ++--
 drivers/irqchip/irq-armada-370-xp.c | 2 +-
 drivers/irqchip/irq-gic.c   | 4 ++--
 drivers/irqchip/irq-orion.c | 2 +-
 drivers/irqchip/irq-sunxi-nmi.c | 2 +-
 drivers/irqchip/spear-shirq.c   | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c 
b/drivers/irqchip/exynos-combiner.c
index 6ad04ac..1a4a1b0 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -67,8 +67,8 @@ static void combiner_unmask_irq(struct irq_data *data)
 
 static void combiner_handle_cascade_irq(unsigned int irq, struct irq_desc 
*desc)
 {
-   struct combiner_chip_data *chip_data = irq_get_handler_data(irq);
-   struct irq_chip *chip = irq_get_chip(irq);
+   struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq, combiner_irq;
unsigned long status;
 
diff --git a/drivers/irqchip/irq-armada-370-xp.c 
b/drivers/irqchip/irq-armada-370-xp.c
index 73b73ac..39b72da 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -450,7 +450,7 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *r, 
bool b) {}
 static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq,
  struct irq_desc *desc)
 {
-   struct irq_chip *chip = irq_get_chip(irq);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned long irqmap, irqn, irqsrc, cpuid;
unsigned int cascade_irq;
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 2eaae9c..cadd862 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -288,8 +288,8 @@ static void __exception_irq_entry gic_handle_irq(struct 
pt_regs *regs)
 
 static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
 {
-   struct gic_chip_data *chip_data = irq_get_handler_data(irq);
-   struct irq_chip *chip = irq_get_chip(irq);
+   struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq, gic_irq;
unsigned long status;
 
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index 7fbae56..5ea999a 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -108,7 +108,7 @@ IRQCHIP_DECLARE(orion_intc, "marvell,orion-intc", 
orion_irq_init);
 
 static void orion_bridge_irq_handler(unsigned int irq, struct irq_desc *desc)
 {
-   struct irq_domain *d = irq_get_handler_data(irq);
+   struct irq_domain *d = irq_desc_get_handler_data(desc);
 
struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, 0);
u32 stat = readl_relaxed(gc->reg_base + ORION_BRIDGE_IRQ_CAUSE) &
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 9186a11..772a82c 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -61,7 +61,7 @@ static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic 
*gc, u32 off)
 static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc)
 {
struct irq_domain *domain = irq_desc_get_handler_data(desc);
-   struct irq_chip *chip = irq_get_chip(irq);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int virq = irq_find_mapping(domain, 0);
 
chained_irq_enter(chip, desc);
diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c
index 3df144f..6171855 100644
--- a/drivers/irqchip/spear-shirq.c
+++ b/drivers/irqchip/spear-shirq.c
@@ -184,7 +184,7 @@ static struct spear_shirq *spear320_shirq_blocks[] = {
 
 static void shirq_handler(unsigned irq, struct irq_desc *desc)
 {
-   struct spear_shirq *shirq = irq_get_handler_data(irq);
+   struct spear_shirq *shirq = irq_desc_get_handler_data(desc);
u32 pend;
 
pend = readl(shirq->base + shirq->status_reg) & shirq->mask;
--
To 

[tip:irq/core] genirq: Remove the irq argument from setup_affinity()

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  a8a98eac7b238beb49b479c164303651d5a37eb6
Gitweb: http://git.kernel.org/tip/a8a98eac7b238beb49b479c164303651d5a37eb6
Author: Jiang Liu 
AuthorDate: Thu, 4 Jun 2015 12:13:30 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:25 +0200

genirq: Remove the irq argument from setup_affinity()

Unused except for the alpha wrapper, which can retrieve if from the
irq descriptor.

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Link: 
http://lkml.kernel.org/r/1433391238-19471-21-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 kernel/irq/manage.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f5b7742..886f115 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -361,8 +361,7 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
 /*
  * Generic version of the affinity autoselector.
  */
-static int
-setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
+static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 {
struct cpumask *set = irq_default_affinity;
int node = irq_desc_get_node(desc);
@@ -395,10 +394,10 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, 
struct cpumask *mask)
return 0;
 }
 #else
-static inline int
-setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask)
+/* Wrapper for ALPHA specific affinity selector magic */
+static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask)
 {
-   return irq_select_affinity(irq);
+   return irq_select_affinity(irq_desc_get_irq(d));
 }
 #endif
 
@@ -412,14 +411,14 @@ int irq_select_affinity_usr(unsigned int irq, struct 
cpumask *mask)
int ret;
 
raw_spin_lock_irqsave(>lock, flags);
-   ret = setup_affinity(irq, desc, mask);
+   ret = setup_affinity(desc, mask);
raw_spin_unlock_irqrestore(>lock, flags);
return ret;
 }
 
 #else
 static inline int
-setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
+setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 {
return 0;
 }
@@ -1256,7 +1255,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, 
struct irqaction *new)
}
 
/* Set default affinity mask once everything is setup */
-   setup_affinity(irq, desc, mask);
+   setup_affinity(desc, mask);
 
} else if (new->flags & IRQF_TRIGGER_MASK) {
unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] irqchip/mips-gic: Use access helper irq_data_get_affinity_mask()

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  72f86db4dd5eafbadd45c9092df73c49f320f638
Gitweb: http://git.kernel.org/tip/72f86db4dd5eafbadd45c9092df73c49f320f638
Author: Jiang Liu 
AuthorDate: Mon, 1 Jun 2015 16:05:38 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:27 +0200

irqchip/mips-gic: Use access helper irq_data_get_affinity_mask()

Use access helper irq_data_get_affinity_mask() to hide implementation
details of struct irq_desc.

[ tglx: Verified with coccinelle ]

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Cc: Jason Cooper 
Link: 
http://lkml.kernel.org/r/1433145945-789-30-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 drivers/irqchip/irq-mips-gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 42dbebc..e6c2df9 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -405,7 +405,7 @@ static int gic_set_affinity(struct irq_data *d, const 
struct cpumask *cpumask,
clear_bit(irq, pcpu_masks[i].pcpu_mask);
set_bit(irq, pcpu_masks[cpumask_first()].pcpu_mask);
 
-   cpumask_copy(d->affinity, cpumask);
+   cpumask_copy(irq_data_get_affinity_mask(d), cpumask);
spin_unlock_irqrestore(_lock, flags);
 
return IRQ_SET_MASK_OK_NOCOPY;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] genirq: Remove the irq argument from check_irq_resend()

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  0798abeb7eec37dcc20f252c2195fc31c41561f9
Gitweb: http://git.kernel.org/tip/0798abeb7eec37dcc20f252c2195fc31c41561f9
Author: Jiang Liu 
AuthorDate: Thu, 4 Jun 2015 12:13:27 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:24 +0200

genirq: Remove the irq argument from check_irq_resend()

It's only used in the software resend case and can be retrieved from
irq_desc if necessary.

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Link: 
http://lkml.kernel.org/r/1433391238-19471-18-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 kernel/irq/chip.c  | 2 +-
 kernel/irq/internals.h | 2 +-
 kernel/irq/manage.c| 2 +-
 kernel/irq/resend.c| 4 +++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f3c3d55..0cfbd15 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -187,7 +187,7 @@ int irq_startup(struct irq_desc *desc, bool resend)
irq_enable(desc);
}
if (resend)
-   check_irq_resend(desc, desc->irq_data.irq);
+   check_irq_resend(desc);
return ret;
 }
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 3e03824..7054947e 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -90,7 +90,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, 
struct irqaction *act
 irqreturn_t handle_irq_event(struct irq_desc *desc);
 
 /* Resending of interrupts :*/
-void check_irq_resend(struct irq_desc *desc, unsigned int irq);
+void check_irq_resend(struct irq_desc *desc);
 bool irq_wait_for_poll(struct irq_desc *desc);
 void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f974485..c2e835d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -516,7 +516,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq)
/* Prevent probing on this irq: */
irq_settings_set_noprobe(desc);
irq_enable(desc);
-   check_irq_resend(desc, irq);
+   check_irq_resend(desc);
/* fall-through */
}
default:
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 9065107..32fc47c 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -53,7 +53,7 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
  *
  * Is called with interrupts disabled and desc->lock held.
  */
-void check_irq_resend(struct irq_desc *desc, unsigned int irq)
+void check_irq_resend(struct irq_desc *desc)
 {
/*
 * We do not resend level type interrupts. Level type
@@ -74,6 +74,8 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
if (!desc->irq_data.chip->irq_retrigger ||
    !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
 #ifdef CONFIG_HARDIRQS_SW_RESEND
+   unsigned int irq = irq_desc_get_irq(desc);
+
/*
 * If the interrupt has a parent irq and runs
 * in the thread context of the parent irq,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] genirq: Remove the irq argument from note_interrupt()

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  0dcdbc97557fd8c297c4e38e9f66e304a64bae9d
Gitweb: http://git.kernel.org/tip/0dcdbc97557fd8c297c4e38e9f66e304a64bae9d
Author: Jiang Liu 
AuthorDate: Thu, 4 Jun 2015 12:13:28 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:25 +0200

genirq: Remove the irq argument from note_interrupt()

Only required for the slow path. Retrieve it from irq descriptor if
necessary.

[ tglx: Split out from combo patch. Left [try_]misrouted_irq()
untouched as there is no win in the slow path ]

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Cc: Jason Cooper 
Cc: Kevin Cernekee 
Cc: Arnd Bergmann 
Link: 
http://lkml.kernel.org/r/1433391238-19471-19-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 include/linux/irq.h   | 3 +--
 kernel/irq/chip.c | 2 +-
 kernel/irq/handle.c   | 2 +-
 kernel/irq/spurious.c | 6 --
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 92188b0..429ac26 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -487,8 +487,7 @@ extern int irq_chip_set_vcpu_affinity_parent(struct 
irq_data *data,
 #endif
 
 /* Handling of unhandled and spurious interrupts: */
-extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
-  irqreturn_t action_ret);
+extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret);
 
 
 /* Enable/disable irq debugging output: */
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 310d658..76f199d 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -328,7 +328,7 @@ void handle_nested_irq(unsigned int irq)
 
action_ret = action->thread_fn(action->irq, action->dev_id);
if (!noirqdebug)
-   note_interrupt(irq, desc, action_ret);
+   note_interrupt(desc, action_ret);
 
raw_spin_lock_irq(&desc->lock);
irqd_clear(>irq_data, IRQD_IRQ_INPROGRESS);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 4d37b96..b6eeea8 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -176,7 +176,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct 
irqaction *action)
add_interrupt_randomness(irq, flags);
 
if (!noirqdebug)
-   note_interrupt(irq, desc, retval);
+   note_interrupt(desc, retval);
return retval;
 }
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 5378c52..3214417 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -270,9 +270,10 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
 
 #define SPURIOUS_DEFERRED  0x8000
 
-void note_interrupt(unsigned int irq, struct irq_desc *desc,
-   irqreturn_t action_ret)
+void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 {
+   unsigned int irq;
+
if (desc->istate & IRQS_POLL_INPROGRESS ||
irq_settings_is_polled(desc))
return;
@@ -396,6 +397,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
desc->last_unhandled = jiffies;
}
 
+   irq = irq_desc_get_irq(desc);
if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
int ok = misrouted_irq(irq);
if (action_ret == IRQ_NONE)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] genirq: Remove the parameter 'irq' of kstat_incr_irqs_this_cpu()

2015-07-11 Thread tip-bot for Jiang Liu
Commit-ID:  b51bf95c583bba645974348666e9b5a14c7aa3ea
Gitweb: http://git.kernel.org/tip/b51bf95c583bba645974348666e9b5a14c7aa3ea
Author: Jiang Liu 
AuthorDate: Thu, 4 Jun 2015 12:13:25 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:24 +0200

genirq: Remove the parameter 'irq' of kstat_incr_irqs_this_cpu()

The first parameter 'irq' is never used by
kstat_incr_irqs_this_cpu(). Remove it.

Signed-off-by: Jiang Liu 
Cc: Konrad Rzeszutek Wilk 
Cc: Tony Luck 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Randy Dunlap 
Cc: Yinghai Lu 
Cc: Borislav Petkov 
Link: 
http://lkml.kernel.org/r/1433391238-19471-16-git-send-email-jiang@linux.intel.com
Signed-off-by: Thomas Gleixner 
---
 kernel/irq/chip.c  | 16 
 kernel/irq/handle.c|  2 +-
 kernel/irq/internals.h |  2 +-
 kernel/irq/irqdesc.c   |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 27f4332..f3c3d55 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -315,7 +315,7 @@ void handle_nested_irq(unsigned int irq)
raw_spin_lock_irq(&desc->lock);
 
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
@@ -391,7 +391,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
 
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
@@ -443,7 +443,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
 
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
/*
 * If its disabled or no action available
@@ -515,7 +515,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
goto out;
 
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
/*
 * If its disabled or no action available
@@ -583,7 +583,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
}
 
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
/* Start handling the irq */
desc->irq_data.chip->irq_ack(&desc->irq_data);
@@ -646,7 +646,7 @@ void handle_edge_eoi_irq(unsigned int irq, struct irq_desc 
*desc)
goto out_eoi;
}
 
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
do {
if (unlikely(!desc->action))
@@ -675,7 +675,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 {
struct irq_chip *chip = irq_desc_get_chip(desc);
 
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);
@@ -705,7 +705,7 @@ void handle_percpu_devid_irq(unsigned int irq, struct 
irq_desc *desc)
void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
irqreturn_t res;
 
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
 
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6354802..4d37b96 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -30,7 +30,7 @@
 void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 {
print_irq_desc(irq, desc);
-   kstat_incr_irqs_this_cpu(irq, desc);
+   kstat_incr_irqs_this_cpu(desc);
ack_bad_irq(irq);
 }
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4834ee8..3e03824 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -191,7 +191,7 @@ static inline bool irqd_has_set(struct irq_data *d, 
unsigned int mask)
return __irqd_to_state(d) & mask;
 }
 
-static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc 
*desc)
+static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 {
__this_cpu_inc(*desc->kstat_irqs);
__this_cpu_inc(kstat.irqs_sum);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4afc457..0a2a4b6 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -582,7 +582,7 @@ int irq_set_percpu_devid(unsigned int irq)
 
 void kstat_incr_irq_this_cpu(unsigned int irq)
 {
-   kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
+   kstat_incr_irqs_this_cpu(irq_to_desc(irq));
 }
 
 /**
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo 

[tip:irq/core] irqchip/sirfsoc: Fix generic chip allocation wreckage

2015-07-11 Thread tip-bot for Thomas Gleixner
Commit-ID:  d452bca82d9ff4f220afa4234418912623db4fe6
Gitweb: http://git.kernel.org/tip/d452bca82d9ff4f220afa4234418912623db4fe6
Author: Thomas Gleixner 
AuthorDate: Mon, 6 Jul 2015 10:18:29 +
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:23 +0200

irqchip/sirfsoc: Fix generic chip allocation wreckage

irq_alloc_domain_generic_chips() can only be called once for an
irqdomain. The sirfsoc init calls it twice and because the return
value is not checked it does not notice the wreckage.

The code works by chance because the first call already allocates two
chips and therefor the second call to sirfsoc_alloc_gc() operates on
the proper generic chip instance.

Use a single call and setup the two chips in the obvious correct way.

Signed-off-by: Thomas Gleixner 
Cc: Jason Cooper 
Cc: Barry Song 
Cc: linux-arm-ker...@lists.infradead.org
Cc: Olof Johansson 
Link: http://lkml.kernel.org/r/20150706101543.470696...@linutronix.de
---
 drivers/irqchip/irq-sirfsoc.c | 48 ++-
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c
index a469355..b930069 100644
--- a/drivers/irqchip/irq-sirfsoc.c
+++ b/drivers/irqchip/irq-sirfsoc.c
@@ -17,34 +17,38 @@
 #include 
 #include "irqchip.h"
 
-#define SIRFSOC_INT_RISC_MASK0  0x0018
-#define SIRFSOC_INT_RISC_MASK1  0x001C
-#define SIRFSOC_INT_RISC_LEVEL0 0x0020
-#define SIRFSOC_INT_RISC_LEVEL1 0x0024
+#define SIRFSOC_INT_RISC_MASK0 0x0018
+#define SIRFSOC_INT_RISC_MASK1 0x001C
+#define SIRFSOC_INT_RISC_LEVEL00x0020
+#define SIRFSOC_INT_RISC_LEVEL10x0024
 #define SIRFSOC_INIT_IRQ_ID0x0038
+#define SIRFSOC_INT_BASE_OFFSET0x0004
 
 #define SIRFSOC_NUM_IRQS   64
+#define SIRFSOC_NUM_BANKS  (SIRFSOC_NUM_IRQS / 32)
 
 static struct irq_domain *sirfsoc_irqdomain;
 
-static __init void
-sirfsoc_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num)
+static __init void sirfsoc_alloc_gc(void __iomem *base)
 {
-   struct irq_chip_generic *gc;
-   struct irq_chip_type *ct;
-   int ret;
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
unsigned int set = IRQ_LEVEL;
-
-   ret = irq_alloc_domain_generic_chips(sirfsoc_irqdomain, num, 1, 
"irq_sirfsoc",
-   handle_level_irq, clr, set, IRQ_GC_INIT_MASK_CACHE);
-
-   gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, irq_start);
-   gc->reg_base = base;
-   ct = gc->chip_types;
-   ct->chip.irq_mask = irq_gc_mask_clr_bit;
-   ct->chip.irq_unmask = irq_gc_mask_set_bit;
-   ct->regs.mask = SIRFSOC_INT_RISC_MASK0;
+   struct irq_chip_generic *gc;
+   struct irq_chip_type *ct;
+   int i;
+
+   irq_alloc_domain_generic_chips(sirfsoc_irqdomain, 32, 1, "irq_sirfsoc",
+  handle_level_irq, clr, set,
+  IRQ_GC_INIT_MASK_CACHE);
+
+   for (i = 0; i < SIRFSOC_NUM_BANKS; i++) {
+   gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, i * 32);
+   gc->reg_base = base + i * SIRFSOC_INT_BASE_OFFSET;
+   ct = gc->chip_types;
+   ct->chip.irq_mask = irq_gc_mask_clr_bit;
+   ct->chip.irq_unmask = irq_gc_mask_set_bit;
+   ct->regs.mask = SIRFSOC_INT_RISC_MASK0;
+   }
 }
 
 static void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs)
@@ -64,10 +68,8 @@ static int __init sirfsoc_irq_init(struct device_node *np,
panic("unable to map intc cpu registers\n");
 
sirfsoc_irqdomain = irq_domain_add_linear(np, SIRFSOC_NUM_IRQS,
-   &irq_generic_chip_ops, base);
-
-   sirfsoc_alloc_gc(base, 0, 32);
-   sirfsoc_alloc_gc(base + 4, 32, SIRFSOC_NUM_IRQS - 32);
+ &irq_generic_chip_ops, base);
+   sirfsoc_alloc_gc(base);
 
writel_relaxed(0, base + SIRFSOC_INT_RISC_LEVEL0);
writel_relaxed(0, base + SIRFSOC_INT_RISC_LEVEL1);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:irq/core] irqchip: Prepare for local stub header removal

2015-07-11 Thread tip-bot for Joel Porquet
Commit-ID:  41a83e06e2bb9ac46731681fd44d1e6ab184dac5
Gitweb: http://git.kernel.org/tip/41a83e06e2bb9ac46731681fd44d1e6ab184dac5
Author: Joel Porquet 
AuthorDate: Tue, 7 Jul 2015 17:11:46 -0400
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:23 +0200

irqchip: Prepare for local stub header removal

The IRQCHIP_DECLARE macro moved to to 'include/linux/irqchip.h', so
the local irqchip.h became an empty shell, which solely includes
include/linux/irqchip.h

Include the global header in all irqchip drivers instead of the local
header, so we can remove it.

Signed-off-by: Joel Porquet 
Cc: vgu...@synopsys.com
Cc: mon...@monstr.eu
Cc: r...@linux-mips.org
Cc: ja...@lakedaemon.net
Link: http://lkml.kernel.org/r/1882096.X39jVG8e0D@joel-zenbook
Signed-off-by: Thomas Gleixner 
---
 drivers/irqchip/exynos-combiner.c| 3 +--
 drivers/irqchip/irq-armada-370-xp.c  | 3 +--
 drivers/irqchip/irq-atmel-aic.c  | 2 +-
 drivers/irqchip/irq-atmel-aic5.c | 2 +-
 drivers/irqchip/irq-bcm2835.c| 3 +--
 drivers/irqchip/irq-bcm7038-l1.c | 3 +--
 drivers/irqchip/irq-bcm7120-l2.c | 3 +--
 drivers/irqchip/irq-brcmstb-l2.c | 2 --
 drivers/irqchip/irq-clps711x.c   | 3 +--
 drivers/irqchip/irq-crossbar.c   | 3 +--
 drivers/irqchip/irq-digicolor.c  | 3 +--
 drivers/irqchip/irq-dw-apb-ictl.c| 3 +--
 drivers/irqchip/irq-gic-v3-its.c | 3 +--
 drivers/irqchip/irq-gic-v3.c | 2 +-
 drivers/irqchip/irq-gic.c| 2 +-
 drivers/irqchip/irq-hip04.c  | 2 +-
 drivers/irqchip/irq-ingenic.c| 3 +--
 drivers/irqchip/irq-keystone.c   | 3 +--
 drivers/irqchip/irq-mips-cpu.c   | 3 +--
 drivers/irqchip/irq-mips-gic.c   | 3 +--
 drivers/irqchip/irq-mmp.c| 3 +--
 drivers/irqchip/irq-moxart.c | 3 +--
 drivers/irqchip/irq-mtk-sysirq.c | 3 +--
 drivers/irqchip/irq-mxs.c| 3 +--
 drivers/irqchip/irq-nvic.c   | 3 +--
 drivers/irqchip/irq-omap-intc.c  | 3 +--
 drivers/irqchip/irq-or1k-pic.c   | 3 +--
 drivers/irqchip/irq-orion.c  | 3 +--
 drivers/irqchip/irq-renesas-h8300h.c | 2 --
 drivers/irqchip/irq-renesas-h8s.c| 2 +-
 drivers/irqchip/irq-s3c24xx.c| 3 +--
 drivers/irqchip/irq-sirfsoc.c| 2 +-
 drivers/irqchip/irq-sun4i.c  | 3 +--
 drivers/irqchip/irq-sunxi-nmi.c  | 2 +-
 drivers/irqchip/irq-tb10x.c  | 2 +-
 drivers/irqchip/irq-tegra.c  | 3 +--
 drivers/irqchip/irq-versatile-fpga.c | 3 +--
 drivers/irqchip/irq-vf610-mscm-ir.c  | 3 +--
 drivers/irqchip/irq-vic.c| 3 +--
 drivers/irqchip/irq-vt8500.c | 3 +--
 drivers/irqchip/irq-xtensa-mx.c  | 3 +--
 drivers/irqchip/irq-xtensa-pic.c | 3 +--
 drivers/irqchip/irq-zevio.c  | 3 +--
 drivers/irqchip/spear-shirq.c| 3 +--
 44 files changed, 42 insertions(+), 79 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c 
b/drivers/irqchip/exynos-combiner.c
index 5c82e3b..05cdccc 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -15,13 +15,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 
-#include "irqchip.h"
-
 #define COMBINER_ENABLE_SET0x0
 #define COMBINER_ENABLE_CLEAR  0x4
 #define COMBINER_INT_STATUS0xC
diff --git a/drivers/irqchip/irq-armada-370-xp.c 
b/drivers/irqchip/irq-armada-370-xp.c
index 0d3b0fe..73b73ac 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -33,8 +34,6 @@
 #include 
 #include 
 
-#include "irqchip.h"
-
 /* Interrupt Controller Registers Map */
 #define ARMADA_370_XP_INT_SET_MASK_OFFS(0x48)
 #define ARMADA_370_XP_INT_CLEAR_MASK_OFFS  (0x4C)
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index dae3604..dbbf30a 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,7 +32,6 @@
 #include 
 
 #include "irq-atmel-aic-common.h"
-#include "irqchip.h"
 
 /* Number of irq lines managed by AIC */
 #define NR_AIC_IRQS32
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index 459bf44..ff2e832 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,7 +32,6 @@
 #include 
 
 #include "irq-atmel-aic-common.h"
-#include "irqchip.h"
 
 /* Number of irq lines managed by AIC */
 #define NR_AIC5_IRQS   128
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index e68c3b6..a36ba96 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -48,13 +48,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
 #include 
 

[tip:irq/core] irqchip/dw-apb-ictl: Fix generic domain chip wreckage

2015-07-11 Thread tip-bot for Thomas Gleixner
Commit-ID:  b66231183a8542de1414e42326dd1c6bc4af75f4
Gitweb: http://git.kernel.org/tip/b66231183a8542de1414e42326dd1c6bc4af75f4
Author: Thomas Gleixner 
AuthorDate: Mon, 6 Jul 2015 15:32:25 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 11 Jul 2015 23:14:23 +0200

irqchip/dw-apb-ictl: Fix generic domain chip wreckage

The num_ct argument of irq_alloc_domain_generic_chips() tells the core
code how many chip types (for different control flows,
e.g. edge/level) should be allocated. It does not control how many
generic chip instances are created because that's determined from the
irq domain size and the number of interrupts per chip.

The dw-apb init abuses the num_ct argument for allocating one or two
chip types depending on the number of interrupts. That's completely
wrong because the alternate type is never used.

This code was obviously never tested on a system which has more than
32 interrupts as that would have never worked due to the unitialized
second generic chip instance.

Hand in the proper num_ct=1 and fixup the chip initialization along
with the interrupt handler.

Signed-off-by: Thomas Gleixner 
Tested-by: Jisheng Zhang 
Cc: Sebastian Hesselbarth 
Cc: Mark Rutland 
Cc: Jason Cooper 
Link: http://lkml.kernel.org/r/20150706101543.373582...@linutronix.de
---
 drivers/irqchip/irq-dw-apb-ictl.c | 53 ---
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/drivers/irqchip/irq-dw-apb-ictl.c 
b/drivers/irqchip/irq-dw-apb-ictl.c
index 53bb732..ca22f4e 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -25,24 +25,25 @@
 #define APB_INT_MASK_H 0x0c
 #define APB_INT_FINALSTATUS_L  0x30
 #define APB_INT_FINALSTATUS_H  0x34
+#define APB_INT_BASE_OFFSET0x04
 
 static void dw_apb_ictl_handler(unsigned int irq, struct irq_desc *desc)
 {
-   struct irq_chip *chip = irq_get_chip(irq);
-   struct irq_chip_generic *gc = irq_get_handler_data(irq);
-   struct irq_domain *d = gc->private;
-   u32 stat;
+   struct irq_domain *d = irq_desc_get_handler_data(desc);
+   struct irq_chip *chip = irq_desc_get_chip(desc);
int n;
 
chained_irq_enter(chip, desc);
 
-   for (n = 0; n < gc->num_ct; n++) {
-   stat = readl_relaxed(gc->reg_base +
-APB_INT_FINALSTATUS_L + 4 * n);
+   for (n = 0; n < d->revmap_size; n += 32) {
+   struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, n);
+   u32 stat = readl_relaxed(gc->reg_base + APB_INT_FINALSTATUS_L);
+
while (stat) {
u32 hwirq = ffs(stat) - 1;
-   generic_handle_irq(irq_find_mapping(d,
-   gc->irq_base + hwirq + 32 * n));
+   u32 virq = irq_find_mapping(d, gc->irq_base + hwirq);
+
+   generic_handle_irq(virq);
stat &= ~(1 << hwirq);
}
}
@@ -73,7 +74,7 @@ static int __init dw_apb_ictl_init(struct device_node *np,
struct irq_domain *domain;
struct irq_chip_generic *gc;
void __iomem *iobase;
-   int ret, nrirqs, irq;
+   int ret, nrirqs, irq, i;
u32 reg;
 
/* Map the parent interrupt for the chained handler */
@@ -128,35 +129,25 @@ static int __init dw_apb_ictl_init(struct device_node *np,
goto err_unmap;
}
 
-   ret = irq_alloc_domain_generic_chips(domain, 32, (nrirqs > 32) ? 2 : 1,
-np->name, handle_level_irq, clr, 0,
-IRQ_GC_MASK_CACHE_PER_TYPE |
+   ret = irq_alloc_domain_generic_chips(domain, 32, 1, np->name,
+handle_level_irq, clr, 0,
 IRQ_GC_INIT_MASK_CACHE);
if (ret) {
pr_err("%s: unable to alloc irq domain gc\n", np->full_name);
goto err_unmap;
}
 
-   gc = irq_get_domain_generic_chip(domain, 0);
-   gc->private = domain;
-   gc->reg_base = iobase;
-
-   gc->chip_types[0].regs.mask = APB_INT_MASK_L;
-   gc->chip_types[0].regs.enable = APB_INT_ENABLE_L;
-   gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit;
-   gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit;
-   gc->chip_types[0].chip.irq_resume = dw_apb_ictl_resume;
-
-   if (nrirqs > 32) {
-   gc->chip_types[1].regs.mask = APB_INT_MASK_H;
-   gc->chip_types[1].regs.enable = APB_INT_ENABLE_H;
-   gc->chip_types[1].chip.irq_mask = irq_gc_mask_set_bit;
-   gc->chip_types[1].chip.irq_unmask = irq_gc_mask_clr_bit;
-   gc->chip_types[1].chip.irq_resume = dw_apb_ictl_resume;
+   for (i = 0; i < DIV_ROUND_UP(nrirqs, 32); i++) {
+   gc = irq_get_domain_generic_chip(domain, i * 32);
+

Re: V4.1-RC build error after commit 77a3c6f

2015-07-11 Thread Larry Finger

The subject should say V4.2-RC. Sorry.

Larry

On 07/11/2015 04:27 PM, Larry Finger wrote:

Beginning with the commit in the subject, I get the following build error:

   CC [M]  drivers/media/v4l2-core/videobuf2-core.o
drivers/media/v4l2-core/videobuf2-core.c: In function ‘vb2_warn_zero_bytesused’:
drivers/media/v4l2-core/videobuf2-core.c:1253:2: error: implicit declaration of
function ‘__WARN’ [-Werror=implicit-function-declaration]
   __WARN();
   ^
cc1: some warnings being treated as errors

My .config is attached.

This problem has been bisected to the following commit:

commit 77a3c6fd90c94f635edb00d4a65f485687538791
Author: Laurent Pinchart 
Date:   Fri Jun 19 08:50:07 2015 -0300

 [media] vb2: Don't WARN when v4l2_buffer.bytesused is 0 for multiplanar
buffers

 Commit f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the
 vb2_queue struct") added a WARN_ONCE to catch usage of a deprecated API
 using a zero value for v4l2_buffer.bytesused.

 However, the condition is checked incorrectly, as the v4L2_buffer
 bytesused field is supposed to be ignored for multiplanar buffers. This
 results in spurious warnings when using the multiplanar API.

 Fix it by checking v4l2_buffer.bytesused for uniplanar buffers and
 v4l2_plane.bytesused for multiplanar buffers.

 Fixes: f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the
vb2_queue struct")

 Signed-off-by: Laurent Pinchart 
 Cc: sta...@vger.kernel.org # for v4.0
 Signed-off-by: Mauro Carvalho Chehab 

Thanks,

Larry


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86,kvm: Add a kernel parameter to disable PV spinlock

2015-07-11 Thread Waiman Long
Xen has an kernel command line argument "xen_nopvspin" to disable
paravirtual spinlocks. This patch adds a similar "kvm_nopvspin"
argument to disable paravirtual spinlocks for KVM. This can be useful
for testing as well as allowing administrators to choose unfair lock
for their KVM guests if they want to.

Signed-off-by: Waiman Long 
---
 Documentation/kernel-parameters.txt |7 ++-
 arch/x86/kernel/kvm.c   |   15 +--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 1d6f045..032d37d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1747,6 +1747,11 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
 
+   kvm_nopvspin[X86,KVM]
+   Disables the paravirtualized spinlock slowpath
+   optimizations for KVM.
+
+
l2cr=   [PPC]
 
l3cr=   [PPC]
@@ -4091,7 +4096,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
never -- do not unplug even if version check succeeds
 
xen_nopvspin[X86,XEN]
-   Disables the ticketlock slowpath using Xen PV
+   Disables the spinlock slowpath using Xen PV
optimizations.
 
xen_nopv[X86]
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 47190bd..6373dca 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -584,6 +584,17 @@ static void kvm_kick_cpu(int cpu)
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
 }
 
+static bool kvm_pvspin = true;
+
+/*
+ * Allow disabling of PV spinlock in kernel command line
+ */
+static __init int kvm_parse_nopvspin(char *arg)
+{
+   kvm_pvspin = false;
+   return 0;
+}
+early_param("kvm_nopvspin", kvm_parse_nopvspin);
 
 #ifdef CONFIG_QUEUED_SPINLOCKS
 
@@ -857,7 +868,7 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, 
__ticket_t ticket)
  */
 void __init kvm_spinlock_init(void)
 {
-   if (!kvm_para_available())
+   if (!kvm_para_available() || !kvm_pvspin)
return;
/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
@@ -877,7 +888,7 @@ void __init kvm_spinlock_init(void)
 
 static __init int kvm_spinlock_init_jump(void)
 {
-   if (!kvm_para_available())
+   if (!kvm_para_available() || !kvm_pvspin)
return 0;
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/7] locking/qspinlock: Enhance pvqspinlock & introduce queued unfair lock

2015-07-11 Thread Waiman Long
This patchset consists of two parts:

 1) Patches 1-5 enhance the performance of PV qspinlock especially for
overcommitted guest. The first patch moves all the CPU kicking to
the unlock code. The 2nd and 3rd patches implement a kick-ahead
and wait-early mechanism that was shown to improve performance for
overcommitted guest. They are inspired by the "Do Virtual Machines
Really Scale?" blog from Sanidhya Kashyap.  The 4th patch adds
code to collect PV qspinlock statistics. The last patch adds the
pending bit support to PV qspinlock to improve performance at
light load. This is important as the PV queuing code has even
higher overhead than the native queuing code.

 2) Patches 6 introduces queued unfair lock as a replacement of the
existing unfair byte lock. The queued unfair lock is fairer
than the byte lock currently in the qspinlock while improving
performance at high contention level.  Patch 7 adds a kernel
command line option to KVM for disabling PV spinlock, similar to
the one in Xen, if the administrators choose to do so. The last
patch adds statistics collection to the queued unfair lock code.

Linux kernel builds were run in KVM guest on an 8-socket, 4
cores/socket Westmere-EX system and a 4-socket, 8 cores/socket
Haswell-EX system. So both systems have 32 physical CPUs. VM guests
(no NUMA pinning) were set up with 32, 48 and 60 vCPUs. The kernel
build times (make -j , where  was the number of vCPUs) on
various configurations were as follows:

Westere-EX (8x4):

  Kernel32 vCPUs48 vCPUs60 vCPUs
  --
  pvticketlock (4.1.1)   5m02.0s13m27.6s15m49.9s
  pvqspinlock (4.2-rc1)  3m39.9s11.17.8s12m19.9s
  patched pvqspinlock3m38.5s 9m27.8s 9m39.4s
  unfair byte lock   4m23.8s 7m14.7s 8m50.4s
  unfair queued lock 3m03.4s 3m29.7s 4m15.4s

Haswell-EX (4x8):

  Kernel32 vCPUs48 vCPUs60 vCPUs
  --
  pvticketlock (4.1.1)   1m58.9s18m57.0s20m46.1s
  pvqspinlock (4.2-rc1)  1m59.9s18m44.2s18m57.0s
  patched pvqspinlock2m01.7s 8m03.7s 8m29.5s
  unfair byte lock   2m04.5s 2m46.7s 3m15.6s
  unfair queued lock 1m59.4s 2m04.9s 2m18.6s

It can be seen that queued unfair lock has the best performance in
almost all the cases. As can be seen in patch 4, the overhead of PV
kicking and waiting is quite high. Unfair locks avoid those overhead
and spend the time on productive work instead. On the other hand,
the pvqspinlock is fair while the byte lock is not. The queued unfair
lock is kind of in the middle between those two. It is not as fair
as the pvqspinlock, but is fairer than the byte lock.

Looking at the PV locks, the pvqspinlock patch did increase performance
in the overcommitted guests by about 20% in Westmere-EX and more than
2X in Haswell-EX. More investigation may be needed to find out why
there was slowdown in Haswell-EX compared with Westmere-EX.

In conclusion, unfair lock is actually better performance-wise when a
VM guest is over-committed. If there is no over-commitment, PV locks
work fine, too.

When the VM guest was changed to NUMA pinned (direct mapping between
physical and virtual CPUs) in the Westmere-EX system, the build
times became:

  Kernel32 vCPUs
  --
  pvticketlock (4.1.1)   2m47.1s
  pvqspinlock (4.2-rc1)  2m45.9s
  patched pvqspinlock2m45.2s
  unfair byte lock   2m45.4s
  unfair queued lock 2m44.9s

It can be seen that the build times are virtually the same for all
the configurations.

Waiman Long (7):
  locking/pvqspinlock: Only kick CPU at unlock time
  locking/pvqspinlock: Allow vCPUs kick-ahead
  locking/pvqspinlock: Implement wait-early for overcommitted guest
  locking/pvqspinlock: Collect slowpath lock statistics
  locking/pvqspinlock: Add pending bit support
  locking/qspinlock: A fairer queued unfair lock
  locking/qspinlock: Collect queued unfair lock slowpath statistics

 arch/x86/Kconfig|8 +
 arch/x86/include/asm/qspinlock.h|   17 +-
 kernel/locking/qspinlock.c  |  140 ++-
 kernel/locking/qspinlock_paravirt.h |  436 ---
 kernel/locking/qspinlock_unfair.h   |  327 ++
 5 files changed, 880 insertions(+), 48 deletions(-)
 create mode 100644 kernel/locking/qspinlock_unfair.h

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/7] locking/pvqspinlock: Allow vCPUs kick-ahead

2015-07-11 Thread Waiman Long
Frequent CPU halting (vmexit) and CPU kicking (vmenter) lengthens
critical section and block forward progress.  This patch implements
a kick-ahead mechanism where the unlocker will kick the queue head
vCPUs as well as up to two additional vCPUs next to the queue head if
they were halted.  The kickings are done after exiting the critical
section to improve parallelism.

The amount of kick-ahead allowed depends on the number of vCPUs in
the VM guest.  This change should improve overall system performance
in a busy overcommitted guest.

Signed-off-by: Waiman Long 
---
 kernel/locking/qspinlock_paravirt.h |   71 ++-
 1 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index d302c39..4c1a299 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -67,6 +67,12 @@ static struct pv_hash_entry *pv_lock_hash;
 static unsigned int pv_lock_hash_bits __read_mostly;
 
 /*
+ * Allow kick-ahead of vCPUs at unlock time
+ */
+#define PV_KICK_AHEAD_MAX  4
+static int pv_kick_ahead __read_mostly;
+
+/*
  * Allocate memory for the PV qspinlock hash buckets
  *
  * This function should be called from the paravirt spinlock initialization
@@ -74,7 +80,16 @@ static unsigned int pv_lock_hash_bits __read_mostly;
  */
 void __init __pv_init_lock_hash(void)
 {
-   int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
+   int ncpus = num_possible_cpus();
+   int pv_hash_size = ALIGN(4 * ncpus, PV_HE_PER_LINE);
+   int i;
+
+   /*
+* The minimum number of vCPUs required in each kick-ahead level
+*/
+   static const u8 kick_ahead_threshold[PV_KICK_AHEAD_MAX] = {
+   4, 8, 16, 32
+   };
 
if (pv_hash_size < PV_HE_MIN)
pv_hash_size = PV_HE_MIN;
@@ -88,6 +103,18 @@ void __init __pv_init_lock_hash(void)
   pv_hash_size, 0, HASH_EARLY,
   _lock_hash_bits, NULL,
   pv_hash_size, pv_hash_size);
+   /*
+* Enable the unlock kick ahead mode according to the number of
+* vCPUs available.
+*/
+   for (i = PV_KICK_AHEAD_MAX; i > 0; i--)
+   if (ncpus >= kick_ahead_threshold[i - 1]) {
+   pv_kick_ahead = i;
+   break;
+   }
+   if (pv_kick_ahead)
+   printk(KERN_INFO "PV unlock kick ahead level %d enabled\n",
+  pv_kick_ahead);
 }
 
 #define for_each_hash_entry(he, offset, hash)  
\
@@ -317,13 +344,33 @@ static void pv_wait_head(struct qspinlock *lock, struct 
mcs_spinlock *node)
 }
 
 /*
+ * Helper to get the address of the next kickable node
+ * The node has to be in the halted state and is being transitioned to
+ * running state by this function. Otherwise, NULL will be returned.
+ */
+static inline struct pv_node *pv_get_kick_node(struct pv_node *node)
+{
+   struct pv_node *next = (struct pv_node *)READ_ONCE(node->mcs.next);
+
+   if (!next)
+   return NULL;
+
+   if ((READ_ONCE(next->state) != vcpu_halted) ||
+   (cmpxchg(>state, vcpu_halted, vcpu_running) != vcpu_halted))
+   next = NULL;/* No kicking is needed */
+
+   return next;
+}
+
+/*
  * PV version of the unlock function to be used in stead of
  * queued_spin_unlock().
  */
 __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
struct __qspinlock *l = (void *)lock;
-   struct pv_node *node;
+   struct pv_node *node, *nxt, *next[PV_KICK_AHEAD_MAX];
+   int i, nr_kick;
 
/*
 * We must not unlock if SLOW, because in that case we must first
@@ -340,6 +387,20 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
*lock)
node = pv_unhash(lock);
 
/*
+* Implement kick-ahead mode
+*
+* Access the next group of nodes, if available, and prepare to kick
+* them after releasing the lock if they are in the halted state. This
+* should improve performance on an overcommitted system.
+*/
+   for (nr_kick = 0, nxt = node; nr_kick < pv_kick_ahead;
+nxt = next[nr_kick], nr_kick++) {
+   next[nr_kick] = pv_get_kick_node(nxt);
+   if (!next[nr_kick])
+   break;
+   }
+
+   /*
 * Now that we have a reference to the (likely) blocked pv_node,
 * release the lock.
 */
@@ -354,6 +415,12 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
*lock)
 */
if (READ_ONCE(node->state) == vcpu_hashed)
pv_kick(node->cpu);
+
+   /*
+* Kick the next group of vCPUs, if available.
+*/
+   for (i = 0; i < nr_kick; i++)
+   pv_kick(next[i]->cpu);
 }
 

[PATCH 4/7] locking/pvqspinlock: Collect slowpath lock statistics

2015-07-11 Thread Waiman Long
This patch enables the accumulation of kicking and waiting related
PV qspinlock statistics when the new QUEUED_LOCK_STAT configuration
option is selected. It also enables the collection of kicking and
wakeup latencies which have a heavy dependency on the CPUs being used.

The measured latencies for different CPUs are:

CPU Wakeup  Kicking
--- --  ---
Haswell-EX  26.4us   9.2us
Westmere-EX 99.4us  25.5us

So Haswell is much faster than Westmere.

The accumulated lock statistics will be reported in debugfs under the
pv-qspinlock directory.

Signed-off-by: Waiman Long 
---
 arch/x86/Kconfig|7 ++
 kernel/locking/qspinlock_paravirt.h |  173 ++-
 2 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55bced1..299a1c4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -679,6 +679,13 @@ config PARAVIRT_SPINLOCKS
 
  If you are unsure how to answer this question, answer Y.
 
+config QUEUED_LOCK_STAT
+   bool "Paravirt queued lock statistics"
+   depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+   ---help---
+ Enable the collection of statistical data on the behavior of
+ paravirtualized queued spinlocks and report them on debugfs.
+
 source "arch/x86/xen/Kconfig"
 
 config KVM_GUEST
diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index b3fe5bb..efc9a72 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -60,6 +60,155 @@ struct pv_node {
 };
 
 /*
+ * PV qspinlock statistics
+ */
+enum pv_qlock_stat {
+   pvstat_wait_head,
+   pvstat_wait_node,
+   pvstat_wait_early,
+   pvstat_kick_wake,
+   pvstat_kick_cpu,
+   pvstat_kick_ahead,
+   pvstat_no_kick,
+   pvstat_spurious,
+   pvstat_hash,
+   pvstat_hops,
+   pvstat_num  /* Total number of statistics counts */
+};
+
+#ifdef CONFIG_QUEUED_LOCK_STAT
+/*
+ * Collect pvqspinlock statiatics
+ */
+#include 
+#include 
+
+static const char * const stat_fsnames[pvstat_num] = {
+   [pvstat_wait_head]   = "wait_head_count",
+   [pvstat_wait_node]   = "wait_node_count",
+   [pvstat_wait_early]  = "wait_early_count",
+   [pvstat_kick_wake]   = "kick_wake_count",
+   [pvstat_kick_cpu]= "kick_cpu_count",
+   [pvstat_kick_ahead]  = "kick_ahead_count",
+   [pvstat_no_kick] = "no_kick_count",
+   [pvstat_spurious]= "spurious_wakeup",
+   [pvstat_hash]= "hash_count",
+   [pvstat_hops]= "hash_hops_count",
+};
+
+static atomic_t pvstats[pvstat_num];
+
+/*
+ * pv_kick_latencies = sum of all pv_kick latencies in ns
+ * pv_wake_latencies = sum of all wakeup latencies in ns
+ *
+ * Avg kick latency = pv_kick_latencies/(kick_cpu_count + kick_ahead_count)
+ * Avg wake latency = pv_wake_latencies/kick_wake_count
+ */
+static atomic64_t pv_kick_latencies, pv_wake_latencies;
+static DEFINE_PER_CPU(u64, pv_kick_time);
+
+/*
+ * Reset all the statistics counts if set
+ */
+static bool reset_cnts __read_mostly;
+
+/*
+ * Initialize debugfs for the PV qspinlock statistics
+ */
+static int __init pv_qspinlock_debugfs(void)
+{
+   struct dentry *d_pvqlock = debugfs_create_dir("pv-qspinlock", NULL);
+   int i;
+
+   if (!d_pvqlock)
+   printk(KERN_WARNING
+  "Could not create 'pv-qspinlock' debugfs directory\n");
+
+   for (i = 0; i < pvstat_num; i++)
+   debugfs_create_u32(stat_fsnames[i], 0444, d_pvqlock,
+ (u32 *)[i]);
+   debugfs_create_u64("kick_latencies", 0444, d_pvqlock,
+  (u64 *)_kick_latencies);
+   debugfs_create_u64("wake_latencies", 0444, d_pvqlock,
+  (u64 *)_wake_latencies);
+   debugfs_create_bool("reset_cnts", 0644, d_pvqlock, (u32 *)_cnts);
+   return 0;
+}
+fs_initcall(pv_qspinlock_debugfs);
+
+/*
+ * Reset all the counts
+ */
+static noinline void pvstat_reset(void)
+{
+   int i;
+
+   for (i = 0; i < pvstat_num; i++)
+   atomic_set([i], 0);
+   atomic64_set(_kick_latencies, 0);
+   atomic64_set(_wake_latencies, 0);
+   reset_cnts = 0;
+}
+
+/*
+ * Increment the PV qspinlock statistics counts
+ */
+static inline void pvstat_inc(enum pv_qlock_stat stat)
+{
+   atomic_inc([stat]);
+   if (unlikely(reset_cnts))
+   pvstat_reset();
+}
+
+/*
+ * PV hash hop count
+ */
+static inline void pvstat_hop(int hopcnt)
+{
+   atomic_inc([pvstat_hash]);
+   atomic_add(hopcnt, [pvstat_hops]);
+}
+
+/*
+ * Replacement function for pv_kick()
+ */
+static inline void __pv_kick(int cpu)
+{
+   u64 start = sched_clock();
+
+   *per_cpu_ptr(_kick_time, cpu) = start;
+   pv_kick(cpu);
+   atomic64_add(sched_clock() - start, 

[PATCH 7/7] locking/qspinlock: Collect queued unfair lock slowpath statistics

2015-07-11 Thread Waiman Long
This patch enables the accumulation of unfair qspinlock statistics
when the CONFIG_QUEUED_LOCK_STAT configuration parameter is set.

The accumulated lock statistics will be reported in debugfs under
the unfair-qspinlock directory.

On a KVM guest with 32 vCPUs, the statistics counts after bootup were:

lsteal_cnts = 172219 2377 425 118 33 8 5 12 14 0 0 0
trylock_cnt = 1495372

So most of the lock stealing happened in the initial trylock before
entering the queue. Once a vCPU is in the queue, the chance of getting
the lock drops off significantly the further it is away from the queue head.

Signed-off-by: Waiman Long 
---
 arch/x86/Kconfig  |7 ++-
 kernel/locking/qspinlock.c|2 +-
 kernel/locking/qspinlock_unfair.h |   89 +
 3 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 299a1c4..aee6236 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -680,11 +680,12 @@ config PARAVIRT_SPINLOCKS
  If you are unsure how to answer this question, answer Y.
 
 config QUEUED_LOCK_STAT
-   bool "Paravirt queued lock statistics"
-   depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+   bool "Paravirt/Unfair queued lock statistics"
+   depends on DEBUG_FS && QUEUED_SPINLOCKS
---help---
  Enable the collection of statistical data on the behavior of
- paravirtualized queued spinlocks and report them on debugfs.
+ paravirtualized and unfair queued spinlocks and report them
+ on debugfs.
 
 source "arch/x86/xen/Kconfig"
 
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65dead9..12e2e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -538,7 +538,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #ifdef queued_spin_trylock
 #undef queued_spin_trylock
 #endif
-#define queued_spin_trylockqueued_spin_trylock_unfair
+#define queued_spin_trylock__queued_spin_trylock_unfair
 
 /*
  * The unfair lock code is used internally and so don't need to be exported
diff --git a/kernel/locking/qspinlock_unfair.h 
b/kernel/locking/qspinlock_unfair.h
index 0e8a40f..fc94578 100644
--- a/kernel/locking/qspinlock_unfair.h
+++ b/kernel/locking/qspinlock_unfair.h
@@ -44,6 +44,93 @@ struct uf_node {
u32 prev_tail;  /* Previous node tail code */
 };
 
+#ifdef CONFIG_QUEUED_LOCK_STAT
+
+#include 
+
+/*
+ * Unfair qspinlock statistics
+ *
+ * All spinning CPUs are grouped into buckets depending on the most
+ * significant bit in their lock stealing period. The first entry in
+ * the array is for the queue head.
+ */
+#define NR_LPERIOD_CNTS(LPERIOD_THRESHOLD_SHIFT - LPERIOD_MIN_SHIFT + 
6)
+static atomic_t lsteal_cnts[NR_LPERIOD_CNTS];
+
+/*
+ * # of successful trylocks at beginning of slowpath
+ */
+static atomic_t trylock_cnt;
+
+/*
+ * Counts reset flag
+ */
+static bool reset_cnts __read_mostly;
+
+/*
+ * Initialize debugfs for the unfair qspinlock statistics
+ */
+static int __init unfair_qspinlock_debugfs(void)
+{
+   struct dentry *d_ufqlock = debugfs_create_dir("unfair-qspinlock", NULL);
+
+   if (!d_ufqlock)
+   printk(KERN_WARNING
+ "Could not create 'unfair-qspinlock' debugfs directory\n");
+
+   debugfs_create_u32_array("lsteal_cnts", 0444, d_ufqlock,
+   (u32 *)lsteal_cnts, NR_LPERIOD_CNTS);
+   debugfs_create_u32("trylock_cnt", 0444, d_ufqlock, (u32 *)_cnt);
+   debugfs_create_bool("reset_cnts", 0644, d_ufqlock, (u32 *)_cnts);
+   return 0;
+}
+fs_initcall(unfair_qspinlock_debugfs);
+
+/*
+ * Reset all the statistics counts
+ */
+static noinline void reset_counts(void)
+{
+   int idx;
+
+   reset_cnts = false;
+   atomic_set(_cnt, 0);
+   for (idx = 0 ; idx < NR_LPERIOD_CNTS; idx++)
+   atomic_set(_cnts[idx], 0);
+}
+
+/*
+ * Increment the unfair qspinlock statistic count
+ */
+static inline void ustat_inc(struct uf_node *pn)
+{
+   /*
+* fls() returns the most significant 1 bit position + 1
+*/
+   int idx = fls(pn->lsteal_period) - LPERIOD_MIN_SHIFT;
+
+   if (idx >= NR_LPERIOD_CNTS)
+   idx = NR_LPERIOD_CNTS - 1;
+   atomic_inc(_cnts[idx]);
+   if (unlikely(reset_cnts))
+   reset_counts();
+}
+
+static inline bool __queued_spin_trylock_unfair(struct qspinlock *lock)
+{
+   bool ret = queued_spin_trylock_unfair(lock);
+
+   if (ret)
+   atomic_inc(_cnt);
+   return ret;
+}
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+static inline void ustat_inc(struct uf_node *pn) { }
+#define __queued_spin_trylock_unfair   queued_spin_trylock_unfair
+#endif /* CONFIG_QUEUED_LOCK_STAT */
+
 /**
  * cmpxchg_tail - Put in the new tail code if it matches the old one
  * @lock : Pointer to queue spinlock structure
@@ -125,6 +212,7 @@ static inline bool unfair_wait_node(struct 

[PATCH 6/7] locking/qspinlock: A fairer queued unfair lock

2015-07-11 Thread Waiman Long
For a virtual guest with the qspinlock patch, a simple unfair byte lock
will be used if PV spinlock is not configured in or the hypervisor
isn't either KVM or Xen. The byte lock works fine with small guest
of just a few vCPUs. On a much larger guest, however, byte lock can
have the following problems:

 1) Lock starvation is a real possibility especially if the number
of vCPUs is large.
 2) The constant reading and occasionally writing to the lock word can
put a lot of cacheline contention traffic on the affected
cacheline.

This patch introduces a queue-based unfair lock where all the vCPUs on
the queue can opportunistically steal the lock, but the frequency of
doing so decreases the further it is away from the queue head. It can
encourage a more FIFO like order of getting the lock and hence greatly
reduce the chance of lock starvation.  It can also reduce cacheline
contention problem and so improve the performance of the system.

This patch has no impact on native qspinlock performance at all. The
unfair lock code will only be compiled in if CONFIG_HYPERVISOR_GUEST
is defined.

A microbenchmark of running 1 million lock-unlock operation for various
number of threads running on a KVM guest with 32 pinned vCPUs and 4
vCPUs per node (8 nodes). This microbenchmark is intended to measure
the variability of the execution times.

Kernel  ThreadsMin/Avg/Max(ms)SD(ms)
--  --------
Unfair byte lock   4  133.1/386.0/509.0   153.48
   8 720.5/939.5/1,068.0  117.08
  162,237.8/6,045.8/7,550.3   1747.37
  325,880.2/37,028.2/44,668.7 10136.30
Unfair qspinlock   4  326.1/453.7/523.0   80.44
   8 681.6/1,126.4/1,486.5304.85
  161,543.0/3,633.4/4,568.1   1000.47
  322,356.8/7,103.3/7,894.9   1231.11

With small number of contending threads, both the performance and
variability of both types of unfair lock are similar. However, when
the number of contending threads increases, the byte lock has a much
higher variability than the unfair qspinlock.

Signed-off-by: Waiman Long 
---
 arch/x86/include/asm/qspinlock.h  |   17 ++--
 kernel/locking/qspinlock.c|   98 ++-
 kernel/locking/qspinlock_unfair.h |  238 +
 3 files changed, 340 insertions(+), 13 deletions(-)
 create mode 100644 kernel/locking/qspinlock_unfair.h

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 9d51fae..bc82ace 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -39,18 +39,19 @@ static inline void queued_spin_unlock(struct qspinlock 
*lock)
 }
 #endif
 
-#define virt_queued_spin_lock virt_queued_spin_lock
+#ifdef CONFIG_HYPERVISOR_GUEST
+#ifndef static_cpu_has_hypervisor
+#define static_cpu_has_hypervisor  static_cpu_has(X86_FEATURE_HYPERVISOR)
+#endif
 
-static inline bool virt_queued_spin_lock(struct qspinlock *lock)
+#define queued_spin_trylock_unfair queued_spin_trylock_unfair
+static inline bool queued_spin_trylock_unfair(struct qspinlock *lock)
 {
-   if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
-   return false;
-
-   while (atomic_cmpxchg(>val, 0, _Q_LOCKED_VAL) != 0)
-   cpu_relax();
+   u8 *l = (u8 *)lock;
 
-   return true;
+   return !READ_ONCE(*l) && (xchg(l, _Q_LOCKED_VAL) == 0);
 }
+#endif
 
 #include 
 
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5a25e89..65dead9 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -19,7 +19,11 @@
  *  Peter Zijlstra 
  */
 
-#ifndef _GEN_PV_LOCK_SLOWPATH
+#if defined(_GEN_PV_LOCK_SLOWPATH) || defined(_GEN_UNFAIR_LOCK_SLOWPATH)
+#define _GEN_LOCK_SLOWPATH
+#endif
+
+#ifndef _GEN_LOCK_SLOWPATH
 
 #include 
 #include 
@@ -68,7 +72,7 @@
 
 #include "mcs_spinlock.h"
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#ifdef CONFIG_HYPERVISOR_GUEST
 #define MAX_NODES  8
 #else
 #define MAX_NODES  4
@@ -81,6 +85,7 @@
  * Exactly fits one 64-byte cacheline on a 64-bit architecture.
  *
  * PV doubles the storage and uses the second cacheline for PV state.
+ * Unfair lock (mutually exclusive to PV) also uses the second cacheline.
  */
 static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
 
@@ -277,7 +282,18 @@ static __always_inline void __pv_wait_head(struct 
qspinlock *lock,
 #define queued_spin_lock_slowpath  native_queued_spin_lock_slowpath
 #endif
 
-#endif /* _GEN_PV_LOCK_SLOWPATH */
+#ifdef CONFIG_HYPERVISOR_GUEST
+static void unfair_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#else
+static __always_inline void
+unfair_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { }
+#endif
+
+#ifndef static_cpu_has_hypervisor
+#define 

[PATCH 3/7] locking/pvqspinlock: Implement wait-early for overcommitted guest

2015-07-11 Thread Waiman Long
In an overcommitted guest where some vCPUs have to be halted to make
forward progress in other areas, it is highly likely that a vCPU
later in the spinlock queue will be spinning while the ones earlier in
the queue would have been halted already. The spinning in the later
vCPUs is then just a waste of precious CPU cycles because they are
not going to get the lock soon as the earlier ones have to be woken
up and take their turn to get the lock.

This patch implements a wait-early mechanism where the vCPU will
call pv_wait() earlier if the previous vCPU is in the halted state
already. In this case, it will spin less before calling pv_wait(). On
the other hand, if the previous vCPU was running and then becomes
halted, the current vCPU will call pv_wait() immediately in this case.

This patch also separates the spin threshold for queue head and
queue nodes.  It favors the queue head by allowing it to spin longer
before calling pv_wait().

Signed-off-by: Waiman Long 
---
 kernel/locking/qspinlock.c  |5 ++-
 kernel/locking/qspinlock_paravirt.h |   52 +--
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index d2e0fc1..782bc18 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -238,7 +238,8 @@ static __always_inline void set_locked(struct qspinlock 
*lock)
  */
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+  struct mcs_spinlock *prev) { }
 static __always_inline void __pv_scan_next(struct qspinlock *lock,
   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
@@ -391,7 +392,7 @@ queue:
prev = decode_tail(old);
WRITE_ONCE(prev->next, node);
 
-   pv_wait_node(node);
+   pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(>locked);
}
 
diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index 4c1a299..b3fe5bb 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -22,6 +22,26 @@
 #define _Q_SLOW_VAL(3U << _Q_LOCKED_OFFSET)
 
 /*
+ * Queued Spinlock Spin Thresholds
+ * ---
+ * Because of the cacheline contention effect of the ticket spinlock, the
+ * same spin threshold for queued spinlock will run a bit faster. So we set
+ * a slight larger threshold for the queue head (1.25X) while the other queue
+ * nodes will keep the same threshold.
+ *
+ * A queue node vCPU will spin less if the vCPU in the previous node is halted.
+ * The queue node vCPU will also monitor the state of the previous node
+ * periodically if it is not halted. When the previous node vCPU transitions
+ * from active to halted, the current one will go to halted state too. It is
+ * because it takes quite a lot of cycles for a vCPU to perform vmexit and
+ * vmenter. So it is better for the current vCPU to go be halted too.
+ */
+#define QHEAD_SPIN_THRESHOLD   (SPIN_THRESHOLD + (SPIN_THRESHOLD/4))
+#define QNODE_SPIN_THRESHOLD   SPIN_THRESHOLD
+#define QNODE_SPIN_THRESHOLD_SHORT (QNODE_SPIN_THRESHOLD >> 4)
+#define QNODE_SPIN_CHECK_MASK  0xff
+
+/*
  * Queue node uses: vcpu_running & vcpu_halted.
  * Queue head uses: vcpu_running & vcpu_hashed.
  */
@@ -187,15 +207,41 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_scan_next() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node)
+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 {
struct pv_node *pn = (struct pv_node *)node;
+   struct pv_node *pp = (struct pv_node *)prev;
+   bool prev_halted;
int loop;
 
for (;;) {
-   for (loop = SPIN_THRESHOLD; loop; loop--) {
+   /*
+* Spin less if the previous vCPU was in the halted state
+*/
+   prev_halted = (READ_ONCE(pp->state) != vcpu_running);
+   loop = prev_halted ? QNODE_SPIN_THRESHOLD_SHORT
+  : QNODE_SPIN_THRESHOLD;
+   while (loop--) {
if (READ_ONCE(node->locked))
return;
+   /*
+* Look for state transition at previous node.
+*
+* running => halted:
+*  call pv_wait() now to halt current vCPU
+* halted => running:
+*  reset spin threshold to QNODE_SPIN_THRESHOLD
+*/
+  

[PATCH 5/7] locking/pvqspinlock: Add pending bit support

2015-07-11 Thread Waiman Long
Like the native qspinlock, using the pending bit when it is lightly
loaded to acquire the lock is faster than going through the PV queuing
process which is even slower than the native queuing process. It also
avoids loading two additional cachelines (the MCS and PV nodes).

This patch adds the pending bit support for PV qspinlock. The
pending bit code has a smaller spin threshold. It will default back
to the queuing method if it cannot acquire the lock within a certain
time limit.

Signed-off-by: Waiman Long 
---
 kernel/locking/qspinlock.c  |   27 +++-
 kernel/locking/qspinlock_paravirt.h |   61 +++
 2 files changed, 87 insertions(+), 1 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 782bc18..5a25e89 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -162,6 +162,17 @@ static __always_inline void 
clear_pending_set_locked(struct qspinlock *lock)
WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
 }
 
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+   struct __qspinlock *l = (void *)lock;
+
+   WRITE_ONCE(l->pending, 0);
+}
+
 /*
  * xchg_tail - Put in the new queue tail code word & retrieve previous one
  * @lock : Pointer to queued spinlock structure
@@ -193,6 +204,15 @@ static __always_inline void 
clear_pending_set_locked(struct qspinlock *lock)
 }
 
 /**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+   atomic_add(-_Q_PENDING_VAL, >val);
+}
+
+/**
  * xchg_tail - Put in the new queue tail code word & retrieve previous one
  * @lock : Pointer to queued spinlock structure
  * @tail : The new queue tail code word
@@ -246,6 +266,7 @@ static __always_inline void __pv_wait_head(struct qspinlock 
*lock,
   struct mcs_spinlock *node) { }
 
 #define pv_enabled()   false
+#define pv_pending_lock(l, v)  false
 
 #define pv_init_node   __pv_init_node
 #define pv_wait_node   __pv_wait_node
@@ -287,8 +308,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 
val)
 
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
-   if (pv_enabled())
+   if (pv_enabled()) {
+   if (pv_pending_lock(lock, val))
+   return; /* Got the lock via pending bit */
goto queue;
+   }
 
if (virt_queued_spin_lock(lock))
return;
@@ -464,6 +488,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #undef pv_wait_node
 #undef pv_scan_next
 #undef pv_wait_head
+#undef pv_pending_lock
 
 #undef  queued_spin_lock_slowpath
 #define queued_spin_lock_slowpath  __pv_queued_spin_lock_slowpath
diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index efc9a72..d770694 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -40,6 +40,7 @@
 #define QNODE_SPIN_THRESHOLD   SPIN_THRESHOLD
 #define QNODE_SPIN_THRESHOLD_SHORT (QNODE_SPIN_THRESHOLD >> 4)
 #define QNODE_SPIN_CHECK_MASK  0xff
+#define PENDING_SPIN_THRESHOLD QNODE_SPIN_THRESHOLD_SHORT
 
 /*
  * Queue node uses: vcpu_running & vcpu_halted.
@@ -70,6 +71,8 @@ enum pv_qlock_stat {
pvstat_kick_cpu,
pvstat_kick_ahead,
pvstat_no_kick,
+   pvstat_pend_lock,
+   pvstat_pend_fail,
pvstat_spurious,
pvstat_hash,
pvstat_hops,
@@ -91,6 +94,8 @@ static const char * const stat_fsnames[pvstat_num] = {
[pvstat_kick_cpu]= "kick_cpu_count",
[pvstat_kick_ahead]  = "kick_ahead_count",
[pvstat_no_kick] = "no_kick_count",
+   [pvstat_pend_lock]   = "pending_lock_count",
+   [pvstat_pend_fail]   = "pending_fail_count",
[pvstat_spurious]= "spurious_wakeup",
[pvstat_hash]= "hash_count",
[pvstat_hops]= "hash_hops_count",
@@ -355,6 +360,62 @@ static void pv_init_node(struct mcs_spinlock *node)
 }
 
 /*
+ * Try to acquire the lock and wait using the pending bit
+ */
+static int pv_pending_lock(struct qspinlock *lock, u32 val)
+{
+   int loop = PENDING_SPIN_THRESHOLD;
+   u32 new, old;
+
+   /*
+* wait for in-progress pending->locked hand-overs
+*/
+   if (val == _Q_PENDING_VAL) {
+   while (((val = atomic_read(>val)) == _Q_PENDING_VAL) &&
+   loop--)
+   cpu_relax();
+   }
+
+   /*
+* trylock || pending
+*/
+   for (;;) {
+   if (val & ~_Q_LOCKED_MASK)
+   goto queue;
+   new = _Q_LOCKED_VAL;
+   if (val == new)
+   new |= _Q_PENDING_VAL;
+   old = 

[PATCH 1/7] locking/pvqspinlock: Only kick CPU at unlock time

2015-07-11 Thread Waiman Long
For an over-committed guest with more vCPUs than physical CPUs
available, it is possible that a vCPU may be kicked twice before
getting the lock - one before it becomes queue head and once before
it gets the lock. All these CPU kicking and halting (VMEXIT) can be
expensive and slow down system performance.

This patch adds a new vCPU state (vcpu_hashed) which enables the code
to delay CPU kicking until at unlock time. Once this state is set,
the new lock holder will set _Q_SLOW_VAL and fill in the hash table
on behalf of the halted queue head vCPU. The original vcpu_halted
state will be used by pv_wait_node() only to differentiate other
queue nodes from the queue head.

Signed-off-by: Waiman Long 
---
 kernel/locking/qspinlock.c  |   10 ++--
 kernel/locking/qspinlock_paravirt.h |   83 ++-
 2 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38c4920..d2e0fc1 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock 
*lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_scan_next(struct qspinlock *lock,
+  struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
   struct mcs_spinlock *node) { }
 
@@ -248,7 +248,7 @@ static __always_inline void __pv_wait_head(struct qspinlock 
*lock,
 
 #define pv_init_node   __pv_init_node
 #define pv_wait_node   __pv_wait_node
-#define pv_kick_node   __pv_kick_node
+#define pv_scan_next   __pv_scan_next
 #define pv_wait_head   __pv_wait_head
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
@@ -440,7 +440,7 @@ queue:
cpu_relax();
 
arch_mcs_spin_unlock_contended(>locked);
-   pv_kick_node(next);
+   pv_scan_next(lock, next);
 
 release:
/*
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
 #undef pv_init_node
 #undef pv_wait_node
-#undef pv_kick_node
+#undef pv_scan_next
 #undef pv_wait_head
 
 #undef  queued_spin_lock_slowpath
diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index 04ab181..d302c39 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -21,9 +21,14 @@
 
 #define _Q_SLOW_VAL(3U << _Q_LOCKED_OFFSET)
 
+/*
+ * Queue node uses: vcpu_running & vcpu_halted.
+ * Queue head uses: vcpu_running & vcpu_hashed.
+ */
 enum vcpu_state {
vcpu_running = 0,
-   vcpu_halted,
+   vcpu_halted,/* Used only in pv_wait_node */
+   vcpu_hashed,/* = pv_hash'ed + vcpu_halted */
 };
 
 struct pv_node {
@@ -152,7 +157,8 @@ static void pv_init_node(struct mcs_spinlock *node)
 
 /*
  * Wait for node->locked to become true, halt the vcpu after a short spin.
- * pv_kick_node() is used to wake the vcpu again.
+ * pv_scan_next() is used to set _Q_SLOW_VAL and fill in hash table on its
+ * behalf.
  */
 static void pv_wait_node(struct mcs_spinlock *node)
 {
@@ -171,9 +177,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
 *
 * [S] pn->state = vcpu_halted[S] next->locked = 1
 * MB MB
-* [L] pn->locked   [RmW] pn->state = vcpu_running
+* [L] pn->locked   [RmW] pn->state = vcpu_hashed
 *
-* Matches the xchg() from pv_kick_node().
+* Matches the cmpxchg() from pv_scan_next().
 */
smp_store_mb(pn->state, vcpu_halted);
 
@@ -181,9 +187,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
pv_wait(>state, vcpu_halted);
 
/*
-* Reset the vCPU state to avoid unncessary CPU kicking
+* Reset the state except when vcpu_hashed is set.
 */
-   WRITE_ONCE(pn->state, vcpu_running);
+   cmpxchg(>state, vcpu_halted, vcpu_running);
 
/*
 * If the locked flag is still not set after wakeup, it is a
@@ -193,6 +199,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 * MCS lock will be released soon.
 */
}
+
/*
 * By now our node->locked should be 1 and our caller will not actually
 * spin-wait for it. We do however rely on our caller to do a
@@ -201,24 +208,32 @@ static void pv_wait_node(struct mcs_spinlock *node)
 }
 
 /*
- * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * Called after setting 

Re: [PATCH] MIPS: ath79: irq: Remove the include of drivers/irqchip/irqchip.h

2015-07-11 Thread Thomas Gleixner
On Wed, 8 Jul 2015, Alban Bedel wrote:
> We shouldn't include irqchip.h from outside of the drivers/irqchip
> directory. The irq driver should idealy be there, however this not
> trivial at the moment. We still need to support platforms without DT
> support and the interface to the DDR controller still use a custom
> arch specific API.
> 
> For now just redefine the IRQCHIP_DECLARE macro to avoid the cross
> tree include.

The macro has been moved to linux/irqchip.h.

But even if it would still be in drivers/irqchip such a redefine is
even worse than the ../../... include. And the proper solution from
the very beginning would have been to move the macro to the global
header instead of this horrible include.

Sigh,

tglx

 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] arm64, mm: Use flush_tlb_all_local() in flush_context().

2015-07-11 Thread David Daney
From: David Daney 

When CONFIG_SMP, we end up calling flush_context() on each CPU
(indirectly) from __new_context().  Because of this, doing a broadcast
TLB invalidate is overkill, as all CPUs will be doing a local
invalidation.

Change the scope of the TLB invalidation operation to be local,
resulting in nr_cpus invalidations, rather than nr_cpus^2.

On CPUs with a large ASID space this operation is not often done.
But, when it is, this reduces the overhead.

Benchmarked "time make -j48" kernel build with and without the patch on
Cavium ThunderX system, one run to warm up the caches, and then five
runs measured:

original  with-patch
139.299s  139.0766s
S.D. 0.321S.D. 0.159

Probably a little faster, but could be measurement noise.

Signed-off-by: David Daney 
---
 arch/arm64/mm/context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 76c1e6c..ab5b8d3 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -48,7 +48,7 @@ static void flush_context(void)
 {
/* set the reserved TTBR0 before flushing the TLB */
cpu_set_reserved_ttbr0();
-   flush_tlb_all();
+   flush_tlb_all_local();
if (icache_is_aivivt())
__flush_icache_all();
 }
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] arm64, mm: Use IPIs for TLB invalidation.

2015-07-11 Thread David Daney
From: David Daney 

This patch set (or something like it) is needed for the Cavium
ThunderX, but its performance improvements may make it compelling on
its own merits.

Summary: On ThunderX we cannot use broadcast TLB invalidation, so we
use IPIs where necessary.  The funny thing is that it also happens to
make workloads similar to kernel builds much faster.

David Daney (3):
  arm64, mm: Add flush_tlb_all_local() function.
  arm64, mm: Use flush_tlb_all_local() in flush_context().
  arm64, mm: Use IPIs for TLB invalidation.

 arch/arm64/include/asm/tlbflush.h | 64 ---
 arch/arm64/mm/context.c   |  2 +-
 arch/arm64/mm/flush.c | 46 
 3 files changed, 59 insertions(+), 53 deletions(-)

-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] arm64, mm: Add flush_tlb_all_local() function.

2015-07-11 Thread David Daney
From: David Daney 

To be used in follow-on patch.

Signed-off-by: David Daney 
---
 arch/arm64/include/asm/tlbflush.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/include/asm/tlbflush.h 
b/arch/arm64/include/asm/tlbflush.h
index 934815d..42c09ec 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -105,6 +105,13 @@ static inline void __flush_tlb_range(struct vm_area_struct 
*vma,
dsb(ish);
 }
 
+static inline void flush_tlb_all_local(void)
+{
+   dsb(ishst);
+   asm("tlbi   vmalle1");
+   isb();
+}
+
 static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long 
end)
 {
unsigned long addr;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] arm64, mm: Use IPIs for TLB invalidation.

2015-07-11 Thread David Daney
From: David Daney 

Most broadcast TLB invalidations are unnecessary.  So when
invalidating for a given mm/vma, target only the needed CPUs via
an IPI.

For global TLB invalidations, also use IPI.

Tested on Cavium ThunderX.

This change reduces 'time make -j48' on kernel from 139s to 116s (83%
as long).

The patch is needed because of a ThunderX Pass1 erratum: Exclusive
store operations unreliable in the presence of broadcast TLB
invalidations.  The performance improvements shown make it compelling
even without the erratum workaround need.

Signed-off-by: David Daney 
---
 arch/arm64/include/asm/tlbflush.h | 67 ++-
 arch/arm64/mm/flush.c | 46 +++
 2 files changed, 56 insertions(+), 57 deletions(-)

diff --git a/arch/arm64/include/asm/tlbflush.h 
b/arch/arm64/include/asm/tlbflush.h
index 42c09ec..2c132b0 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -63,46 +63,22 @@
  * only require the D-TLB to be invalidated.
  * - kaddr - Kernel virtual memory address
  */
-static inline void flush_tlb_all(void)
-{
-   dsb(ishst);
-   asm("tlbi   vmalle1is");
-   dsb(ish);
-   isb();
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-   unsigned long asid = (unsigned long)ASID(mm) << 48;
+void flush_tlb_all(void);
 
-   dsb(ishst);
-   asm("tlbi   aside1is, %0" : : "r" (asid));
-   dsb(ish);
-}
+void flush_tlb_mm(struct mm_struct *mm);
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
  unsigned long uaddr)
 {
-   unsigned long addr = uaddr >> 12 |
-   ((unsigned long)ASID(vma->vm_mm) << 48);
-
-   dsb(ishst);
-   asm("tlbi   vae1is, %0" : : "r" (addr));
-   dsb(ish);
+   /* Simplify to entire mm. */
+   flush_tlb_mm(vma->vm_mm);
 }
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 unsigned long start, unsigned long end)
 {
-   unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
-   unsigned long addr;
-   start = asid | (start >> 12);
-   end = asid | (end >> 12);
-
-   dsb(ishst);
-   for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-   asm("tlbi vae1is, %0" : : "r"(addr));
-   dsb(ish);
+   /* Simplify to entire mm. */
+   flush_tlb_mm(vma->vm_mm);
 }
 
 static inline void flush_tlb_all_local(void)
@@ -112,40 +88,17 @@ static inline void flush_tlb_all_local(void)
isb();
 }
 
-static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long 
end)
-{
-   unsigned long addr;
-   start >>= 12;
-   end >>= 12;
-
-   dsb(ishst);
-   for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-   asm("tlbi vaae1is, %0" : : "r"(addr));
-   dsb(ish);
-   isb();
-}
-
-/*
- * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
- * necessarily a performance improvement.
- */
-#define MAX_TLB_RANGE  (1024UL << PAGE_SHIFT)
-
 static inline void flush_tlb_range(struct vm_area_struct *vma,
   unsigned long start, unsigned long end)
 {
-   if ((end - start) <= MAX_TLB_RANGE)
-   __flush_tlb_range(vma, start, end);
-   else
-   flush_tlb_mm(vma->vm_mm);
+   /* Simplify to entire mm. */
+   flush_tlb_mm(vma->vm_mm);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long 
end)
 {
-   if ((end - start) <= MAX_TLB_RANGE)
-   __flush_tlb_kernel_range(start, end);
-   else
-   flush_tlb_all();
+   /* Simplify to all. */
+   flush_tlb_all();
 }
 
 /*
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 4dfa397..45f24d3 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -27,6 +28,51 @@
 
 #include "mm.h"
 
+static void flush_tlb_local(void *info)
+{
+   asm volatile("\n"
+"  tlbivmalle1\n"
+"  isb sy"
+   );
+}
+
+static void flush_tlb_mm_local(void *info)
+{
+   unsigned long asid = (unsigned long)info;
+
+   asm volatile("\n"
+"  tlbiaside1, %0\n"
+"  isb sy"
+: : "r" (asid)
+   );
+}
+
+void flush_tlb_all(void)
+{
+   /* Make sure page table modifications are visible. */
+   dsb(ishst);
+   /* IPI to all CPUs to do local flush. */
+   on_each_cpu(flush_tlb_local, NULL, 1);
+
+}
+EXPORT_SYMBOL(flush_tlb_all);
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+   if (!mm) {
+   flush_tlb_all();
+   } else {
+   unsigned long asid = (unsigned long)ASID(mm) << 48;
+   /* Make sure page table 

Re: [BUG] mellanox IB driver fails to load on large config

2015-07-11 Thread Or Gerlitz
On Fri, Jul 10, 2015 at 10:15 PM, andrew banman  wrote:
> I'm seeing a large number of allocation errors originating from the Mellanox 
> IB
> driver when booting the 4.2-rc1 kernel on a 4096cpu 32TB memory system:

Just to make sure, mlx4 works fine on this small (...) system with 4.1
and 4.2-rc1 breaks, or 4.2-rc1 is the 1st time you're trying that
config?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] selinux: fix mprotect PROT_EXEC regression caused by mm change

2015-07-11 Thread Hugh Dickins
On Fri, 10 Jul 2015, Stephen Smalley wrote:

> commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip
> security check and lockdep conflict with XFS") caused a regression for
> SELinux by disabling any SELinux checking of mprotect PROT_EXEC on
> shared anonymous mappings.  However, even before that regression, the
> checking on such mprotect PROT_EXEC calls was inconsistent with the
> checking on a mmap PROT_EXEC call for a shared anonymous mapping.  On a
> mmap, the security hook is passed a NULL file and knows it is dealing with
> an anonymous mapping and therefore applies an execmem check and no file
> checks.  On a mprotect, the security hook is passed a vma with a
> non-NULL vm_file (as this was set from the internally-created shmem
> file during mmap) and therefore applies the file-based execute check and
> no execmem check.  Since the aforementioned commit now marks the shmem
> zero inode with the S_PRIVATE flag, the file checks are disabled and
> we have no checking at all on mprotect PROT_EXEC.  Add a test to
> the mprotect hook logic for such private inodes, and apply an execmem
> check in that case.  This makes the mmap and mprotect checking consistent
> for shared anonymous mappings, as well as for /dev/zero and ashmem.
> 
> Signed-off-by: Stephen Smalley 

Thank you for correcting that, Stephen (and for the nicely detailed
commit description): it looks right to me so I'll say

Acked-by: Hugh Dickins 

but I know far too little of SElinux, and its defaults, to confirm
whether it actually does all you need - I'll trust you on that.

(There being various other references to the file in file_map_prot_check()
and selinux_file_mprotect(), and I couldn't tell if they should or should
not be modified by IS_PRIVATE(file_inode(file) checks too: my best guess
was that they wouldn't matter.)

> ---
>  security/selinux/hooks.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 6231081..564079c 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -3283,7 +3283,8 @@ static int file_map_prot_check(struct file *file, 
> unsigned long prot, int shared
>   int rc = 0;
>  
>   if (default_noexec &&
> - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE {
> + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
> +(!shared && (prot & PROT_WRITE {
>   /*
>* We are making executable an anonymous mapping or a
>* private file mapping that will also be writable.
> -- 
> 2.1.0
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/3] usb: dwc3: gadget: return error if command sent to DEPCMD register fails

2015-07-11 Thread Felipe Balbi
Hi,

On Sat, Jul 11, 2015 at 05:17:32PM +, Subbaraya Sundeep Bhatta wrote:
> > > >> Hi Felipe,
> > > >>
> > > >> Just an update on this.
> > > >>
> > > >> I'm trying to get this working with our latest IP with dwc3 from
> > > >> your testing/next branch. It fails the usbtest with a problem
> > > >> unrelated to this patch.
> > > >>.
> > > >> It passes on 4.1.1.
> > > >>
> > > >> I'll have to look into the failure but I won't get to it until next
> > > >> week as I'm off the rest of this week.
> > > >
> > > > interesting... If you could post failure signature, I can help
> > > > looking at it, but I guess it's too late to ask :-)
> > > >
> > > > thanks for helping though
> > > >
> > >
> > >
> > > Hi Felipe,
> > >
> > > Nevermind about my issue, it ended up being a setup-related problem.
> > >
> > > I actually do see the same error as you due to this series of patches.
> > > Except I see it happening before even the first iteration. I get a
> > > completion status of 1 for the Set Endpoint Transfer Resources
> > > command. I'm not sure why this is.
> > >
> > > I don't see any conflict with any previous Transfer Complete.
> 
> Same behavior at my end too. Fails before first iteration and I get
> completion status of 1 for Set Endpoint Resource command. Attached the
> logs of testing done with this patch and without this patch.
> Without this patch I often see completion status of 1 for Set Endpoint
> Transfer Resources command for Bulk and Isoc endpoints but test
> proceeds because driver just logs command completion status and moves
> on. We can revert this patch for time being. IP version is 2.90a.

yeah, that's what I mean, it really seems like it's the IP misbehaving.

John, let's try to figure out what's the root cause of this, we really
want to use command completion status at some point, but for now we need
to revert the patch :-(

Let me know if you want me to log STARS ticket on your solvnet system.

cheers

-- 
balbi


signature.asc
Description: Digital signature


Re: [PATCH RESEND] iio: adc: rockchip_saradc: add missing MODULE_* data

2015-07-11 Thread Heiko Stübner
Hi Jonathan,

Am Samstag, 11. Juli 2015, 18:32:42 schrieb Jonathan Cameron:
> On 08/07/15 15:17, Heiko Stuebner wrote:
> > The module-data is currently missing. This includes the
> > license-information
> > which makes the driver taint the kernel and miss symbols when compiled as
> > module.
> > 
> > Fixes: 44d6f2ef94f9 ("iio: adc: add driver for Rockchip saradc")
> > Signed-off-by: Heiko Stuebner 
> 
> Sorry Heiko,
> 
> Not entirely sure why I haven't picked this up before.
> 
> Anyhow, now applied to the fixes-for-4.2 branch of iio.git
> and marked for stable.  I need to catch up with a bit of a
> backlog, but should get a pull request out to Greg sometime
> early next week.

really no problem. I track my patches and generally simply keep pestering 
people for as long as it takes ;-)

Heiko
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re:

2015-07-11 Thread Mustapha Abiola



0001-Fix-redundant-check-against-unsigned-int-in-broken-a.patch
Description: Binary data


Re: [PATCH v2] net: dsa: mv88e6xxx: add write access to debugfs regs file

2015-07-11 Thread Vivien Didelot
Hi David,

On Jul 11, 2015, at 2:01 AM, David da...@davemloft.net wrote:

> From: Vivien Didelot 
> Date: Thu,  9 Jul 2015 17:13:29 -0400
> 
>> Allow write access to the regs file in the debugfs interface, with the
>> following parameters:
>> 
>> echo> regs
>> 
>> Where "name" is the register name (as shown in the header row), "reg" is
>> the register address (as shown in the first column) and "value" is the
>> 16-bit value. e.g.:
>> 
>> echo GLOBAL 1a 5550 > regs
>> 
>> Signed-off-by: Vivien Didelot 
> 
> I don't know about this.
> 
> This starts to smell like a back door for proprietary userspace SDKs to
> program the switch hardware.
> 
> Yes, they can do it via other mechanisms, but we don't have to make it
> any eaiser for them either.

I agree with you and I wouldn't want that neither.

> If you want to poke registers, hack the module just like any other
> person with appropriate privileges can do.

I'm not sure what you mean. Keeping some custom patches in our local tree?

> Frankly, all of this debugfs crap in the DSA drivers smells like poo.
> I don't like it _AT_ _ALL_, and I shouldn't have allowed any of it
> into the tree in the first place.
> 
> I might just remove it all myself, it bothers me so much.
> 
> Fetching information should be done by well typed, generic, interfaces
> that apply to any similar device or object.  All of this debugfs stuff
> smells of hacks and special case crap that's only usable for one
> device type and that makes it the single most terrible interface to
> give to users.

In the meantime, this is really useful for development. i.e. ensuring a good
switchdev/DSA interaction without being able to read and write directly the
hardware VLAN table, is a bit a PITA. A dynamic debugfs looked appropriate.

On the other hand, the mv88e6xxx driver gets cluttered with all this code. I'd
gladly move all this code in a mv88e6xxx-debugfs.c file, and conditionally
compile it with:

mv88e6xxx_drv-$(CONFIG_DEBUG_FS) += mv88e6xxx-debugfs.o

similar to what the i2400m driver does.

Would that be appreciated?

Thanks,
-v
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341

2015-07-11 Thread Arjan van de Ven

On 7/11/2015 11:26 AM, Porteus Kiosk wrote:

Hello Arjan,

We need it for setting up the time in the hardware clock through the 'hwclock' 
command.

Thank you.



hmm thinking about it after coffee... there is an RTC that can be exposed to 
userspace.
hrmpf. Wonder why it's not there for you




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341

2015-07-11 Thread Arjan van de Ven

On 7/11/2015 11:21 AM, Arjan van de Ven wrote:

On 7/11/2015 10:59 AM, Larry Finger wrote:

On a Winbook TW100 BayTrail tablet, kernel 4.0 and later do not create 
/dev/rtc0 when CONFIG_PINCTRL_BAYTRAIL is set in the configuration. Removing 
this option from the
config creates a real-time clock; however, it is no longer possible to get the 
tablet to sleep using the power button. Only complete shutdown works.

This problem was bisected to the following commit:


in "hardware reduced mode" (e.g. tablets) on Baytrail the RTC is not actually 
enabled/initialized by the firmware; talking to it may appear to work but it's really not
a good idea (and breaks things likes suspend/resume etc).


(or in other words, many of the legacy PC things are not supposed to be there)

what did you want to use rtc0 for?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341

2015-07-11 Thread Arjan van de Ven

On 7/11/2015 10:59 AM, Larry Finger wrote:

On a Winbook TW100 BayTrail tablet, kernel 4.0 and later do not create 
/dev/rtc0 when CONFIG_PINCTRL_BAYTRAIL is set in the configuration. Removing 
this option from the
config creates a real-time clock; however, it is no longer possible to get the 
tablet to sleep using the power button. Only complete shutdown works.

This problem was bisected to the following commit:


in "hardware reduced mode" (e.g. tablets) on Baytrail the RTC is not actually 
enabled/initialized by the firmware; talking to it may appear to work but it's really not
a good idea (and breaks things likes suspend/resume etc).



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] drm/atomic: fix null dereference

2015-07-11 Thread Rob Clark
On Sat, Jul 11, 2015 at 1:24 PM, Sudip Mukherjee
 wrote:
> We are checking the size of e->event but we were doing it when e is
> known to be NULL.

nak, this will leak event_space..  since it is a sizeof, it isn't
actually deref'ing e, but rather just using the static type info, so
it's ok (although perhaps funny looking)

BR,
-R


> Signed-off-by: Sudip Mukherjee 
> ---
>  drivers/gpu/drm/drm_atomic.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
> index acebd16..51d3a85 100644
> --- a/drivers/gpu/drm/drm_atomic.c
> +++ b/drivers/gpu/drm/drm_atomic.c
> @@ -1311,7 +1311,6 @@ static struct drm_pending_vblank_event 
> *create_vblank_event(
> e = kzalloc(sizeof *e, GFP_KERNEL);
> if (e == NULL) {
> spin_lock_irqsave(>event_lock, flags);
> -   file_priv->event_space += sizeof e->event;
> spin_unlock_irqrestore(>event_lock, flags);
> goto out;
> }
> --
> 1.8.1.2
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] xen/blkfront: convert to blk-mq APIs

2015-07-11 Thread Jens Axboe

On 07/11/2015 07:30 AM, Bob Liu wrote:

Note: This patch is based on original work of Arianna's internship for
GNOME's Outreach Program for Women.


Great to see this finally get prepped to go in!


Only one hardware queue is used now, so there is no performance change.


I would hope that the blk-mq path, even with one queue, is a perf win 
over the old interface. So I'm not sure that is correct. But the bigger 
win will be with more queues, of course.



The legacy non-mq code is deleted completely which is the same as other
drivers like virtio, mtip, and nvme.

Also dropped one unnecessary holding of info->io_lock when calling
blk_mq_stop_hw_queues().

Changes in v2:
  - Reorganized blk_mq_queue_rq()
  - Restored most io_locks in place


Looks good to me. The most common error case is the busy-out not 
stopping queues, or not restarting them at completion. But that all 
looks fine.


I would, however, rename blk_mq_queue_rq(). It sounds like a core 
function. blkif_queue_rq() would be more appropriate.



Signed-off-by: Arianna Avanzini 
Signed-off-by: Bob Liu 


Acked-by: Jens Axboe 


--
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 02/11] blkcg: use blkg_free() in blkcg_init_queue() failure path

2015-07-11 Thread Tejun Heo
When blkcg_init_queue() fails midway after creating a new blkg, it
performs kfree() directly; however, this doesn't free the policy data
areas.  Make it use blkg_free() instead.  In turn, blkg_free() is
updated to handle root request_list special case.

While this fixes a possible memory leak, it's on an unlikely failure
path of an already cold path and the size leaked per occurrence is
miniscule too.  I don't think it needs to be tagged for -stable.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fbb0b65..64cc48f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -70,7 +70,8 @@ static void blkg_free(struct blkcg_gq *blkg)
for (i = 0; i < BLKCG_MAX_POLS; i++)
kfree(blkg->pd[i]);
 
-   blk_exit_rl(>rl);
+   if (blkg->blkcg != _root)
+   blk_exit_rl(>rl);
kfree(blkg);
 }
 
@@ -934,7 +935,7 @@ int blkcg_init_queue(struct request_queue *q)
radix_tree_preload_end();
 
if (IS_ERR(blkg)) {
-   kfree(new_blkg);
+   blkg_free(new_blkg);
return PTR_ERR(blkg);
}
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCHSET v3 block/for-4.3] blkcg: blkcg policy methods and data handling cleanup

2015-07-11 Thread Tejun Heo
This is v3 of blkcg_policy methods cleanup patchset.  Changes from the
last take [L] are

* Rebased on top of block/for-linus.

* 0003-blkcg-remove-unnecessary-blkcg_root-handling-from-cs.patch and
  0004-blkcg-restructure-blkg_policy_data-allocation-in-blk.patch
  added.  These are follow-up cleanups for the blkcg_policy_data
  handling fixes which went into block/for-linus.

* 0010-blkcg-cosmetic-updates-about-blkcg_policy_data.patch and
  0011-blkcg-replace-blkcg_policy-cpd_size-with-cpd_alloc-f.patch
  added so that blkcg_policy_data handling is consistent with
  blkg_policy_data handling.

This patchset contains assorted cleanups for blkcg_policy methods and
blk[c]g_policy_data handling.

* alloc/free added for blkg_policy_data.  exit dropped.

* alloc/free added for blkcg_policy_data.

* blk-throttle's async percpu allocation is replaced with direct
  allocation.

* all methods now take blk[c]g_policy_data instead of blkcg_gq or
  blkcg.

This patchset contains the following 11 patches.

 0001-blkcg-remove-unnecessary-request_list-blkg-NULL-test.patch
 0002-blkcg-use-blkg_free-in-blkcg_init_queue-failure-path.patch
 0003-blkcg-remove-unnecessary-blkcg_root-handling-from-cs.patch
 0004-blkcg-restructure-blkg_policy_data-allocation-in-blk.patch
 0005-blkcg-make-blkcg_activate_policy-allow-NULL-pd_init_.patch
 0006-blkcg-replace-blkcg_policy-pd_size-with-pd_alloc-fre.patch
 0007-blk-throttle-remove-asynchrnous-percpu-stats-allocat.patch
 0008-blk-throttle-clean-up-blkg_policy_data-alloc-init-ex.patch
 0009-blkcg-make-blkcg_policy-methods-take-a-pointer-to-bl.patch
 0010-blkcg-cosmetic-updates-about-blkcg_policy_data.patch
 0011-blkcg-replace-blkcg_policy-cpd_size-with-cpd_alloc-f.patch

0001-0005 are misc cleanups.  0006-0008 add alloc/free methods and
remove blk-throttle's async percpu allocation mechanism.  0009 makes
all methods take blkcg_policy_data.  0010-0011 apply similar cleanups
to blkcg_policy_data handling.

This patchset is also available in the following git branch.

 git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git 
review-blkcg-methods-cleanup

and is on top of

  block/for-linus 06b285bd1125 ("blkcg: fix blkcg_policy_data allocation bug")
+ [1] [PATCHSET block/for-4.3] writeback: cgroup writeback updates
+ [2] [PATCHSET v2 block/for-4.3] block, cgroup: make cfq charge async IOs to 
the appropriate blkcgs

diffstat follows, thanks.

 block/blk-cgroup.c |  171 +++-
 block/blk-throttle.c   |  173 +
 block/cfq-iosched.c|   68 +
 include/linux/blk-cgroup.h |   65 
 4 files changed, 214 insertions(+), 263 deletions(-)

--
tejun

[L] http://lkml.kernel.org/g/1436284293-4666-1-git-send-email...@kernel.org
[1] http://lkml.kernel.org/g/1436281823-1947-1-git-send-email...@kernel.org
[2] http://lkml.kernel.org/g/1436283361-3889-1-git-send-email...@kernel.org
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 04/11] blkcg: restructure blkg_policy_data allocation in blkcg_activate_policy()

2015-07-11 Thread Tejun Heo
When a policy gets activated, it needs to allocate and install its
policy data on all existing blkg's (blkcg_gq's).  Because blkg
iteration is protected by a spinlock, it currently counts the total
number of blkg's in the system, allocates the matching number of
policy data on a list and installs them during a single iteration.

This can be simplified by using speculative GFP_NOWAIT allocations
while iterating and falling back to a preallocated policy data on
failure.  If the preallocated one has already been consumed, it
releases the lock, preallocate with GFP_KERNEL and then restarts the
iteration.  This can be a bit more expensive than before but policy
activation is a very cold path and shouldn't matter.

Signed-off-by: Tejun Heo 
---
 block/blk-cgroup.c | 55 ++
 include/linux/blk-cgroup.h |  3 ---
 2 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2a493ce..5dbbacd 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1043,65 +1043,52 @@ EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);
 int blkcg_activate_policy(struct request_queue *q,
  const struct blkcg_policy *pol)
 {
-   LIST_HEAD(pds);
+   struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg;
-   struct blkg_policy_data *pd, *nd;
-   int cnt = 0, ret;
+   int ret;
 
if (blkcg_policy_enabled(q, pol))
return 0;
 
-   /* count and allocate policy_data for all existing blkgs */
blk_queue_bypass_start(q);
-   spin_lock_irq(q->queue_lock);
-   list_for_each_entry(blkg, >blkg_list, q_node)
-   cnt++;
-   spin_unlock_irq(q->queue_lock);
-
-   /* allocate per-blkg policy data for all existing blkgs */
-   while (cnt--) {
-   pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
-   if (!pd) {
+pd_prealloc:
+   if (!pd_prealloc) {
+   pd_prealloc = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
+   if (!pd_prealloc) {
ret = -ENOMEM;
-   goto out_free;
+   goto out_bypass_end;
}
-   list_add_tail(>alloc_node, );
}
 
-   /*
-* Install the allocated pds and cpds. With @q bypassing, no new blkg
-* should have been created while the queue lock was dropped.
-*/
spin_lock_irq(q->queue_lock);
 
list_for_each_entry(blkg, >blkg_list, q_node) {
-   if (WARN_ON(list_empty())) {
-   /* umm... this shouldn't happen, just abort */
-   ret = -ENOMEM;
-   goto out_unlock;
-   }
-   pd = list_first_entry(, struct blkg_policy_data, 
alloc_node);
-   list_del_init(>alloc_node);
+   struct blkg_policy_data *pd;
 
-   /* grab blkcg lock too while installing @pd on @blkg */
-   spin_lock(>blkcg->lock);
+   if (blkg->pd[pol->plid])
+   continue;
+
+   pd = kzalloc_node(pol->pd_size, GFP_NOWAIT, q->node);
+   if (!pd)
+   swap(pd, pd_prealloc);
+   if (!pd) {
+   spin_unlock_irq(q->queue_lock);
+   goto pd_prealloc;
+   }
 
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
pol->pd_init_fn(blkg);
-
-   spin_unlock(>blkcg->lock);
}
 
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
-out_unlock:
+
spin_unlock_irq(q->queue_lock);
-out_free:
+out_bypass_end:
blk_queue_bypass_end(q);
-   list_for_each_entry_safe(pd, nd, , alloc_node)
-   kfree(pd);
+   kfree(pd_prealloc);
return ret;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 9711fc2..db82288 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -80,9 +80,6 @@ struct blkg_policy_data {
/* the blkg and policy id this per-policy data belongs to */
struct blkcg_gq *blkg;
int plid;
-
-   /* used during policy activation */
-   struct list_headalloc_node;
 };
 
 /*
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 07/11] blk-throttle: remove asynchrnous percpu stats allocation mechanism

2015-07-11 Thread Tejun Heo
Because percpu allocator couldn't do non-blocking allocations,
blk-throttle was forced to implement an ad-hoc asynchronous allocation
mechanism for its percpu stats for cases where blkg's (blkcg_gq's) are
allocated from an IO path without sleepable context.

Now that percpu allocator can handle gfp_mask and blkg_policy_data
alloc / free are handled by policy methods, the ad-hoc asynchronous
allocation mechanism can be replaced with direct allocation from
tg_stats_alloc_fn().  Rit it out.

This ensures that an active throtl_grp always has valid non-NULL
->stats_cpu.  Remove checks on it.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-throttle.c | 112 ---
 1 file changed, 25 insertions(+), 87 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f1dd691..3c86976 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -144,9 +144,6 @@ struct throtl_grp {
 
/* Per cpu stats pointer */
struct tg_stats_cpu __percpu *stats_cpu;
-
-   /* List of tgs waiting for per cpu stats memory to be allocated */
-   struct list_head stats_alloc_node;
 };
 
 struct throtl_data
@@ -168,13 +165,6 @@ struct throtl_data
struct work_struct dispatch_work;
 };
 
-/* list and work item to allocate percpu group stats */
-static DEFINE_SPINLOCK(tg_stats_alloc_lock);
-static LIST_HEAD(tg_stats_alloc_list);
-
-static void tg_stats_alloc_fn(struct work_struct *);
-static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn);
-
 static void throtl_pending_timer_fn(unsigned long arg);
 
 static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
@@ -256,53 +246,6 @@ static struct throtl_data *sq_to_td(struct 
throtl_service_queue *sq)
}   \
 } while (0)
 
-static void tg_stats_init(struct tg_stats_cpu *tg_stats)
-{
-   blkg_rwstat_init(_stats->service_bytes);
-   blkg_rwstat_init(_stats->serviced);
-}
-
-/*
- * Worker for allocating per cpu stat for tgs. This is scheduled on the
- * system_wq once there are some groups on the alloc_list waiting for
- * allocation.
- */
-static void tg_stats_alloc_fn(struct work_struct *work)
-{
-   static struct tg_stats_cpu *stats_cpu;  /* this fn is non-reentrant */
-   struct delayed_work *dwork = to_delayed_work(work);
-   bool empty = false;
-
-alloc_stats:
-   if (!stats_cpu) {
-   int cpu;
-
-   stats_cpu = alloc_percpu(struct tg_stats_cpu);
-   if (!stats_cpu) {
-   /* allocation failed, try again after some time */
-   schedule_delayed_work(dwork, msecs_to_jiffies(10));
-   return;
-   }
-   for_each_possible_cpu(cpu)
-   tg_stats_init(per_cpu_ptr(stats_cpu, cpu));
-   }
-
-   spin_lock_irq(_stats_alloc_lock);
-
-   if (!list_empty(_stats_alloc_list)) {
-   struct throtl_grp *tg = list_first_entry(_stats_alloc_list,
-struct throtl_grp,
-stats_alloc_node);
-   swap(tg->stats_cpu, stats_cpu);
-   list_del_init(>stats_alloc_node);
-   }
-
-   empty = list_empty(_stats_alloc_list);
-   spin_unlock_irq(_stats_alloc_lock);
-   if (!empty)
-   goto alloc_stats;
-}
-
 static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 {
INIT_LIST_HEAD(>node);
@@ -405,7 +348,27 @@ static void throtl_service_queue_exit(struct 
throtl_service_queue *sq)
 
 static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 {
-   return kzalloc_node(sizeof(struct throtl_grp), gfp, node);
+   struct throtl_grp *tg;
+   int cpu;
+
+   tg = kzalloc_node(sizeof(*tg), gfp, node);
+   if (!tg)
+   return NULL;
+
+   tg->stats_cpu = alloc_percpu_gfp(struct tg_stats_cpu, gfp);
+   if (!tg->stats_cpu) {
+   kfree(tg);
+   return NULL;
+   }
+
+   for_each_possible_cpu(cpu) {
+   struct tg_stats_cpu *stats_cpu = per_cpu_ptr(tg->stats_cpu, 
cpu);
+
+   blkg_rwstat_init(_cpu->service_bytes);
+   blkg_rwstat_init(_cpu->serviced);
+   }
+
+   return >pd;
 }
 
 static void throtl_pd_init(struct blkcg_gq *blkg)
@@ -413,7 +376,6 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_data *td = blkg->q->td;
struct throtl_service_queue *parent_sq;
-   unsigned long flags;
int rw;
 
/*
@@ -448,16 +410,6 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
tg->bps[WRITE] = -1;
tg->iops[READ] = -1;
tg->iops[WRITE] = -1;
-
-   /*
-* Ugh... We need to perform per-cpu allocation for tg->stats_cpu
-* but percpu 

[PATCH 06/11] blkcg: replace blkcg_policy->pd_size with ->pd_alloc/free_fn() methods

2015-07-11 Thread Tejun Heo
A blkg (blkcg_gq) represents the relationship between a cgroup and
request_queue.  Each active policy has a pd (blkg_policy_data) on each
blkg.  The pd's were allocated by blkcg core and each policy could
request to allocate extra space at the end by setting
blkcg_policy->pd_size larger than the size of pd.

This is a bit unusual but was done this way mostly to simplify error
handling and all the existing use cases could be handled this way;
however, this is becoming too restrictive now that percpu memory can
be allocated without blocking.

This introduces two new mandatory blkcg_policy methods - pd_alloc_fn()
and pd_free_fn() - which are used to allocate and release pd for a
given policy.  As pd allocation is now done from policy side, it can
simply allocate a larger area which embeds pd at the beginning.  This
change makes ->pd_size pointless.  Removed.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 21 +++--
 block/blk-throttle.c   | 13 -
 block/cfq-iosched.c| 13 -
 include/linux/blk-cgroup.h | 18 +-
 4 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b558705..9d83623 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -68,7 +68,8 @@ static void blkg_free(struct blkcg_gq *blkg)
return;
 
for (i = 0; i < BLKCG_MAX_POLS; i++)
-   kfree(blkg->pd[i]);
+   if (blkg->pd[i])
+   blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
if (blkg->blkcg != _root)
blk_exit_rl(>rl);
@@ -114,7 +115,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, 
struct request_queue *q,
continue;
 
/* alloc per-policy data and attach it to blkg */
-   pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
+   pd = pol->pd_alloc_fn(gfp_mask, q->node);
if (!pd)
goto err_free;
 
@@ -1053,7 +1054,7 @@ int blkcg_activate_policy(struct request_queue *q,
blk_queue_bypass_start(q);
 pd_prealloc:
if (!pd_prealloc) {
-   pd_prealloc = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
+   pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
if (!pd_prealloc) {
ret = -ENOMEM;
goto out_bypass_end;
@@ -1068,7 +1069,7 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkg->pd[pol->plid])
continue;
 
-   pd = kzalloc_node(pol->pd_size, GFP_NOWAIT, q->node);
+   pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node);
if (!pd)
swap(pd, pd_prealloc);
if (!pd) {
@@ -1089,7 +1090,8 @@ int blkcg_activate_policy(struct request_queue *q,
spin_unlock_irq(q->queue_lock);
 out_bypass_end:
blk_queue_bypass_end(q);
-   kfree(pd_prealloc);
+   if (pd_prealloc)
+   pol->pd_free_fn(pd_prealloc);
return ret;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1124,8 +1126,10 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (pol->pd_exit_fn)
pol->pd_exit_fn(blkg);
 
-   kfree(blkg->pd[pol->plid]);
-   blkg->pd[pol->plid] = NULL;
+   if (blkg->pd[pol->plid]) {
+   pol->pd_free_fn(blkg->pd[pol->plid]);
+   blkg->pd[pol->plid] = NULL;
+   }
 
spin_unlock(>blkcg->lock);
}
@@ -1147,9 +1151,6 @@ int blkcg_policy_register(struct blkcg_policy *pol)
struct blkcg *blkcg;
int i, ret;
 
-   if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
-   return -EINVAL;
-
mutex_lock(_pol_register_mutex);
mutex_lock(_pol_mutex);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b231935..f1dd691 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -403,6 +403,11 @@ static void throtl_service_queue_exit(struct 
throtl_service_queue *sq)
del_timer_sync(>pending_timer);
 }
 
+static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
+{
+   return kzalloc_node(sizeof(struct throtl_grp), gfp, node);
+}
+
 static void throtl_pd_init(struct blkcg_gq *blkg)
 {
struct throtl_grp *tg = blkg_to_tg(blkg);
@@ -493,6 +498,11 @@ static void throtl_pd_exit(struct blkcg_gq *blkg)
throtl_service_queue_exit(>service_queue);
 }
 
+static void throtl_pd_free(struct blkg_policy_data *pd)
+{
+   kfree(pd);
+}
+
 static void throtl_pd_reset_stats(struct blkcg_gq *blkg)
 {
struct throtl_grp *tg = blkg_to_tg(blkg);
@@ -1468,12 +1478,13 @@ static void throtl_shutdown_wq(struct request_queue *q)
 }
 
 static struct blkcg_policy blkcg_policy_throtl = {
-   .pd_size= sizeof(struct throtl_grp),

[PATCH 05/11] blkcg: make blkcg_activate_policy() allow NULL ->pd_init_fn

2015-07-11 Thread Tejun Heo
blkg_create() allows NULL ->pd_init_fn() but blkcg_activate_policy()
doesn't.  As both in-kernel policies implement ->pd_init_fn, it
currently doesn't break anything.  Update blkcg_activate_policy() so
that its behavior is consistent with blkg_create().

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5dbbacd..b558705 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1079,7 +1079,8 @@ int blkcg_activate_policy(struct request_queue *q,
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
-   pol->pd_init_fn(blkg);
+   if (pol->pd_init_fn)
+   pol->pd_init_fn(blkg);
}
 
__set_bit(pol->plid, q->blkcg_pols);
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 09/11] blkcg: make blkcg_policy methods take a pointer to blkcg_policy_data

2015-07-11 Thread Tejun Heo
The newly added ->pd_alloc_fn() and ->pd_free_fn() deal with pd
(blkg_policy_data) while the older ones use blkg (blkcg_gq).  As using
blkg doesn't make sense for ->pd_alloc_fn() and after allocation pd
can always be mapped to blkg and given that these are policy-specific
methods, it makes sense to converge on pd.

This patch makes all methods deal with pd instead of blkg.  Most
conversions are trivial.  In blk-cgroup.c, a couple method invocation
sites now test whether pd exists instead of policy state for
consistency.  This shouldn't cause any behavioral differences.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 18 --
 block/blk-throttle.c   | 13 +++--
 block/cfq-iosched.c| 14 +++---
 include/linux/blk-cgroup.h |  8 
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e509bc8..d18cdb6 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -242,7 +242,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_policy *pol = blkcg_policy[i];
 
if (blkg->pd[i] && pol->pd_init_fn)
-   pol->pd_init_fn(blkg);
+   pol->pd_init_fn(blkg->pd[i]);
}
 
/* insert */
@@ -256,7 +256,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_policy *pol = blkcg_policy[i];
 
if (blkg->pd[i] && pol->pd_online_fn)
-   pol->pd_online_fn(blkg);
+   pol->pd_online_fn(blkg->pd[i]);
}
}
blkg->online = true;
@@ -347,7 +347,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
struct blkcg_policy *pol = blkcg_policy[i];
 
if (blkg->pd[i] && pol->pd_offline_fn)
-   pol->pd_offline_fn(blkg);
+   pol->pd_offline_fn(blkg->pd[i]);
}
blkg->online = false;
 
@@ -468,9 +468,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state 
*css,
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
 
-   if (blkcg_policy_enabled(blkg->q, pol) &&
-   pol->pd_reset_stats_fn)
-   pol->pd_reset_stats_fn(blkg);
+   if (blkg->pd[i] && pol->pd_reset_stats_fn)
+   pol->pd_reset_stats_fn(blkg->pd[i]);
}
}
 
@@ -1072,7 +1071,7 @@ int blkcg_activate_policy(struct request_queue *q,
pd->blkg = blkg;
pd->plid = pol->plid;
if (pol->pd_init_fn)
-   pol->pd_init_fn(blkg);
+   pol->pd_init_fn(pd);
}
 
__set_bit(pol->plid, q->blkcg_pols);
@@ -1112,10 +,9 @@ void blkcg_deactivate_policy(struct request_queue *q,
/* grab blkcg lock too while removing @pd from @blkg */
spin_lock(>blkcg->lock);
 
-   if (pol->pd_offline_fn)
-   pol->pd_offline_fn(blkg);
-
if (blkg->pd[pol->plid]) {
+   if (pol->pd_offline_fn)
+   pol->pd_offline_fn(blkg->pd[pol->plid]);
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c3a235b..c2c7547 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -377,9 +377,10 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, 
int node)
return >pd;
 }
 
-static void throtl_pd_init(struct blkcg_gq *blkg)
+static void throtl_pd_init(struct blkg_policy_data *pd)
 {
-   struct throtl_grp *tg = blkg_to_tg(blkg);
+   struct throtl_grp *tg = pd_to_tg(pd);
+   struct blkcg_gq *blkg = tg_to_blkg(tg);
struct throtl_data *td = blkg->q->td;
struct throtl_service_queue *sq = >service_queue;
 
@@ -417,13 +418,13 @@ static void tg_update_has_rules(struct throtl_grp *tg)
(tg->bps[rw] != -1 || tg->iops[rw] != -1);
 }
 
-static void throtl_pd_online(struct blkcg_gq *blkg)
+static void throtl_pd_online(struct blkg_policy_data *pd)
 {
/*
 * We don't want new groups to escape the limits of its ancestors.
 * Update has_rules[] after a new group is brought online.
 */
-   tg_update_has_rules(blkg_to_tg(blkg));
+   tg_update_has_rules(pd_to_tg(pd));
 }
 
 static void throtl_pd_free(struct blkg_policy_data *pd)
@@ -435,9 +436,9 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
kfree(tg);
 }
 
-static void throtl_pd_reset_stats(struct blkcg_gq *blkg)
+static void throtl_pd_reset_stats(struct blkg_policy_data *pd)
 {
-   struct throtl_grp *tg = blkg_to_tg(blkg);
+   

[PATCH 08/11] blk-throttle: clean up blkg_policy_data alloc/init/exit/free methods

2015-07-11 Thread Tejun Heo
With the recent addition of alloc and free methods, things became
messier.  This patch reorganizes them according to the followings.

* ->pd_alloc_fn()

  Responsible for allocation and static initializations - the ones
  which can be done independent of where the pd might be attached.

* ->pd_init_fn()

  Initializations which require the knowledge of where the pd is
  attached.

* ->pd_free_fn()

  The counter part of pd_alloc_fn().  Static de-init and freeing.

This leaves ->pd_exit_fn() without any users.  Removed.

While at it, collapse an one liner function throtl_pd_exit(), which
has only one user, into its user.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 11 -
 block/blk-throttle.c   | 57 --
 block/cfq-iosched.c| 15 
 include/linux/blk-cgroup.h |  2 --
 4 files changed, 31 insertions(+), 54 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9d83623..e509bc8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -402,15 +402,6 @@ static void blkg_destroy_all(struct request_queue *q)
 void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, 
rcu_head);
-   int i;
-
-   /* tell policies that this one is being freed */
-   for (i = 0; i < BLKCG_MAX_POLS; i++) {
-   struct blkcg_policy *pol = blkcg_policy[i];
-
-   if (blkg->pd[i] && pol->pd_exit_fn)
-   pol->pd_exit_fn(blkg);
-   }
 
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(>blkcg->css);
@@ -1123,8 +1114,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
if (pol->pd_offline_fn)
pol->pd_offline_fn(blkg);
-   if (pol->pd_exit_fn)
-   pol->pd_exit_fn(blkg);
 
if (blkg->pd[pol->plid]) {
pol->pd_free_fn(blkg->pd[pol->plid]);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 3c86976..c3a235b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -330,26 +330,19 @@ static struct bio *throtl_pop_queued(struct list_head 
*queued,
 }
 
 /* init a service_queue, assumes the caller zeroed it */
-static void throtl_service_queue_init(struct throtl_service_queue *sq,
- struct throtl_service_queue *parent_sq)
+static void throtl_service_queue_init(struct throtl_service_queue *sq)
 {
INIT_LIST_HEAD(>queued[0]);
INIT_LIST_HEAD(>queued[1]);
sq->pending_tree = RB_ROOT;
-   sq->parent_sq = parent_sq;
setup_timer(>pending_timer, throtl_pending_timer_fn,
(unsigned long)sq);
 }
 
-static void throtl_service_queue_exit(struct throtl_service_queue *sq)
-{
-   del_timer_sync(>pending_timer);
-}
-
 static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 {
struct throtl_grp *tg;
-   int cpu;
+   int rw, cpu;
 
tg = kzalloc_node(sizeof(*tg), gfp, node);
if (!tg)
@@ -361,6 +354,19 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, 
int node)
return NULL;
}
 
+   throtl_service_queue_init(>service_queue);
+
+   for (rw = READ; rw <= WRITE; rw++) {
+   throtl_qnode_init(>qnode_on_self[rw], tg);
+   throtl_qnode_init(>qnode_on_parent[rw], tg);
+   }
+
+   RB_CLEAR_NODE(>rb_node);
+   tg->bps[READ] = -1;
+   tg->bps[WRITE] = -1;
+   tg->iops[READ] = -1;
+   tg->iops[WRITE] = -1;
+
for_each_possible_cpu(cpu) {
struct tg_stats_cpu *stats_cpu = per_cpu_ptr(tg->stats_cpu, 
cpu);
 
@@ -375,8 +381,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
 {
struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_data *td = blkg->q->td;
-   struct throtl_service_queue *parent_sq;
-   int rw;
+   struct throtl_service_queue *sq = >service_queue;
 
/*
 * If on the default hierarchy, we switch to properly hierarchical
@@ -391,25 +396,10 @@ static void throtl_pd_init(struct blkcg_gq *blkg)
 * Limits of a group don't interact with limits of other groups
 * regardless of the position of the group in the hierarchy.
 */
-   parent_sq = >service_queue;
-
+   sq->parent_sq = >service_queue;
if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent)
-   parent_sq = _to_tg(blkg->parent)->service_queue;
-
-   throtl_service_queue_init(>service_queue, parent_sq);
-
-   for (rw = READ; rw <= WRITE; rw++) {
-   throtl_qnode_init(>qnode_on_self[rw], tg);
-   throtl_qnode_init(>qnode_on_parent[rw], tg);
-   }
-
-   RB_CLEAR_NODE(>rb_node);
+   sq->parent_sq = _to_tg(blkg->parent)->service_queue;
tg->td = td;
-
-   tg->bps[READ] = -1;
-   

[PATCH 11/11] blkcg: replace blkcg_policy->cpd_size with ->cpd_alloc/free_fn() methods

2015-07-11 Thread Tejun Heo
Each active policy has a cpd (blkcg_policy_data) on each blkcg.  The
cpd's were allocated by blkcg core and each policy could request to
allocate extra space at the end by setting blkcg_policy->cpd_size
larger than the size of cpd.

This is a bit unusual but blkg (blkcg_gq) policy data used to be
handled this way too so it made sense to be consistent; however, blkg
policy data switched to alloc/free callbacks.

This patch makes similar changes to cpd handling.
blkcg_policy->cpd_alloc/free_fn() are added to replace ->cpd_size.  As
cpd allocation is now done from policy side, it can simply allocate a
larger area which embeds cpd at the beginning.

As ->cpd_alloc_fn() may be able to perform all necessary
initializations, this patch makes ->cpd_init_fn() optional.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
Cc: Arianna Avanzini 
---
 block/blk-cgroup.c | 39 ---
 block/cfq-iosched.c| 19 ++-
 include/linux/blk-cgroup.h | 17 ++---
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8173e06..48d95ca 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -813,11 +813,15 @@ static void blkcg_css_free(struct cgroup_subsys_state 
*css)
int i;
 
mutex_lock(_pol_mutex);
+
list_del(>all_blkcgs_node);
-   mutex_unlock(_pol_mutex);
 
for (i = 0; i < BLKCG_MAX_POLS; i++)
-   kfree(blkcg->cpd[i]);
+   if (blkcg->cpd[i])
+   blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
+
+   mutex_unlock(_pol_mutex);
+
kfree(blkcg);
 }
 
@@ -850,18 +854,18 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 * check if the policy requires any specific per-cgroup
 * data: if it does, allocate and initialize it.
 */
-   if (!pol || !pol->cpd_size)
+   if (!pol || !pol->cpd_alloc_fn)
continue;
 
-   BUG_ON(blkcg->cpd[i]);
-   cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+   cpd = pol->cpd_alloc_fn(GFP_KERNEL);
if (!cpd) {
ret = ERR_PTR(-ENOMEM);
goto free_pd_blkcg;
}
blkcg->cpd[i] = cpd;
cpd->plid = i;
-   pol->cpd_init_fn(cpd);
+   if (pol->cpd_init_fn)
+   pol->cpd_init_fn(cpd);
}
 
spin_lock_init(>lock);
@@ -877,7 +881,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 
 free_pd_blkcg:
for (i--; i >= 0; i--)
-   kfree(blkcg->cpd[i]);
+   if (blkcg->cpd[i])
+   blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
 free_blkcg:
kfree(blkcg);
mutex_unlock(_pol_mutex);
@@ -1154,11 +1159,11 @@ int blkcg_policy_register(struct blkcg_policy *pol)
blkcg_policy[pol->plid] = pol;
 
/* allocate and install cpd's */
-   if (pol->cpd_size) {
+   if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) {
struct blkcg_policy_data *cpd;
 
-   cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+   cpd = pol->cpd_alloc_fn(GFP_KERNEL);
if (!cpd) {
mutex_unlock(_pol_mutex);
goto err_free_cpds;
@@ -1180,10 +1185,12 @@ int blkcg_policy_register(struct blkcg_policy *pol)
return 0;
 
 err_free_cpds:
-   if (pol->cpd_size) {
+   if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) {
-   kfree(blkcg->cpd[pol->plid]);
-   blkcg->cpd[pol->plid] = NULL;
+   if (blkcg->cpd[pol->plid]) {
+   pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+   blkcg->cpd[pol->plid] = NULL;
+   }
}
}
blkcg_policy[pol->plid] = NULL;
@@ -1216,10 +1223,12 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
/* remove cpds and unregister */
mutex_lock(_pol_mutex);
 
-   if (pol->cpd_size) {
+   if (pol->cpd_alloc_fn) {
list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) {
-   kfree(blkcg->cpd[pol->plid]);
-   blkcg->cpd[pol->plid] = NULL;
+   if (blkcg->cpd[pol->plid]) {
+   pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+   blkcg->cpd[pol->plid] = NULL;
+   }
}
}
blkcg_policy[pol->plid] = NULL;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index dd6ea9e..a4429b3 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1568,6 +1568,16 @@ static void 

[PATCH 03/11] blkcg: remove unnecessary blkcg_root handling from css_alloc/free paths

2015-07-11 Thread Tejun Heo
blkcg_css_alloc() bypasses policy data allocation and blkcg_css_free()
bypasses policy data and blkcg freeing for blkcg_root.  There's no
reason to treat policy data any differently for blkcg_root.  If the
root css gets allocated after policies are registered, policy
registration path will add policy data; otherwise, the alloc path
will.  The free path is never invoked for root csses.

This patch removes the unnecessary special handling of blkcg_root from
css_alloc/free paths.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 block/blk-cgroup.c | 25 ++---
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 64cc48f..2a493ce 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -819,18 +819,15 @@ static void blkcg_css_offline(struct cgroup_subsys_state 
*css)
 static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
struct blkcg *blkcg = css_to_blkcg(css);
+   int i;
 
mutex_lock(_pol_mutex);
list_del(>all_blkcgs_node);
mutex_unlock(_pol_mutex);
 
-   if (blkcg != _root) {
-   int i;
-
-   for (i = 0; i < BLKCG_MAX_POLS; i++)
-   kfree(blkcg->pd[i]);
-   kfree(blkcg);
-   }
+   for (i = 0; i < BLKCG_MAX_POLS; i++)
+   kfree(blkcg->pd[i]);
+   kfree(blkcg);
 }
 
 static struct cgroup_subsys_state *
@@ -844,13 +841,12 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 
if (!parent_css) {
blkcg = _root;
-   goto done;
-   }
-
-   blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
-   if (!blkcg) {
-   ret = ERR_PTR(-ENOMEM);
-   goto free_blkcg;
+   } else {
+   blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
+   if (!blkcg) {
+   ret = ERR_PTR(-ENOMEM);
+   goto free_blkcg;
+   }
}
 
for (i = 0; i < BLKCG_MAX_POLS ; i++) {
@@ -877,7 +873,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
pol->cpd_init_fn(blkcg);
}
 
-done:
spin_lock_init(>lock);
INIT_RADIX_TREE(>blkg_tree, GFP_NOWAIT);
INIT_HLIST_HEAD(>blkg_list);
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 10/11] blkcg: cosmetic updates about blkcg_policy_data

2015-07-11 Thread Tejun Heo
* Rename blkcg->pd[] to blkcg->cpd[] so that cpd is consistently used
  for blkcg_policy_data.

* Make blkcg_policy->cpd_init_fn() take blkcg_policy_data instead of
  blkcg.  This makes it consistent with blkg_policy_data methods and
  to-be-added cpd alloc/free methods.

* blkcg_policy_data->blkcg and cpd_to_blkcg() added so that
  cpd_init_fn() can determine the associated blkcg from
  blkcg_policy_data.

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
Cc: Arianna Avanzini 
---
 block/blk-cgroup.c | 22 +++---
 block/cfq-iosched.c| 11 +--
 include/linux/blk-cgroup.h | 14 ++
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d18cdb6..8173e06 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -817,7 +817,7 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
mutex_unlock(_pol_mutex);
 
for (i = 0; i < BLKCG_MAX_POLS; i++)
-   kfree(blkcg->pd[i]);
+   kfree(blkcg->cpd[i]);
kfree(blkcg);
 }
 
@@ -853,15 +853,15 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
if (!pol || !pol->cpd_size)
continue;
 
-   BUG_ON(blkcg->pd[i]);
+   BUG_ON(blkcg->cpd[i]);
cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
if (!cpd) {
ret = ERR_PTR(-ENOMEM);
goto free_pd_blkcg;
}
-   blkcg->pd[i] = cpd;
+   blkcg->cpd[i] = cpd;
cpd->plid = i;
-   pol->cpd_init_fn(blkcg);
+   pol->cpd_init_fn(cpd);
}
 
spin_lock_init(>lock);
@@ -877,7 +877,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 
 free_pd_blkcg:
for (i--; i >= 0; i--)
-   kfree(blkcg->pd[i]);
+   kfree(blkcg->cpd[i]);
 free_blkcg:
kfree(blkcg);
mutex_unlock(_pol_mutex);
@@ -1164,9 +1164,9 @@ int blkcg_policy_register(struct blkcg_policy *pol)
goto err_free_cpds;
}
 
-   blkcg->pd[pol->plid] = cpd;
+   blkcg->cpd[pol->plid] = cpd;
cpd->plid = pol->plid;
-   pol->cpd_init_fn(blkcg);
+   pol->cpd_init_fn(cpd);
}
}
 
@@ -1182,8 +1182,8 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 err_free_cpds:
if (pol->cpd_size) {
list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) {
-   kfree(blkcg->pd[pol->plid]);
-   blkcg->pd[pol->plid] = NULL;
+   kfree(blkcg->cpd[pol->plid]);
+   blkcg->cpd[pol->plid] = NULL;
}
}
blkcg_policy[pol->plid] = NULL;
@@ -1218,8 +1218,8 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 
if (pol->cpd_size) {
list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) {
-   kfree(blkcg->pd[pol->plid]);
-   blkcg->pd[pol->plid] = NULL;
+   kfree(blkcg->cpd[pol->plid]);
+   blkcg->cpd[pol->plid] = NULL;
}
}
blkcg_policy[pol->plid] = NULL;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 95e6b0c..dd6ea9e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -220,7 +220,7 @@ struct cfqg_stats {
 /* Per-cgroup data */
 struct cfq_group_data {
/* must be the first member */
-   struct blkcg_policy_data pd;
+   struct blkcg_policy_data cpd;
 
unsigned int weight;
unsigned int leaf_weight;
@@ -612,7 +612,7 @@ static inline struct cfq_group *pd_to_cfqg(struct 
blkg_policy_data *pd)
 static struct cfq_group_data
 *cpd_to_cfqgd(struct blkcg_policy_data *cpd)
 {
-   return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL;
+   return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL;
 }
 
 static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
@@ -1568,12 +1568,11 @@ static void cfqg_stats_init(struct cfqg_stats *stats)
 #endif
 }
 
-static void cfq_cpd_init(const struct blkcg *blkcg)
+static void cfq_cpd_init(struct blkcg_policy_data *cpd)
 {
-   struct cfq_group_data *cgd =
-   cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]);
+   struct cfq_group_data *cgd = cpd_to_cfqgd(cpd);
 
-   if (blkcg == _root) {
+   if (cpd_to_blkcg(cpd) == _root) {
cgd->weight = 2 * CFQ_WEIGHT_DEFAULT;
cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
} else {
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index ddd4b8b..7988d47 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -45,7 +45,7 @@ struct blkcg {
struct blkcg_gq *blkg_hint;
struct 

[PATCH 01/11] blkcg: remove unnecessary request_list->blkg NULL test in blk_put_rl()

2015-07-11 Thread Tejun Heo
Since ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root
blkcg"), a request_list always has its blkg associated.  Drop
unnecessary rl->blkg NULL test from blk_put_rl().

Signed-off-by: Tejun Heo 
Cc: Vivek Goyal 
---
 include/linux/blk-cgroup.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1b62d76..9711fc2 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -394,8 +394,7 @@ static inline struct request_list *blk_get_rl(struct 
request_queue *q,
  */
 static inline void blk_put_rl(struct request_list *rl)
 {
-   /* root_rl may not have blkg set */
-   if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+   if (rl->blkg->blkcg != &blkcg_root)
blkg_put(rl->blkg);
 }
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] iio: Drop owner assignment from i2c_driver

2015-07-11 Thread Jonathan Cameron
On 10/07/15 06:54, Krzysztof Kozlowski wrote:
> i2c_driver does not need to set an owner because i2c_register_driver()
> will set it.
> 
> Signed-off-by: Krzysztof Kozlowski 
> 
Applied to the togreg branch of iio.git

Thanks,

Jonathan
> ---
> 
> The coccinelle script which generated the patch was sent here:
> http://www.spinics.net/lists/kernel/msg2029903.html
> ---
>  drivers/iio/accel/bma180.c | 1 -
>  drivers/iio/accel/st_accel_i2c.c   | 1 -
>  drivers/iio/adc/mcp3422.c  | 1 -
>  drivers/iio/adc/ti-adc081c.c   | 1 -
>  drivers/iio/dac/ad5064.c   | 1 -
>  drivers/iio/dac/ad5380.c   | 1 -
>  drivers/iio/dac/ad5446.c   | 1 -
>  drivers/iio/dac/max5821.c  | 1 -
>  drivers/iio/gyro/itg3200_core.c| 1 -
>  drivers/iio/gyro/st_gyro_i2c.c | 1 -
>  drivers/iio/humidity/si7005.c  | 1 -
>  drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 1 -
>  drivers/iio/light/apds9300.c   | 1 -
>  drivers/iio/light/bh1750.c | 1 -
>  drivers/iio/light/cm32181.c| 1 -
>  drivers/iio/light/cm3232.c | 1 -
>  drivers/iio/light/cm36651.c| 1 -
>  drivers/iio/light/gp2ap020a00f.c   | 1 -
>  drivers/iio/light/isl29125.c   | 1 -
>  drivers/iio/light/jsa1212.c| 1 -
>  drivers/iio/light/ltr501.c | 1 -
>  drivers/iio/light/tcs3414.c| 1 -
>  drivers/iio/light/tcs3472.c| 1 -
>  drivers/iio/light/tsl4531.c| 1 -
>  drivers/iio/light/vcnl4000.c   | 1 -
>  drivers/iio/magnetometer/st_magn_i2c.c | 1 -
>  drivers/iio/pressure/ms5611_i2c.c  | 1 -
>  drivers/iio/pressure/st_pressure_i2c.c | 1 -
>  drivers/iio/temperature/mlx90614.c | 1 -
>  drivers/iio/temperature/tmp006.c   | 1 -
>  30 files changed, 30 deletions(-)
> 
> diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c
> index 75c6d2103e07..f04b88406995 100644
> --- a/drivers/iio/accel/bma180.c
> +++ b/drivers/iio/accel/bma180.c
> @@ -846,7 +846,6 @@ MODULE_DEVICE_TABLE(i2c, bma180_ids);
>  static struct i2c_driver bma180_driver = {
>   .driver = {
>   .name   = "bma180",
> - .owner  = THIS_MODULE,
>   .pm = BMA180_PM_OPS,
>   },
>   .probe  = bma180_probe,
> diff --git a/drivers/iio/accel/st_accel_i2c.c 
> b/drivers/iio/accel/st_accel_i2c.c
> index d4ad72ca4a3d..a2f1c20319eb 100644
> --- a/drivers/iio/accel/st_accel_i2c.c
> +++ b/drivers/iio/accel/st_accel_i2c.c
> @@ -122,7 +122,6 @@ MODULE_DEVICE_TABLE(i2c, st_accel_id_table);
>  
>  static struct i2c_driver st_accel_driver = {
>   .driver = {
> - .owner = THIS_MODULE,
>   .name = "st-accel-i2c",
>   .of_match_table = of_match_ptr(st_accel_of_match),
>   },
> diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c
> index b96c636470ef..3555122008b4 100644
> --- a/drivers/iio/adc/mcp3422.c
> +++ b/drivers/iio/adc/mcp3422.c
> @@ -404,7 +404,6 @@ MODULE_DEVICE_TABLE(of, mcp3422_of_match);
>  static struct i2c_driver mcp3422_driver = {
>   .driver = {
>   .name = "mcp3422",
> - .owner = THIS_MODULE,
>   .of_match_table = of_match_ptr(mcp3422_of_match),
>   },
>   .probe = mcp3422_probe,
> diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c
> index b3a82b4d1a75..2c8374f86252 100644
> --- a/drivers/iio/adc/ti-adc081c.c
> +++ b/drivers/iio/adc/ti-adc081c.c
> @@ -140,7 +140,6 @@ MODULE_DEVICE_TABLE(of, adc081c_of_match);
>  static struct i2c_driver adc081c_driver = {
>   .driver = {
>   .name = "adc081c",
> - .owner = THIS_MODULE,
>   .of_match_table = of_match_ptr(adc081c_of_match),
>   },
>   .probe = adc081c_probe,
> diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
> index f03b92fd3803..c067e6821496 100644
> --- a/drivers/iio/dac/ad5064.c
> +++ b/drivers/iio/dac/ad5064.c
> @@ -630,7 +630,6 @@ MODULE_DEVICE_TABLE(i2c, ad5064_i2c_ids);
>  static struct i2c_driver ad5064_i2c_driver = {
>   .driver = {
>  .name = "ad5064",
> -.owner = THIS_MODULE,
>   },
>   .probe = ad5064_i2c_probe,
>   .remove = ad5064_i2c_remove,
> diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
> index 9de4c4d38280..130de9b3e0bf 100644
> --- a/drivers/iio/dac/ad5380.c
> +++ b/drivers/iio/dac/ad5380.c
> @@ -593,7 +593,6 @@ MODULE_DEVICE_TABLE(i2c, ad5380_i2c_ids);
>  static struct i2c_driver ad5380_i2c_driver = {
>   .driver = {
>  .name = "ad5380",
> -.owner = THIS_MODULE,
>   },
>   .probe = ad5380_i2c_probe,
>   .remove = ad5380_i2c_remove,
> diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
> index 46bb62a5c1d4..07e17d72a3f3 100644

Re: [PATCH 1/3] staging: iio: Drop owner assignment from i2c_driver

2015-07-11 Thread Jonathan Cameron
On 10/07/15 07:34, Krzysztof Kozlowski wrote:
> i2c_driver does not need to set an owner because i2c_register_driver()
> will set it.
> 
> Signed-off-by: Krzysztof Kozlowski 
Applied to the togreg branch of iio.git

Thanks,

Jonathan
> 
> ---
> 
> The coccinelle script which generated the patch was sent here:
> http://www.spinics.net/lists/kernel/msg2029903.html
> ---
>  drivers/staging/iio/addac/adt7316-i2c.c | 1 -
>  drivers/staging/iio/light/isl29018.c| 1 -
>  drivers/staging/iio/light/isl29028.c| 1 -
>  3 files changed, 3 deletions(-)
> 
> diff --git a/drivers/staging/iio/addac/adt7316-i2c.c 
> b/drivers/staging/iio/addac/adt7316-i2c.c
> index 75ddd4f801a3..78fe0b557280 100644
> --- a/drivers/staging/iio/addac/adt7316-i2c.c
> +++ b/drivers/staging/iio/addac/adt7316-i2c.c
> @@ -124,7 +124,6 @@ static struct i2c_driver adt7316_driver = {
>   .driver = {
>   .name = "adt7316",
>   .pm = ADT7316_PM_OPS,
> - .owner  = THIS_MODULE,
>   },
>   .probe = adt7316_i2c_probe,
>   .id_table = adt7316_i2c_id,
> diff --git a/drivers/staging/iio/light/isl29018.c 
> b/drivers/staging/iio/light/isl29018.c
> index e646c5d24004..019ba5245c23 100644
> --- a/drivers/staging/iio/light/isl29018.c
> +++ b/drivers/staging/iio/light/isl29018.c
> @@ -838,7 +838,6 @@ static struct i2c_driver isl29018_driver = {
>   .name = "isl29018",
>   .acpi_match_table = ACPI_PTR(isl29018_acpi_match),
>   .pm = ISL29018_PM_OPS,
> - .owner = THIS_MODULE,
>   .of_match_table = isl29018_of_match,
>   },
>   .probe   = isl29018_probe,
> diff --git a/drivers/staging/iio/light/isl29028.c 
> b/drivers/staging/iio/light/isl29028.c
> index e5b2fdc2334b..cd6f2727aa58 100644
> --- a/drivers/staging/iio/light/isl29028.c
> +++ b/drivers/staging/iio/light/isl29028.c
> @@ -547,7 +547,6 @@ static struct i2c_driver isl29028_driver = {
>   .class  = I2C_CLASS_HWMON,
>   .driver  = {
>   .name = "isl29028",
> - .owner = THIS_MODULE,
>   .of_match_table = isl29028_of_match,
>   },
>   .probe   = isl29028_probe,
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Staging: iio: dummy: Fix blank line warnings

2015-07-11 Thread Jonathan Cameron
On 10/07/15 15:10, Cristina Opriceana wrote:
> Multiple blank lines should not be used as indicated by checkpatch.pl.
> Also, a line should be used after a function/structure declaration.
> 
> Signed-off-by: Cristina Opriceana 
Applied to the togreg branch of iio.git

Thanks,

Jonathan
> ---
>  drivers/staging/iio/iio_dummy_evgen.c | 1 +
>  drivers/staging/iio/iio_simple_dummy.c| 2 --
>  drivers/staging/iio/iio_simple_dummy.h| 1 +
>  drivers/staging/iio/iio_simple_dummy_buffer.c | 2 +-
>  4 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/staging/iio/iio_dummy_evgen.c 
> b/drivers/staging/iio/iio_dummy_evgen.c
> index c54d5b5..6d38854 100644
> --- a/drivers/staging/iio/iio_dummy_evgen.c
> +++ b/drivers/staging/iio/iio_dummy_evgen.c
> @@ -214,6 +214,7 @@ static struct device iio_evgen_dev = {
>   .groups = iio_evgen_groups,
>   .release = _evgen_release,
>  };
> +
>  static __init int iio_dummy_evgen_init(void)
>  {
>   int ret = iio_dummy_evgen_create();
> diff --git a/drivers/staging/iio/iio_simple_dummy.c 
> b/drivers/staging/iio/iio_simple_dummy.c
> index 1629a8a..381f90f 100644
> --- a/drivers/staging/iio/iio_simple_dummy.c
> +++ b/drivers/staging/iio/iio_simple_dummy.c
> @@ -611,7 +611,6 @@ static int iio_dummy_probe(int index)
>*/
>   iio_dummy_devs[index] = indio_dev;
>  
> -
>   /*
>* Set the device name.
>*
> @@ -675,7 +674,6 @@ static void iio_dummy_remove(int index)
>*/
>   struct iio_dev *indio_dev = iio_dummy_devs[index];
>  
> -
>   /* Unregister the device */
>   iio_device_unregister(indio_dev);
>  
> diff --git a/drivers/staging/iio/iio_simple_dummy.h 
> b/drivers/staging/iio/iio_simple_dummy.h
> index e877a99..8d00224 100644
> --- a/drivers/staging/iio/iio_simple_dummy.h
> +++ b/drivers/staging/iio/iio_simple_dummy.h
> @@ -119,6 +119,7 @@ static inline int 
> iio_simple_dummy_configure_buffer(struct iio_dev *indio_dev)
>  {
>   return 0;
>  };
> +
>  static inline
>  void iio_simple_dummy_unconfigure_buffer(struct iio_dev *indio_dev)
>  {};
> diff --git a/drivers/staging/iio/iio_simple_dummy_buffer.c 
> b/drivers/staging/iio/iio_simple_dummy_buffer.c
> index a651b89..00ed774 100644
> --- a/drivers/staging/iio/iio_simple_dummy_buffer.c
> +++ b/drivers/staging/iio/iio_simple_dummy_buffer.c
> @@ -32,6 +32,7 @@ static const s16 fakedata[] = {
>   [diffvoltage3m4] = -2,
>   [accelx] = 344,
>  };
> +
>  /**
>   * iio_simple_dummy_trigger_h() - the trigger handler function
>   * @irq: the interrupt number
> @@ -178,7 +179,6 @@ error_free_buffer:
>   iio_kfifo_free(indio_dev->buffer);
>  error_ret:
>   return ret;
> -
>  }
>  
>  /**
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/8] i2c: core: Add support for best effort block read emulation

2015-07-11 Thread Jonathan Cameron
On 10/07/15 18:14, Tirdea, Irina wrote:
> 
> 
>> -Original Message-
>> From: Jonathan Cameron [mailto:ji...@kernel.org]
>> Sent: 05 July, 2015 14:59
>> To: Tirdea, Irina; Wolfram Sang; linux-...@vger.kernel.org; 
>> linux-...@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org; Pandruvada, Srinivas; Peter Meerwald
>> Subject: Re: [PATCH v3 1/8] i2c: core: Add support for best effort block 
>> read emulation
>>
>> On 03/07/15 10:33, Irina Tirdea wrote:
>>> There are devices that need to handle block transactions
>>> regardless of the capabilities exported by the adapter.
>>> For performance reasons, they need to use i2c read blocks
>>> if available, otherwise emulate the block transaction with word
>>> or byte transactions.
>>>
>>> Add support for a helper function that would read a data block
>>> using the best transfer available: I2C_FUNC_SMBUS_READ_I2C_BLOCK,
>>> I2C_FUNC_SMBUS_READ_WORD_DATA or I2C_FUNC_SMBUS_READ_BYTE_DATA.
>>>
>>> Signed-off-by: Irina Tirdea 
>> Looks good to me - I vaguely wondered if it would make sense to use
>> an endian conversion in the word case, but as we have possible odd
>> numbers of bytes that gets fiddly.
>>
> 
> Thanks for the review, Jonathan!
> 
>> I wonder what devices do if you do a word read beyond their end address?
>> Perhaps in odd cases we should always fall back to byte reads?
> 
> In my tests I can read beyond the end address, but I cannot be sure if this 
> is OK for all
> devices. This was actually a suggestion from Wolfram for v1, but maybe I'm 
> missing
> something.
> 
> Wolfram, is it safe to read one byte beyond the end address or should I 
> better use
> only byte reads for odd lengths?
> 
>>
>>> ---
>>>  drivers/i2c/i2c-core.c | 60 
>>> ++
>>>  include/linux/i2c.h|  3 +++
>>>  2 files changed, 63 insertions(+)
>>>
>>> diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
>>> index 96771ea..55a3455 100644
>>> --- a/drivers/i2c/i2c-core.c
>>> +++ b/drivers/i2c/i2c-core.c
>>> @@ -2914,6 +2914,66 @@ trace:
>>>  }
>>>  EXPORT_SYMBOL(i2c_smbus_xfer);
>>>
>>> +/**
>>> + * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
>>> + * @client: Handle to slave device
>>> + * @command: Byte interpreted by slave
>>> + * @length: Size of data block; SMBus allows at most 32 bytes
>>> + * @values: Byte array into which data will be read; big enough to hold
>>> + * the data returned by the slave.  SMBus allows at most 32 bytes.
>>> + *
>>> + * This executes the SMBus "block read" protocol if supported by the 
>>> adapter.
>>> + * If block read is not supported, it emulates it using either word or byte
>>> + * read protocols depending on availability.
>>> + *
>>> + * Before using this function you must double-check if the I2C slave does
>>> + * support exchanging a block transfer with a byte transfer.
>> Add something here about addressing assumptions.  You get odd devices which
>> will give bulk reads of addresses not mapped to a nice linear region when
>> you do byte reads.
> 
> OK, I'll add this to the comment above:
> "The addresses of the I2C slave device that are accessed with this function
>  must be mapped to a linear region, so that a block read will have the same
>  effect as a byte read."
> 
Works for me.
> Thanks,
> Irina
> 
>>> + */
>>> +s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client 
>>> *client,
>>> + u8 command, u8 length, u8 *values)
>>> +{
>>> +   u8 i;
>>> +   int status;
>>> +
>>> +   if (length > I2C_SMBUS_BLOCK_MAX)
>>> +   length = I2C_SMBUS_BLOCK_MAX;
>>> +
>>> +   if (i2c_check_functionality(client->adapter,
>>> +   I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
>>> +   return i2c_smbus_read_i2c_block_data(client, command,
>>> +length, values);
>>> +   } else if (i2c_check_functionality(client->adapter,
>>> +  I2C_FUNC_SMBUS_READ_WORD_DATA)) {
>>> +   for (i = 0; i < length; i += 2) {
>>> +   status = i2c_smbus_read_word_data(client, command + i);
>>> +   if (status < 0)
>>> +   return status;
>>> +   values[i] = status & 0xff;
>>> +   if ((i + 1) < length)
>>> +   values[i + 1] = status >> 8;
>>> +   }
>>> +   if (i > length)
>>> +   return length;
>>> +   return i;
>>> +   } else if (i2c_check_functionality(client->adapter,
>>> +  I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
>>> +   for (i = 0; i < length; i++) {
>>> +   status = i2c_smbus_read_byte_data(client, command + i);
>>> +   if (status < 0)
>>> +   return status;
>>> +   values[i] = status;
>>> +   }
>>> +   return i;
>>> +   }
>>> +
>>> +   

Re: [PATCH v2 2/2] ARM: dts: vfxxx: Add property for minimum sample time

2015-07-11 Thread Jonathan Cameron
On 10/07/15 19:06, maitysancha...@gmail.com wrote:
> Hello Shawn,
> 
> On 15-07-10 16:53:24, Shawn Guo wrote:
>> On Wed, Jun 24, 2015 at 02:03:41PM +0530, Sanchayan Maity wrote:
>>> Add a device tree property which allows to specify the minimum sample
>>> time which can be used to calculate the actual ADC cycles required
>>> depending on the hardware.
>>>
>>> Signed-off-by: Sanchayan Maity 
>>> ---
>>>  arch/arm/boot/dts/vfxxx.dtsi | 2 ++
>>>  1 file changed, 2 insertions(+)
>>>
>>> diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
>>> index 90a03d5..71d9c08 100644
>>> --- a/arch/arm/boot/dts/vfxxx.dtsi
>>> +++ b/arch/arm/boot/dts/vfxxx.dtsi
>>> @@ -229,6 +229,7 @@
>>> status = "disabled";
>>> fsl,adck-max-frequency = <3000>, <4000>,
>>> <2000>;
>>> +   min-sample-time = <1000>;
>>> };
>>>  
>>> wdoga5: wdog@4003e000 {
>>> @@ -447,6 +448,7 @@
>>> status = "disabled";
>>> fsl,adck-max-frequency = <3000>, <4000>,
>>> <2000>;
>>> +   min-sample-time = <1000>;
>>
>> Can we code 1000 as the default in kernel driver, so that only boards
>> requiring different value need to have this property?  Doing so makes
>> the property optional rather than required.
>>
> 
> Not sure if hardcoding it in the driver is the right approach.
If it is a true feature of the device (i.e. if in the case of perfect
front end electronics) this is the right option, then a default makes
a lot of sense.  If that isn't the case (I suspect not) then if we
drop it be optional chances are no one will bother thinking about it
or trying to tune this at all.

Hence seems wrong to put a fairly arbitrary default value on it.
However, we do need to still work with old device trees and new kernels
so need to cope without it.

Hence to my mind, if we had started out with this in the first driver
version, then the default would be a bad idea.  As we didn't then we
really need to cope with nothing specified (as best we can) and so
we do need a sensible default (or perhaps even sensible worst
case default) in there. 
> 
> However if the maintainers and others agree on doing this, I will do
> the necessary change.
> 
> Thanks.
> 
> Regards,
> Sanchayan.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RESEND] iio: adc: rockchip_saradc: add missing MODULE_* data

2015-07-11 Thread Jonathan Cameron
On 08/07/15 15:17, Heiko Stuebner wrote:
> The module-data is currently missing. This includes the license-information
> which makes the driver taint the kernel and miss symbols when compiled as
> module.
> 
> Fixes: 44d6f2ef94f9 ("iio: adc: add driver for Rockchip saradc")
> Signed-off-by: Heiko Stuebner 
Sorry Heiko,

Not entirely sure why I haven't picked this up before.

Anyhow, now applied to the fixes-for-4.2 branch of iio.git
and marked for stable.  I need to catch up with a bit of a
backlog, but should get a pull request out to Greg sometime
early next week.

Jonathan
> ---
>  drivers/iio/adc/rockchip_saradc.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/iio/adc/rockchip_saradc.c 
> b/drivers/iio/adc/rockchip_saradc.c
> index 8d4e019..9c311c1 100644
> --- a/drivers/iio/adc/rockchip_saradc.c
> +++ b/drivers/iio/adc/rockchip_saradc.c
> @@ -349,3 +349,7 @@ static struct platform_driver rockchip_saradc_driver = {
>  };
>  
>  module_platform_driver(rockchip_saradc_driver);
> +
> +MODULE_AUTHOR("Heiko Stuebner ");
> +MODULE_DESCRIPTION("Rockchip SARADC driver");
> +MODULE_LICENSE("GPL v2");
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] iio: frequency: adf4350: Delete blank line

2015-07-11 Thread Jonathan Cameron
On 08/07/15 15:04, Ana Calinov wrote:
> This patch removes an unnecessary blank line
> found by checkpatch.pl --strict:
> Blank lines aren't necessary after an open brace '{'.
> 
> Signed-off-by: Ana Calinov 
Applied to the togreg branch of iio.git. Initially pushed
out as testing for the autobuilders to play with it.

Thanks,

Jonathan
> ---
>  drivers/iio/frequency/adf4350.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c
> index 10a0dfc..9890c81 100644
> --- a/drivers/iio/frequency/adf4350.c
> +++ b/drivers/iio/frequency/adf4350.c
> @@ -72,7 +72,6 @@ static int adf4350_sync_config(struct adf4350_state *st)
>   for (i = ADF4350_REG5; i >= ADF4350_REG0; i--) {
>   if ((st->regs_hw[i] != st->regs[i]) ||
>   ((i == ADF4350_REG0) && doublebuf)) {
> -
>   switch (i) {
>   case ADF4350_REG1:
>   case ADF4350_REG4:
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] iio: accel: kxcjk-1013: Remove blank lines

2015-07-11 Thread Jonathan Cameron
On 08/07/15 13:56, Daniel Baluta wrote:
> On Wed, Jul 8, 2015 at 3:44 PM, Ana Calinov  wrote:
>> This patch fixes the the following errors given by
>> checkpatch.pl with --strict:
>> Please don't use multiple blank lines.
>> Blank lines aren't necessary after an open brace '{'.
>>
>> Signed-off-by: Ana Calinov 
> 
> 
> Looks good to me.
> 
> Reviewed-by: Daniel Baluta 
> 
> Thanks Ana!
> 
> Daniel.
> 
Applied to the togreg branch of iio.git.  Will push out
as testing for the autobuilders to play with it.

A sensible checkpatch based cleanup. Thanks

Jonathan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] sysctl: fix int -> unsigned long assignments in INT_MIN case

2015-07-11 Thread Ilya Dryomov
The following

if (val < 0)
*lvalp = (unsigned long)-val;

is incorrect because the compiler is free to assume -val to be positive
and use a sign-extend instruction for extending the bit pattern.  This
is a problem if val == INT_MIN:

# echo -2147483648 >/proc/sys/dev/scsi/logging_level
# cat /proc/sys/dev/scsi/logging_level
-18446744071562067968

Cast to unsigned long before negation - that way we first sign-extend
and then negate an unsigned, which is well defined.  With this:

# cat /proc/sys/dev/scsi/logging_level
-2147483648

Signed-off-by: Ilya Dryomov 
---
 kernel/sysctl.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 19b62b522158..0b4092b9fa00 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1995,7 +1995,7 @@ static int do_proc_dointvec_conv(bool *negp, unsigned 
long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = -(unsigned long)val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2201,7 +2201,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, 
unsigned long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = -(unsigned long)val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2436,7 +2436,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2459,7 +2459,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(bool 
*negp, unsigned long *lvalp
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2484,7 +2484,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = -(unsigned long)val;
} else {
*negp = false;
lval = (unsigned long)val;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 04/45] clk: efm32gg: Remove clk.h include

2015-07-11 Thread Uwe Kleine-König
On Fri, Jul 10, 2015 at 04:33:02PM -0700, Stephen Boyd wrote:
> Clock provider drivers generally shouldn't include clk.h because
> it's the consumer API. Remove the include here because this is a
> provider driver.
> 
> Cc: Uwe Kleine-König 
> Signed-off-by: Stephen Boyd 
Still compiles with your change, so fine.

Acked-by: Uwe Kleine-König 

Best regards
Uwe

-- 
Pengutronix e.K.   | Uwe Kleine-König|
Industrial Linux Solutions | http://www.pengutronix.de/  |
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] drm/atomic: fix null dereference

2015-07-11 Thread Sudip Mukherjee
We are checking the size of e->event but we were doing it when e is
known to be NULL.

Signed-off-by: Sudip Mukherjee 
---
 drivers/gpu/drm/drm_atomic.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index acebd16..51d3a85 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1311,7 +1311,6 @@ static struct drm_pending_vblank_event 
*create_vblank_event(
e = kzalloc(sizeof *e, GFP_KERNEL);
if (e == NULL) {
spin_lock_irqsave(&dev->event_lock, flags);
-   file_priv->event_space += sizeof e->event;
spin_unlock_irqrestore(&dev->event_lock, flags);
goto out;
}
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sysctl: Fix conversion of INT_MIN for LP64 systems

2015-07-11 Thread Ilya Dryomov
On Sat, Jul 11, 2015 at 10:35 AM, Robert Xiao  wrote:
> On LP64 systems, reading a sysctl file containing an INT_MIN (-2147483648)
> could incorrectly show -18446744071562067968 due to an incorrect conversion
> in do_proc_dointvec_conv. This patch fixes the edge case by converting to
> unsigned int first to avoid sign extending INT_MIN to unsigned long.
>
> Test:
>
> root:/proc/sys/kernel# echo -2147483648 0 0 0 > printk
> root:/proc/sys/kernel# cat printk
>
> Without patch, produces -18446744071562067968 0 0 0.
> With patch, should produce -2147483648 0 0 0.
>
> Signed-off-by: Robert Xiao 
> ---
>  kernel/sysctl.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 19b62b5..464df36 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1995,10 +1995,10 @@ static int do_proc_dointvec_conv(bool *negp, unsigned 
> long *lvalp,
> int val = *valp;
> if (val < 0) {
> *negp = true;
> -   *lvalp = (unsigned long)-val;
> +   *lvalp = (unsigned int)-val;
> } else {
> *negp = false;
> -   *lvalp = (unsigned long)val;
> +   *lvalp = (unsigned int)val;
> }
> }
> return 0;

I don't know why am I CC'ed on this - CC'ing Andrew along with Eric and
Kees who seem to have worked directly on sysctl.c not too long ago.

That said, I took a look at this and I think this patch is wrong.
Casting to unsigned int instead of unsigned long *after* the negation
is bogus, because we have

if (val < 0)
...
*lvalp = (unsigned long)-val;

and the compiler is free to assume -val to be positive and use the
sign-extend instruction.  On gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7)
that I have here the cast to unsigned int works only with -O1, with -O2
it goes to town and uses cltq which sign-extends:

neg%eax
movb   $0x1,(%rdi)
cltq

IMO the right way to do this would be to first cast to unsigned long
and then negate - that way we will first sign-extend and then negate an
unsigned, which is well defined.  Also, this needs to be done not just
for do_proc_dointvec_conv(), but for do_proc_dointvec_minmax_conv() and
jiffies functions as well (although it's probably virtually impossible
to set val to exactly INT_MIN through jiffies write branches).

Speaking of write branches, only do_proc_dointvec_conv() does check
it's input properly, so that's something to look at.

Thanks,

Ilya
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: PCI devices (buses?) and 3GB of RAM lost with 4.2rc1

2015-07-11 Thread Stefan Seyfried
Am 08.07.2015 um 22:09 schrieb Stefan Seyfried:
> this is on a Thinkpad X200s, 5 years old and working fine, until 4.2rc1
> came along.
> 
> With that booted, I do not have a WiFi card anymore, it doesn't even
> appear in "lspci" output.

> From diffing the dmesg's, it also looks like I lost some of my RAM:
> 
> -Memory: 8050048K/8280176K available (6401K kernel code, 980K rwdata,
> 4864K rodata, 1532K init, 1516K bss, 230128K reserved, 0K cma-reserved)
> +Memory: 5104620K/8280176K available (6823K kernel code, 1096K rwdata,
> 3220K rodata, 1556K init, 1520K bss, 227792K reserved, 0K cma-reserved)

This was only a one-off thing, it looks like the hardware was confused
when first booting 4.2-rc1
(I found out when I wanted to bisect it, all the kernels I built did
just work, and then I finally booted the distro-kernel again and it also
worked :-)

So everything is fine, sorry for the noise.
-- 
-- 
Stefan Seyfried
Linux Consultant & Developer
Mail: seyfr...@b1-systems.de GPG Key: 0x731B665B

B1 Systems GmbH
Osterfeldstraße 7 / 85088 Vohburg / http://www.b1-systems.de
GF: Ralph Dehner / Unternehmenssitz: Vohburg / AG: Ingolstadt,HRB 3537
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net: switchdev: don't abort unsupported operations

2015-07-11 Thread Scott Feldman
On Fri, Jul 10, 2015 at 4:48 PM, Vivien Didelot
 wrote:
> There is no need to abort attribute setting or object addition, if the
> prepare phase returned operation not supported.
>
> Thus, abort these two transactions only if the error is not -EOPNOTSUPP.
>
> Signed-off-by: Vivien Didelot 

Acked-by: Scott Feldman 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: ipw2100: fix timeout bug - always evaluated to 0

2015-07-11 Thread Kalle Valo

> commit 2c86c275015c ("Add ipw2100 wireless driver.") introduced 
> HW_PHY_OFF_LOOP_DELAY (HZ / 5000) which always evaluated to 0. Clarified
> by Stanislav Yakovlev  that it should be 50
> milliseconds thus fixed up to msecs_to_jiffies(50).
> 
> Signed-off-by: Nicholas Mc Guire 
> Acked-by: Stanislav Yakovlev 

Thanks, applied to wireless-drivers-next.git.

Kalle Valo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] hwmon: (nct7802) Add autopoint attributes

2015-07-11 Thread Constantine Shulyupin
Introduced pwm[1..3]_auto_point[1..5]_temp, pwm[1..3]_auto_point[1..5]_pwm,
nct7802_auto_point_attrs, nct7802_auto_point_group.
nct7802_auto_point_is_visible,

---

Changed in v2:
- Removed PWM_REG, TEMP_REG
- auto_point[1..4]_temp, auto_point[1..4]_pwm and auto_point_crit_temp
expanded and replaced with pwm[1..3]_auto_point[1..5]_temp
and pwm[1..3]_auto_point[1..5]_pwm.
- Introduced nct7802_auto_point_is_visible
- added used sysfs_update_group in store_pwm_enable

Signed-off-by: Constantine Shulyupin 
---
 drivers/hwmon/nct7802.c | 179 +++-
 1 file changed, 162 insertions(+), 17 deletions(-)

diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c
index 2f6bbe5..e1bc7a6 100644
--- a/drivers/hwmon/nct7802.c
+++ b/drivers/hwmon/nct7802.c
@@ -130,6 +130,9 @@ static ssize_t show_pwm(struct device *dev, struct 
device_attribute *devattr,
unsigned int val;
int ret;
 
+   if (!attr->index)
+   return sprintf(buf, "255\n");
+
ret = regmap_read(data->regmap, attr->index, &val);
if (ret < 0)
return ret;
@@ -170,23 +173,7 @@ static ssize_t show_pwm_enable(struct device *dev,
 
 static ssize_t store_pwm_enable(struct device *dev,
struct device_attribute *attr,
-   const char *buf, size_t count)
-{
-   struct nct7802_data *data = dev_get_drvdata(dev);
-   struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
-   u8 val;
-   int ret;
-
-   ret = kstrtou8(buf, 0, &val);
-   if (ret < 0)
-   return ret;
-   if (val < 1 || val > 2)
-   return -EINVAL;
-   ret = regmap_update_bits(data->regmap, REG_SMARTFAN_EN(sattr->index),
-1 << SMARTFAN_EN_SHIFT(sattr->index),
-(val - 1) << SMARTFAN_EN_SHIFT(sattr->index));
-   return ret ? : count;
-}
+   const char *buf, size_t count);
 
 static int nct7802_read_temp(struct nct7802_data *data,
 u8 reg_temp, u8 reg_temp_low, int *temp)
@@ -890,11 +877,169 @@ static struct attribute_group nct7802_pwm_group = {
.attrs = nct7802_pwm_attrs,
 };
 
+/* 7.2.115... 0x80-0x83, 0x84 Temperature (X-axis) transition */
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x80, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x81, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x82, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x83, 0);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_point5_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x84, 0);
+
+/* 7.2.120... 0x85-0x88 PWM (Y-axis) transition */
+static SENSOR_DEVICE_ATTR(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x85);
+static SENSOR_DEVICE_ATTR(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x86);
+static SENSOR_DEVICE_ATTR(pwm1_auto_point3_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x87);
+static SENSOR_DEVICE_ATTR(pwm1_auto_point4_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x88);
+static SENSOR_DEVICE_ATTR(pwm1_auto_point5_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, NULL, 0);
+
+/* 7.2.124 Table 2 X-axis Transition Point 1 Register */
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x90, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x91, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x92, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x93, 0);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_point5_temp, S_IRUGO | S_IWUSR,
+   show_temp, store_temp, 0x94, 0);
+
+/* 7.2.129 Table 2 Y-axis Transition Point 1 Register */
+static SENSOR_DEVICE_ATTR(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x95);
+static SENSOR_DEVICE_ATTR(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x96);
+static SENSOR_DEVICE_ATTR(pwm2_auto_point3_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x97);
+static SENSOR_DEVICE_ATTR(pwm2_auto_point4_pwm, S_IRUGO | S_IWUSR,
+ show_pwm, store_pwm, 0x98);
+static SENSOR_DEVICE_ATTR(pwm2_auto_point5_pwm, S_IRUGO | S_IWUSR,
+ 

Re: [PATCH 1/5] x86/vm86: Move userspace accesses to do_sys_vm86()

2015-07-11 Thread Andy Lutomirski
On Fri, Jul 10, 2015 at 10:09 PM, Brian Gerst  wrote:
> Move the userspace accesses down into the common function in
> preparation for the next set of patches.
>

One thing I don't like about the current code that makes these patches
harder to review is the bizarre approach to copying.  If you changed
this:

> -   tmp = copy_vm86_regs_from_user(, >regs,
> -  offsetof(struct kernel_vm86_struct, 
> vm86plus) -
> -  sizeof(info.regs));

into a normal field-by-field get_user / copy_from_user (the latter for
the big regs struct) then it would be clear what the ABI is and it
would be much easier to read the patches and confirm that you aren't
accidentally changing the ABI.

You could also get rid of the constraint that certain fields in
apparently kernel-internal structs had to be in a certain order.

Other than that, patches 1-4 look good on cursory inspection.  I'll
look more carefully later.  I need to think about patch 5 more.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 0/2] kconfig: warn of unhandled characters in Kconfig commands

2015-07-11 Thread Ulf Magnusson
On Fri, Jul 10, 2015 at 10:25:31AM +0200, Andreas Ruprecht wrote:
> This patchset changes the lexer file to emit a warning if any unhandled
> characters are found in the input. So far, Kconfig options like
> 
>  +config FOO
> bool
> [...]
> 
> (note the wrong '+'!) were parsed without a warning. As simply adding a
> warning for '.' produces lots of warnings as occasionally '---help---'
> is used instead of 'help' (and thus '-' is recognized as an unhandled
> character), we need to handle '---help---' separately.
> 
> Changes to v1:
>   - add '---help---' in zconf.gperf instead of special casing
> it in zconf.l
> 
> Changes to v2:
>   - Do no constify char parameter to warn_ignored_character
>   - Shorten rule definitions for '.'
> 
> Andreas Ruprecht (2):
>   kconfig: warn of unhandled characters in Kconfig commands
>   kconfig: Regenerate shipped zconf.{hash,lex}.c files
> 
>  scripts/kconfig/zconf.gperf  |   1 +
>  scripts/kconfig/zconf.hash.c_shipped |  58 ---
>  scripts/kconfig/zconf.l  |  20 ++-
>  scripts/kconfig/zconf.lex.c_shipped  | 325 
> +--
>  4 files changed, 204 insertions(+), 200 deletions(-)
> 
> -- 
> 1.9.1
> 

Looks good to me.

I ran the Kconfiglib test suite on it too. Since it simply compares the
output of Kconfiglib and the C implementation, it doubles as a good
regression test for the C implementation.

Cheers,
Ulf
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] smpboot: Allow to pass the cpumask on per-cpu thread registration

2015-07-11 Thread Frederic Weisbecker
It makes the registration cheaper and simpler for the smpboot per-cpu
kthread users that don't need to always update the cpumask after threads
creation.

Reviewed-by: Chris Metcalf 
Cc: Andrew Morton 
Cc: Chris Metcalf 
Cc: Don Zickus 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: Ulrich Obergfell 
Signed-off-by: Frederic Weisbecker 
---
 include/linux/smpboot.h | 11 ++-
 kernel/smpboot.c| 12 
 kernel/watchdog.c   |  9 +++--
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index da3c593..e6109a6 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -48,7 +48,16 @@ struct smp_hotplug_thread {
const char  *thread_comm;
 };
 
-int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
+int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread 
*plug_thread,
+  const struct cpumask *cpumask);
+
+static inline int
+smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
+{
+   return smpboot_register_percpu_thread_cpumask(plug_thread,
+ cpu_possible_mask);
+}
+
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
 int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread 
*plug_thread,
 const struct cpumask *);
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 60aa858..d99a41d 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -273,19 +273,22 @@ static void smpboot_destroy_threads(struct 
smp_hotplug_thread *ht)
 }
 
 /**
- * smpboot_register_percpu_thread - Register a per_cpu thread related to 
hotplug
+ * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related
+ * to hotplug
  * @plug_thread:   Hotplug thread descriptor
+ * @cpumask:   The cpumask where threads run
  *
  * Creates and starts the threads on all online cpus.
  */
-int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
+int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread 
*plug_thread,
+  const struct cpumask *cpumask)
 {
unsigned int cpu;
int ret = 0;
 
	if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
return -ENOMEM;
-   cpumask_copy(plug_thread->cpumask, cpu_possible_mask);
+   cpumask_copy(plug_thread->cpumask, cpumask);
 
get_online_cpus();
	mutex_lock(&smpboot_threads_lock);
@@ -296,7 +299,8 @@ int smpboot_register_percpu_thread(struct 
smp_hotplug_thread *plug_thread)
free_cpumask_var(plug_thread->cpumask);
goto out;
}
-   smpboot_unpark_thread(plug_thread, cpu);
+   if (cpumask_test_cpu(cpu, cpumask))
+   smpboot_unpark_thread(plug_thread, cpu);
}
	list_add(&plug_thread->list, &hotplug_threads);
 out:
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index a6ffa43..e5bb86f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -713,15 +713,12 @@ static int watchdog_enable_all_cpus(void)
int err = 0;
 
if (!watchdog_running) {
-   err = smpboot_register_percpu_thread(&watchdog_threads);
+   err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+&watchdog_cpumask);
if (err)
pr_err("Failed to create watchdog threads, disabled\n");
-   else {
-   if (smpboot_update_cpumask_percpu_thread(
-   &watchdog_threads, &watchdog_cpumask))
-   pr_err("Failed to set cpumask for watchdog 
threads\n");
+   else
watchdog_running = 1;
-   }
} else {
/*
 * Enable/disable the lockup detectors or
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   >