Re: [PATCH] net: switchdev: don't abort unsupported operations
From: Vivien Didelot Date: Fri, 10 Jul 2015 19:48:58 -0400 > There is no need to abort attribute setting or object addition, if the > prepare phase returned operation not supported. > > Thus, abort these two transactions only if the error is not -EOPNOTSUPP. > > Signed-off-by: Vivien Didelot Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] mpt2sas: Refcount fw_events and fix unsafe list usage
The fw_event_work struct is concurrently referenced at shutdown, so add a refcount to protect it, and refactor the code to use it. Additionally, refactor _scsih_fw_event_cleanup_queue() such that it no longer iterates over the list without holding the lock, since _firmware_event_work() concurrently deletes items from the list. Cc: Christoph Hellwig Cc: Bart Van Assche Signed-off-by: Calvin Owens --- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 101 --- 1 file changed, 81 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index fad80ce..8b267af 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -176,9 +176,37 @@ struct fw_event_work { u8 VP_ID; u8 ignore; u16 event; + struct kref refcount; charevent_data[0] __aligned(4); }; +static void fw_event_work_free(struct kref *r) +{ + kfree(container_of(r, struct fw_event_work, refcount)); +} + +static void fw_event_work_get(struct fw_event_work *fw_work) +{ + kref_get(_work->refcount); +} + +static void fw_event_work_put(struct fw_event_work *fw_work) +{ + kref_put(_work->refcount, fw_event_work_free); +} + +static struct fw_event_work *alloc_fw_event_work(int len) +{ + struct fw_event_work *fw_event; + + fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC); + if (!fw_event) + return NULL; + + kref_init(_event->refcount); + return fw_event; +} + /* raid transport support */ static struct raid_template *mpt2sas_raid_template; @@ -2844,36 +2872,39 @@ _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) return; spin_lock_irqsave(>fw_event_lock, flags); + fw_event_work_get(fw_event); list_add_tail(_event->list, >fw_event_list); INIT_DELAYED_WORK(_event->delayed_work, _firmware_event_work); + fw_event_work_get(fw_event); queue_delayed_work(ioc->firmware_event_thread, _event->delayed_work, 0); spin_unlock_irqrestore(>fw_event_lock, flags); } /** - * _scsih_fw_event_free - delete fw_event + * 
_scsih_fw_event_del_from_list - delete fw_event from the list * @ioc: per adapter object * @fw_event: object describing the event * Context: This function will acquire ioc->fw_event_lock. * - * This removes firmware event object from link list, frees associated memory. + * If the fw_event is on the fw_event_list, remove it and do a put. * * Return nothing. */ static void -_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work +_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) { unsigned long flags; spin_lock_irqsave(>fw_event_lock, flags); - list_del(_event->list); - kfree(fw_event); + if (!list_empty(_event->list)) { + list_del_init(_event->list); + fw_event_work_put(fw_event); + } spin_unlock_irqrestore(>fw_event_lock, flags); } - /** * _scsih_error_recovery_delete_devices - remove devices not responding * @ioc: per adapter object @@ -2888,13 +2919,14 @@ _scsih_error_recovery_delete_devices(struct MPT2SAS_ADAPTER *ioc) if (ioc->is_driver_loading) return; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); } /** @@ -2908,12 +2940,29 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) { struct fw_event_work *fw_event; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); +} + +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc) +{ + unsigned long flags; + struct fw_event_work *fw_event = NULL; + + spin_lock_irqsave(>fw_event_lock, flags); + if (!list_empty(>fw_event_list)) { + fw_event = list_first_entry(>fw_event_list, + struct fw_event_work, list); + 
list_del_init(_event->list); + } + spin_unlock_irqrestore(>fw_event_lock, flags); + + return fw_event; } /** @@ -2928,17 +2977,25 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) static void _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc) { - struct
[PATCH 0/2 v2] Fixes for memory corruption in mpt2sas
Hello all, This patchset attempts to address problems we've been having with panics due to memory corruption from the mpt2sas driver. Thanks, Calvin Patches in this series: [PATCH 1/2] mpt2sas: Refcount sas_device objects and fix unsafe list usage [PATCH 2/2] mpt2sas: Refcount fw_events and fix unsafe list usage Changes since v1: * Squished patches 1-3 and 4-6 into two patches * s/BUG_ON(!spin_is_locked/assert_spin_locked/g * Use more succinct function names * Store a pointer to the sas_device object in ->hostdata to eliminate the need for several lookups on the lists. * Remove the fw_event from fw_event_list at the start of _firmware_event_work() * Explicitly separate fw_event_list removal from fw_event freeing Total diffstat: drivers/scsi/mpt2sas/mpt2sas_base.h | 22 +- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 535 +-- drivers/scsi/mpt2sas/mpt2sas_transport.c | 12 +- 3 files changed, 396 insertions(+), 173 deletions(-) Diff showing changes v1 => v2: http://jcalvinowens.github.io/stuff/mpt2sas-patchset-v1v2.patch -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] mpt2sas: Refcount sas_device objects and fix unsafe list usage
These objects can be referenced concurrently throughout the driver, we need a way to make sure threads can't delete them out from under each other. This patch adds the refcount, and refactors the code to use it. Additionally, we cannot iterate over the sas_device_list without holding the lock, or we risk corrupting random memory if items are added or deleted as we iterate. This patch refactors _scsih_probe_sas() to use the sas_device_list in a safe way. Cc: Christoph Hellwig Cc: Bart Van Assche Signed-off-by: Calvin Owens --- drivers/scsi/mpt2sas/mpt2sas_base.h | 22 +- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 434 --- drivers/scsi/mpt2sas/mpt2sas_transport.c | 12 +- 3 files changed, 315 insertions(+), 153 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index caff8d1..78f41ac 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -238,6 +238,7 @@ * @flags: MPT_TARGET_FLAGS_XXX flags * @deleted: target flaged for deletion * @tm_busy: target is busy with TM request. 
+ * @sdev: The sas_device associated with this target */ struct MPT2SAS_TARGET { struct scsi_target *starget; @@ -248,6 +249,7 @@ struct MPT2SAS_TARGET { u32 flags; u8 deleted; u8 tm_busy; + struct _sas_device *sdev; }; @@ -376,8 +378,24 @@ struct _sas_device { u8 phy; u8 responding; u8 pfa_led_on; + struct kref refcount; }; +static inline void sas_device_get(struct _sas_device *s) +{ + kref_get(>refcount); +} + +static inline void sas_device_free(struct kref *r) +{ + kfree(container_of(r, struct _sas_device, refcount)); +} + +static inline void sas_device_put(struct _sas_device *s) +{ + kref_put(>refcount, sas_device_free); +} + /** * struct _raid_device - raid volume link list * @list: sas device list @@ -1095,7 +1113,9 @@ struct _sas_node *mpt2sas_scsih_expander_find_by_handle(struct MPT2SAS_ADAPTER * u16 handle); struct _sas_node *mpt2sas_scsih_expander_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, u64 sas_address); -struct _sas_device *mpt2sas_scsih_sas_device_find_by_sas_address( +struct _sas_device *mpt2sas_get_sdev_by_addr( +struct MPT2SAS_ADAPTER *ioc, u64 sas_address); +struct _sas_device *__mpt2sas_get_sdev_by_addr( struct MPT2SAS_ADAPTER *ioc, u64 sas_address); void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 3f26147..fad80ce 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -526,8 +526,43 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, } } +struct _sas_device * +__mpt2sas_get_sdev_from_target(struct MPT2SAS_TARGET *tgt_priv) +{ + struct _sas_device *ret; + + ret = tgt_priv->sdev; + if (ret) + sas_device_get(ret); + + return ret; +} + +struct _sas_device * +__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, +u64 sas_address) +{ + struct _sas_device *sas_device; + + assert_spin_locked(>sas_device_lock); + + list_for_each_entry(sas_device, >sas_device_list, list) + if 
(sas_device->sas_address == sas_address) + goto found_device; + + list_for_each_entry(sas_device, >sas_device_init_list, list) + if (sas_device->sas_address == sas_address) + goto found_device; + + return NULL; + +found_device: + sas_device_get(sas_device); + return sas_device; +} + /** - * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search + * mpt2sas_get_sdev_by_addr - sas device search * @ioc: per adapter object * @sas_address: sas address * Context: Calling function should acquire ioc->sas_device_lock @@ -536,24 +571,44 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, * object. */ struct _sas_device * -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, +mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, u64 sas_address) { struct _sas_device *sas_device; + unsigned long flags; + + spin_lock_irqsave(>sas_device_lock, flags); + sas_device = __mpt2sas_get_sdev_by_addr(ioc, + sas_address); + spin_unlock_irqrestore(>sas_device_lock, flags); + + return sas_device; +} + +static struct _sas_device * +__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) +{ + struct _sas_device *sas_device; + + assert_spin_locked(>sas_device_lock); list_for_each_entry(sas_device, >sas_device_list, list) - if (sas_device->sas_address == sas_address) - return sas_device; + if (sas_device->handle == handle) + goto found_device;
Re: [PATCH 6/6] Fix unsafe fw_event_list usage
On Friday 07/03 at 09:02 -0700, Christoph Hellwig wrote: > On Mon, Jun 08, 2015 at 08:50:56PM -0700, Calvin Owens wrote: > > Since the fw_event deletes itself from the list, cleanup_queue() can > > walk onto garbage pointers or walk off into freed memory. > > > > This refactors the code in _scsih_fw_event_cleanup_queue() to not > > iterate over the fw_event_list without a lock. > > I think this really should be folded into the previous one, with the > fixes in this one the other refcounting change don't make a whole lot > sense. > > > +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER > > *ioc) > > +{ > > + unsigned long flags; > > + struct fw_event_work *fw_event = NULL; > > + > > + spin_lock_irqsave(>fw_event_lock, flags); > > + if (!list_empty(>fw_event_list)) { > > + fw_event = list_first_entry(>fw_event_list, > > + struct fw_event_work, list); > > + list_del_init(_event->list); > > + fw_event_work_get(fw_event); > > + } > > + spin_unlock_irqrestore(>fw_event_lock, flags); > > + > > + return fw_event; > > Shouldn't we have a reference for each item on the list that gets > transfer to whomever removes it from the list? Yes, this was a bit weird the way I did it. I redid this in v2, hopefully it's clearer. > Additionally _firmware_event_work should call dequeue_next_fw_event > first in the function so that item is off the list before we process > it, and can then just drop the reference once it's done. That works: cleanup_queue() won't wait on some already-running events, but destroy_workqueue() drains the wq, so we won't run ahead and free things from under the fw_event when unwinding. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/6] Refactor code to use new sas_device refcount
On Friday 07/03 at 08:38 -0700, Christoph Hellwig wrote: > > > > +struct _sas_device * > > +mpt2sas_scsih_sas_device_get_by_sas_address_nolock(struct MPT2SAS_ADAPTER > > *ioc, > > +u64 sas_address) > > Any chance to use a shorter name for this function? E.g. > __mpt2sas_get_sdev_by_addr ? Will do. > > +{ > > + struct _sas_device *sas_device; > > + > > + BUG_ON(!spin_is_locked(>sas_device_lock)); > > This will blow on UP builds. Please use assert_spin_locked or > lockdep_assert_held instead. And don't ask me which of the two, > that's a mystery I don't understand myself either. Will do. > > struct _sas_device * > > -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, > > +mpt2sas_scsih_sas_device_get_by_sas_address(struct MPT2SAS_ADAPTER *ioc, > > u64 sas_address) > > { > > > +static struct _sas_device * > > +_scsih_sas_device_get_by_handle_nolock(struct MPT2SAS_ADAPTER *ioc, u16 > > handle) > > > static struct _sas_device * > > -_scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) > > +_scsih_sas_device_get_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) > > Same comments about the function names as above. > > > + struct _sas_device *sas_device; > > + > > + BUG_ON(!spin_is_locked(>sas_device_lock)); > > Same comment about the right assert helpers as above. > > > @@ -594,9 +634,15 @@ _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc, > > if (!sas_device) > > return; > > > > + /* > > +* The lock serializes access to the list, but we still need to verify > > +* that nobody removed the entry while we were waiting on the lock. > > +*/ > > spin_lock_irqsave(>sas_device_lock, flags); > > - list_del(_device->list); > > - kfree(sas_device); > > + if (!list_empty(_device->list)) { > > + list_del_init(_device->list); > > + sas_device_put(sas_device); > > + } > > spin_unlock_irqrestore(>sas_device_lock, flags); > > This looks odd to me. Normally you'd have the lock from the list > iteration that finds the device. 
From looking at the code it seems > like this only called from probe failure paths, though. It seems like > for this case the device simplify shouldn't be added until the probe > succeeds and this function should go away? There's a horrible maze of dependencies on things being on the lists while being added that make this impossible: I spent some time trying to get this to work, but I always end up with no drives. :( (The path through _scsih_probe_sas() seems not to care) I was hopeful your suggestion below about putting the sas_device pointer in ->hostdata would eliminate the need for all the find_by_X() lookups, but some won't go away. > > @@ -1208,12 +1256,15 @@ _scsih_change_queue_depth(struct scsi_device *sdev, > > int qdepth) > > goto not_sata; > > if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) > > goto not_sata; > > + > > spin_lock_irqsave(>sas_device_lock, flags); > > - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, > > + sas_device = mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc, > >sas_device_priv_data->sas_target->sas_address); > > - if (sas_device && sas_device->device_info & > > - MPI2_SAS_DEVICE_INFO_SATA_DEVICE) > > + if (sas_device && sas_device->device_info > > + & MPI2_SAS_DEVICE_INFO_SATA_DEVICE) { > > max_depth = MPT2SAS_SATA_QUEUE_DEPTH; > > + sas_device_put(sas_device); > > + } > > Please store a pointer to the sas_device in struct scsi_target ->hostdata > in _scsih_target_alloc and avoid the need for this and other runtime > lookups where we have a scsi_device or scsi_target structure available. Will do. 
> > @@ -1324,13 +1377,15 @@ _scsih_target_destroy(struct scsi_target *starget) > > > > spin_lock_irqsave(>sas_device_lock, flags); > > rphy = dev_to_rphy(starget->dev.parent); > > - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, > > + sas_device = mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc, > >rphy->identify.sas_address); > > if (sas_device && (sas_device->starget == starget) && > > (sas_device->id == starget->id) && > > (sas_device->channel == starget->channel)) > > sas_device->starget = NULL; > > > > + if (sas_device) > > + sas_device_put(sas_device); > > spin_unlock_irqrestore(>sas_device_lock, flags); > > .. like this one. > > > out: > > @@ -1386,7 +1441,7 @@ _scsih_slave_alloc(struct scsi_device *sdev) > > > > if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { > > spin_lock_irqsave(>sas_device_lock, flags); > > - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, > > + sas_device = > > mpt2sas_scsih_sas_device_get_by_sas_address_nolock(ioc, > > sas_target_priv_data->sas_address); > >
Re: [PATCH TRIVIAL] README: GTK+ is a acronym
Thanks, that should have been a "an acronym" instead of "a acronym" Typo in my commit message. :-( Diego On Fri, Jul 10, 2015 at 6:17 PM, Jonathan Corbet wrote: > On Mon, 6 Jul 2015 14:33:21 -0300 > Diego Viola wrote: > >> - "make gconfig" X windows (Gtk) based configuration tool. >> + "make gconfig" X windows (GTK+) based configuration tool. > > Applied to the docs tree, thanks. > > jon -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 5/6] Refactor code to use new fw_event refcount
Thanks for this, I'm sending a v2 shortly. On Friday 07/03 at 09:00 -0700, Christoph Hellwig wrote: > On Mon, Jun 08, 2015 at 08:50:55PM -0700, Calvin Owens wrote: > > This refactors the fw_event code to use the new refcount. > > I spent some time looking over this code because it's so convoluted. > In general I think code should either embeed one work_struct (and it > really doesn't seem to need a delayed work here!) or if needed a list > and not both like this one. But it's probably too much work to sort > all this out, so let's go with your version. Yeah, I tried to get rid of fw_event_list altogether, since I think what cleanup_queue() does could be simplified to calling flush_workqueue(). The problem is _scsih_check_topo_delete_events(), which looks at the list and sometimes marks fw_events as "ignored" so they aren't executed. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] locking/pvqspinlock: Fix kernel panic in locking-selftest
On 2015/07/12 10:19, Waiman Long wrote: > Enabling locking-selftest in a VM guest may cause the following > kernel panic: > > kernel BUG at .../kernel/locking/qspinlock_paravirt.h:137! > > This is due to the fact that the pvqspinlock unlock function is > expecting either a _Q_LOCKED_VAL or _Q_SLOW_VAL in the lock byte. This > patch prevents that bug report by ignoring it when debug_locks_silent > is set. Otherwise, a warning will be printed if it contains an > unexpected value. > > With this patch applied, the kernel locking-selftest completed without > any noise. > OK, I've tested this with make allmodconfig && make localmodconfig kernel. (I've hit another issue to boot, but it seems not related to this issue) Tested-by: Masami Hiramatsu Thank you! > Signed-off-by: Waiman Long > --- > kernel/locking/qspinlock_paravirt.h | 12 +++- > 1 files changed, 11 insertions(+), 1 deletions(-) > > diff --git a/kernel/locking/qspinlock_paravirt.h > b/kernel/locking/qspinlock_paravirt.h > index 04ab181..15d3733 100644 > --- a/kernel/locking/qspinlock_paravirt.h > +++ b/kernel/locking/qspinlock_paravirt.h > @@ -4,6 +4,7 @@ > > #include > #include > +#include > > /* > * Implement paravirt qspinlocks; the general idea is to halt the vcpus > instead > @@ -286,15 +287,24 @@ __visible void __pv_queued_spin_unlock(struct qspinlock > *lock) > { > struct __qspinlock *l = (void *)lock; > struct pv_node *node; > + u8 lockval = cmpxchg(>locked, _Q_LOCKED_VAL, 0); > > /* >* We must not unlock if SLOW, because in that case we must first >* unhash. Otherwise it would be possible to have multiple @lock >* entries, which would be BAD. 
>*/ > - if (likely(cmpxchg(>locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL)) > + if (likely(lockval == _Q_LOCKED_VAL)) > return; > > + if (unlikely(lockval != _Q_SLOW_VAL)) { > + if (debug_locks_silent) > + return; > + WARN(1, "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", > + (unsigned long)lock, atomic_read(>val)); > + return; > + } > + > /* >* Since the above failed to release, this must be the SLOW path. >* Therefore start by looking up the blocked node and unhashing it. > -- Masami HIRAMATSU Linux Technology Research Center, System Productivity Research Dept. Center for Technology Innovation - Systems Engineering Hitachi, Ltd., Research & Development Group E-mail: masami.hiramatsu...@hitachi.com -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [BUG][tip/master] kernel panic while locking selftest at qspinlock_paravirt.h:137!
On 07/11/2015 01:05 AM, Masami Hiramatsu wrote: On 2015/07/11 10:27, Waiman Long wrote: On 07/10/2015 08:32 PM, Masami Hiramatsu wrote: On 2015/07/10 23:28, Peter Zijlstra wrote: On Fri, Jul 10, 2015 at 03:57:46PM +0200, Ingo Molnar wrote: * Peter Zijlstra wrote: Do we want to make double unlock non-fatal unconditionally? No, just don't BUG() out, don't crash the system - generate a warning? So that would be a yes.. Something like so then? Won't this generate a splat on that locking self test then? And upset people? Hmm, yes, this still noisy... Can't we avoid double-unlock completely? it seems that this warning can happen randomly, which means pv-spinlock randomly broken, doesn't it? It shouldn't randomly happen. The message should be printed at the first instance of double-unlock. If that is not case, there may be some problem in the code. Ah, OK. That comes from locking selftest. In that case, do we really need the warning while selftest, since we know it always fails ? Anyway, I have an alternative fix that should better capture the problem: Do we need both Peter's BUG() removing patch and this? No, you can choose either one. They are just different ways to solve the same BUG() problem. Cheers, Longman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 0/2] mm/shrinker: make unregister_shrinker() less fragile
Hello Christoph, On (07/11/15 03:02), Christoph Hellwig wrote: > > Shrinker API does not handle nicely unregister_shrinker() on a > > not-registered > > ->shrinker. Looking at shrinker users, they all have to > > (a) carry on some sort of a flag to make sure that "unregister_shrinker()" > > will not blow up later > > (b) be fishy (potentially can Oops) > > (c) access private members `struct shrinker' (e.g. `shrink.list.next') > > Anyone who does that is broken. You just need to have clear init (with > proper unwinding) and exit functions and order things properly. It > works like most register/unregister calls and should stay that way. > > Maybe you should try to explain what practical problem you're seeing > to start with. Yes, but the main difference here is that it seems that shrinker users don't tend to treat shrinker registration failures as fatal errors and just continue with shrinker functionality disabled. And it makes sense. (copy paste from https://lkml.org/lkml/2015/7/9/751) > Anyone who does that is broken I'm afraid, in that case we almost don't have not-broken shrinker users. 
-- ignoring register_shrinker() error : int ldlm_pools_init(void) : { : int rc; : : rc = ldlm_pools_thread_start(); : if (rc == 0) { : register_shrinker(_pools_srv_shrinker); : register_shrinker(_pools_cli_shrinker); : } : return rc; : } : EXPORT_SYMBOL(ldlm_pools_init); : : void ldlm_pools_fini(void) : { : unregister_shrinker(_pools_srv_shrinker); : unregister_shrinker(_pools_cli_shrinker); : ldlm_pools_thread_stop(); : } : EXPORT_SYMBOL(ldlm_pools_fini); -- and here :void i915_gem_shrinker_init(struct drm_i915_private *dev_priv) :{ :dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan; :dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count; :dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS; :register_shrinker(_priv->mm.shrinker); : :dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; :register_oom_notifier(_priv->mm.oom_notifier); :} -- and here :int __init gfs2_glock_init(void) :{ :unsigned i; ... :register_shrinker(_shrinker); : :return 0; :} : :void gfs2_glock_exit(void) :{ :unregister_shrinker(_shrinker); :destroy_workqueue(glock_workqueue); :destroy_workqueue(gfs2_delete_workqueue); :} -- and here :static int __init lowmem_init(void) :{ :register_shrinker(_shrinker); :return 0; :} : :static void __exit lowmem_exit(void) :{ :unregister_shrinker(_shrinker); :} -- accessing private member 'c->shrink.list.next' to distinguish between 'register_shrinker() was successful and need to unregister it' and 'register_shrinker() failed, don't unregister_shrinker() because it may Oops' :struct cache_set { : ... : struct shrinker shrink; : ... :}; : : ... : : void bch_btree_cache_free(struct cache_set *c) : { : struct btree *b; : struct closure cl; : closure_init_stack(); : : if (c->shrink.list.next) : unregister_shrinker(>shrink); -- and here :int bch_btree_cache_alloc(struct cache_set *c) :{ ... :register_shrinker(>shrink); : : ... 
: :void bch_btree_cache_free(struct cache_set *c) :{ :struct btree *b; :struct closure cl; :closure_init_stack(); : :if (c->shrink.list.next) :unregister_shrinker(>shrink); : And so on and on. In fact, 'git grep = register_shrinker' gives only $ git grep '= register_shrinker' fs/ext4/extents_status.c: err = register_shrinker(>s_es_shrinker); fs/nfsd/nfscache.c: status = register_shrinker(_reply_cache_shrinker); fs/ubifs/super.c: err = register_shrinker(_shrinker_info); mm/huge_memory.c: err = register_shrinker(_zero_page_shrinker); mm/workingset.c:ret = register_shrinker(_shadow_shrinker); The rest is 'broken'. -ss -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 19/45] clk: mxs: Include clk.h in C files that use it
On Sat, Jul 11, 2015 at 7:33 AM, Stephen Boyd wrote: > Clock provider drivers generally shouldn't include clk.h because > it's the consumer API. The clk.h include is being included in all > mxs files because it's part of mxs/clk.h even though nothing > actually requires it in that file. Move the clk.h include to the > C files that are actually using it and remove the clk.h include > from the header file. The clkdev.h include isn't used either, so > drop it too. > > Cc: Shawn Guo > Signed-off-by: Stephen Boyd Acked-by: Shawn Guo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Subject: [PATCH 1/1] Fix redundant check against unsigned int in broken audit test fix for exec arg len
>From 55fae099d46749b73895934aab8c2823c5a23abe Mon Sep 17 00:00:00 2001 From: Mustapha Abiola Date: Sat, 11 Jul 2015 17:01:04 + Subject: [PATCH 1/1] Fix redundant check against unsigned int in broken audit test fix for exec arg len Quick patch to fix the needless check of `len` being < 0 as its an unsigned int. Signed-off-by: Mustapha Abiola --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e85bdfd..0012476 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,7 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { + if (WARN_ON_ONCE(len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } -- 1.9.1 0001-Fix-redundant-check-against-unsigned-int-in-broken-a.patch Description: Binary data
Re: [PATCH v2] net: dsa: mv88e6xxx: add write access to debugfs regs file
From: Vivien Didelot Date: Sat, 11 Jul 2015 14:36:12 -0400 (EDT) > In the meantime, this is really useful for development. i.e. ensuring a good > switchdev/DSA interaction without being able to read and write directly the > hardware VLAN table, is a bit a PITA. A dynamic debugfs looked appropriate. For "development" you can hack the driver, add tracepoints, or use another mechanism anyone hacking the kernel (which by definition someone doing "development" is doing) can do. I do not buy any of your arguments, and you really miss the grand opportunity to export the knobs and values in a way which are going to: 1) Be useful to users 2) Be usable by any similar DSA driver, not just _yours_ So please stop this myopic narrow thinking when you add facilities for development or export values. Think of the big picture and long term, not just your personal perceived immediate needs of today. Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3] tty: add missing rcu_read_lock for task_pgrp
On 06/29/2015 07:59 PM, Patrick Donnelly wrote: > task_pgrp requires an rcu or tasklist lock to be obtained if the returned pid > is to be dereferenced, which kill_pgrp does. Obtain an RCU lock for the > duration of use. > > Signed-off-by: Patrick Donnelly > --- > drivers/tty/n_tty.c | 12 ++-- > drivers/tty/tty_io.c | 17 - > 2 files changed, 22 insertions(+), 7 deletions(-) > > diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c > index c9c27f6..0d631f8 100644 > --- a/drivers/tty/n_tty.c > +++ b/drivers/tty/n_tty.c > @@ -2137,6 +2137,8 @@ extern ssize_t redirected_tty_write(struct file *, > const char __user *, > > static int job_control(struct tty_struct *tty, struct file *file) > { > + struct pid *pgrp; > + > /* Job control check -- must be done at start and after > every sleep (POSIX.1 7.1.1.4). */ > /* NOTE: not yet done after every sleep pending a thorough > @@ -2146,18 +2148,24 @@ static int job_control(struct tty_struct *tty, struct > file *file) > current->signal->tty != tty) > return 0; > > + rcu_read_lock(); > + pgrp = task_pgrp(current); > + > spin_lock_irq(>ctrl_lock); > + > if (!tty->pgrp) > printk(KERN_ERR "n_tty_read: no tty->pgrp!\n"); > - else if (task_pgrp(current) != tty->pgrp) { > + else if (pgrp != tty->pgrp) { > spin_unlock_irq(>ctrl_lock); > if (is_ignored(SIGTTIN) || is_current_pgrp_orphaned()) > return -EIO; I just realized there's a missing rcu_read_unlock() from this early return. 
Regards, Peter Hurley > - kill_pgrp(task_pgrp(current), SIGTTIN, 1); > + kill_pgrp(pgrp, SIGTTIN, 1); > + rcu_read_unlock(); > set_thread_flag(TIF_SIGPENDING); > return -ERESTARTSYS; > } > spin_unlock_irq(>ctrl_lock); > + rcu_read_unlock(); > return 0; > } > > diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c > index 57fc6ee..6bdfb98 100644 > --- a/drivers/tty/tty_io.c > +++ b/drivers/tty/tty_io.c > @@ -388,33 +388,40 @@ EXPORT_SYMBOL_GPL(tty_find_polling_driver); > int tty_check_change(struct tty_struct *tty) > { > unsigned long flags; > + struct pid *pgrp; > int ret = 0; > > if (current->signal->tty != tty) > return 0; > > + rcu_read_lock(); > + pgrp = task_pgrp(current); > + > spin_lock_irqsave(>ctrl_lock, flags); > > if (!tty->pgrp) { > printk(KERN_WARNING "tty_check_change: tty->pgrp == NULL!\n"); > goto out_unlock; > } > - if (task_pgrp(current) == tty->pgrp) > + if (pgrp == tty->pgrp) > goto out_unlock; > spin_unlock_irqrestore(>ctrl_lock, flags); > + > if (is_ignored(SIGTTOU)) > - goto out; > + goto out_rcuunlock; > if (is_current_pgrp_orphaned()) { > ret = -EIO; > - goto out; > + goto out_rcuunlock; > } > - kill_pgrp(task_pgrp(current), SIGTTOU, 1); > + kill_pgrp(pgrp, SIGTTOU, 1); > + rcu_read_unlock(); > set_thread_flag(TIF_SIGPENDING); > ret = -ERESTARTSYS; > -out: > return ret; > out_unlock: > spin_unlock_irqrestore(>ctrl_lock, flags); > +out_rcuunlock: > + rcu_read_unlock(); > return ret; > } > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] xen/blkfront: convert to blk-mq APIs
On 07/12/2015 02:14 AM, Jens Axboe wrote: > On 07/11/2015 07:30 AM, Bob Liu wrote: >> Note: This patch is based on original work of Arianna's internship for >> GNOME's Outreach Program for Women. > > Great to see this finally get prepped to go in! > >> Only one hardware queue is used now, so there is no performance change. > > I would hope that the blk-mq path, even with one queue, is a perf win over > the old interface. So I'm not sure that is correct. But the bigger win will > be with more queues, of course. > Right, but there are memory consumption and migration issues while using more hardware queues. So I separated this patch from that big patchset and hope can be merged first. >> The legacy non-mq code is deleted completely which is the same as other >> drivers like virtio, mtip, and nvme. >> >> Also dropped one unnecessary holding of info->io_lock when calling >> blk_mq_stop_hw_queues(). >> >> Changes in v2: >> - Reorganized blk_mq_queue_rq() >> - Restored most io_locks in place > > Looks good to me. The most common error case is the busy-out not stopping > queues, or not restarting them at completion. But that all looks fine. > > I would, however, rename blk_mq_queue_rq(). It sounds like a core function. > blkif_queue_rq() would be more appropriate. > Will send v3. >> Signed-off-by: Arianna Avanzini >> Signed-off-by: Bob Liu > > Acked-by: Jens Axboe > Thank you! -- Regards, -Bob -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Darlehen.
Haben Sie sich für ein Darlehen oder Hypotheken und ständig abgelehnt suchen die von Finanzinstituten ist Mr.James Rodriguez ein Gläubiger bietet Darlehen zu einem Zinssatz von 3% Personen / Unternehmensverbände, Unternehmen, Betrieben, Schulen, Kirchen, usw., die in der sind Bedarf an Geld in einer Amortisationszeit von 1 bis 30 years.We Angebot 5.000,00 Euro auf 50 Mio. Euro bis zu einschließlich 18 und älter gelten müssen Sie sind. Wir sind vertrauenswürdig, zuverlässig und dynamisch. kontaktieren Sie uns jetzt: jr9304...@gmail.com Ihre Namen .. Menge benötigt werden .. Dauer: .. Dein Land ... Deine Adresse .. Telefon ... Monatliches Einkommen ... Sex ... Dein Alter ... (jr9304...@gmail.com) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 1/2] kconfig: warn of unhandled characters in Kconfig commands
One more nitpick (should be the last one): On Fri, Jul 10, 2015 at 10:25:32AM +0200, Andreas Ruprecht wrote: > In Kconfig, definitions of options take the following form: > " ...". COMMANDs and PARAMs are treated > slightly different by the underlying parser. > > While commit 2e0d737fc76f ("kconfig: don't silently ignore unhandled > characters") introduced a warning for unsupported characters around > PARAMs, it does not cover situations where a COMMAND has additional > characters before it. > > This change makes Kconfig emit a warning if superfluous characters > are found before COMMANDs. As the 'help' statement sometimes is > written as '---help---', the '-' character would now also be regarded > as unhandled and generate a warning. To avoid that, '-' is added to > the list of allowed characters, and the token '---help---' is included > in the zconf.gperf file. > > Reported-by: Valentin Rothberg > Signed-off-by: Andreas Ruprecht > --- > Changes to v1: > - add '---help---' in zconf.gperf instead of special casing > it in zconf.l > > Changes to v2: > - Do no constify char parameter to warn_ignored_character > - Shorten rule definitions for '.' > > scripts/kconfig/zconf.gperf | 1 + > scripts/kconfig/zconf.l | 20 +++- > 2 files changed, 12 insertions(+), 9 deletions(-) > > diff --git a/scripts/kconfig/zconf.gperf b/scripts/kconfig/zconf.gperf > index b6ac02d..7aceb7b 100644 > --- a/scripts/kconfig/zconf.gperf > +++ b/scripts/kconfig/zconf.gperf > @@ -22,6 +22,7 @@ comment,T_COMMENT, TF_COMMAND > config, T_CONFIG, TF_COMMAND > menuconfig, T_MENUCONFIG, TF_COMMAND > help,T_HELP, TF_COMMAND > +"---help---", T_HELP, TF_COMMAND I think the quotes are redundant here, and tabs instead of spaces would be consistent with the other entries. Make sure everything lines up with 8-space tabstops. 
> if, T_IF, TF_COMMAND|TF_PARAM > endif, T_ENDIF,TF_COMMAND > depends, T_DEPENDS, TF_COMMAND > diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l > index 200a3fe..c410d25 100644 > --- a/scripts/kconfig/zconf.l > +++ b/scripts/kconfig/zconf.l > @@ -66,9 +66,16 @@ static void alloc_string(const char *str, int size) > memcpy(text, str, size); > text[size] = 0; > } > + > +static void warn_ignored_character(char chr) > +{ > + fprintf(stderr, > + "%s:%d:warning: ignoring unsupported character '%c'\n", > + zconf_curname(), zconf_lineno(), chr); > +} > %} > > -n[A-Za-z0-9_] > +n[A-Za-z0-9_-] > > %% > int str = 0; > @@ -106,7 +113,7 @@ n [A-Za-z0-9_] > zconflval.string = text; > return T_WORD; > } > - . > + . warn_ignored_character(*yytext); > \n { > BEGIN(INITIAL); > current_file->lineno++; > @@ -132,8 +139,7 @@ n [A-Za-z0-9_] > BEGIN(STRING); > } > \n BEGIN(INITIAL); current_file->lineno++; return T_EOL; > - --- /* ignore */ > - ({n}|[-/.])+{ > + ({n}|[/.])+ { > const struct kconf_id *id = kconf_id_lookup(yytext, yyleng); > if (id && id->flags & TF_PARAM) { > zconflval.id = id; > @@ -146,11 +152,7 @@ n[A-Za-z0-9_] > #.* /* comment */ > \\\ncurrent_file->lineno++; > [[:blank:]]+ > - . { > - fprintf(stderr, > - "%s:%d:warning: ignoring unsupported character '%c'\n", > - zconf_curname(), zconf_lineno(), *yytext); > - } > + . warn_ignored_character(*yytext); > <> { > BEGIN(INITIAL); > } > -- > 1.9.1 > Cheers, Ulf -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] locking/pvqspinlock: Fix kernel panic in locking-selftest
Enabling locking-selftest in a VM guest may cause the following kernel panic: kernel BUG at .../kernel/locking/qspinlock_paravirt.h:137! This is due to the fact that the pvqspinlock unlock function is expecting either a _Q_LOCKED_VAL or _Q_SLOW_VAL in the lock byte. This patch prevents that bug report by ignoring it when debug_locks_silent is set. Otherwise, a warning will be printed if it contains an unexpected value. With this patch applied, the kernel locking-selftest completed without any noise. Signed-off-by: Waiman Long --- kernel/locking/qspinlock_paravirt.h | 12 +++- 1 files changed, 11 insertions(+), 1 deletions(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 04ab181..15d3733 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,6 +4,7 @@ #include #include +#include /* * Implement paravirt qspinlocks; the general idea is to halt the vcpus instead @@ -286,15 +287,24 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { struct __qspinlock *l = (void *)lock; struct pv_node *node; + u8 lockval = cmpxchg(>locked, _Q_LOCKED_VAL, 0); /* * We must not unlock if SLOW, because in that case we must first * unhash. Otherwise it would be possible to have multiple @lock * entries, which would be BAD. */ - if (likely(cmpxchg(>locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL)) + if (likely(lockval == _Q_LOCKED_VAL)) return; + if (unlikely(lockval != _Q_SLOW_VAL)) { + if (debug_locks_silent) + return; + WARN(1, "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", + (unsigned long)lock, atomic_read(>val)); + return; + } + /* * Since the above failed to release, this must be the SLOW path. * Therefore start by looking up the blocked node and unhashing it. 
-- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[GIT PULL] libnvdimm fixes for 4.2-rc2
Hi Linus, please pull from: git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm libnvdimm-fixes ...to receive: 1/ Fixes for a handful of smatch reports (Thanks Dan C.!) and minor bug fixes (patches 1-6) 2/ Correctness fixes to the BLK-mode nvdimm driver (patches 7-10). Granted these are slightly large for a -rc update. They have been out for review in one form or another since the end of May and were deferred from the merge window while we settled on the "PMEM API" for the PMEM-mode nvdimm driver (i.e. memremap_pmem, memcpy_to_pmem, and wmb_pmem). Now that those apis are merged we implement them in the BLK driver to guarantee that mmio aperture moves stay ordered with respect to incoming read/write requests, and that writes are flushed through those mmio-windows and platform-buffers to be persistent on media. These pass the sub-system unit tests with the updates to tools/testing/nvdimm, and have received a successful build-report from the kbuild robot (468 configs). Full git log below with acks from Rafael for the touches to drivers/acpi/. 
[PATCH 01/10] sparse: fix misplaced __pmem definition [PATCH 02/10] libnvdimm: smatch cleanups in __nd_ioctl [PATCH 03/10] nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails [PATCH 04/10] nfit: fix smatch "use after null check" report [PATCH 05/10] pmem: add maintainer for include/linux/pmem.h [PATCH 06/10] tools/testing/nvdimm: mock ioremap_wt [PATCH 07/10] tools/testing/nvdimm: fix return code for unimplemented commands [PATCH 08/10] tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to nfit_test [PATCH 09/10] nfit: update block I/O path to use PMEM API [PATCH 10/10] nfit: add support for NVDIMM "latch" flag The following changes since commit 88793e5c774ec69351ef6b5200bb59f532e41bca: Merge tag 'libnvdimm-for-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm (2015-06-29 10:34:42 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm libnvdimm-fixes for you to fetch changes up to f0f2c072cf530d5b8890be5051cc8b36b0c54cce: nfit: add support for NVDIMM "latch" flag (2015-07-10 14:43:50 -0400) Axel Lin (1): nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails Dan Williams (6): sparse: fix misplaced __pmem definition libnvdimm: smatch cleanups in __nd_ioctl nfit: fix smatch "use after null check" report tools/testing/nvdimm: mock ioremap_wt tools/testing/nvdimm: fix return code for unimplemented commands tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to nfit_test Ross Zwisler (3): pmem: add maintainer for include/linux/pmem.h nfit: update block I/O path to use PMEM API nfit: add support for NVDIMM "latch" flag MAINTAINERS | 1 + drivers/acpi/nfit.c | 134 ++ drivers/acpi/nfit.h | 20 +- drivers/nvdimm/bus.c | 11 +--- include/linux/compiler.h | 2 +- tools/testing/nvdimm/Kbuild | 3 + tools/testing/nvdimm/test/iomap.c | 27 tools/testing/nvdimm/test/nfit.c | 52 ++- 8 files changed, 223 insertions(+), 27 deletions(-) commit 
31f02455455d405320e2f749696bef4e02903b35 Author: Dan Williams Date: Tue Jun 30 12:07:17 2015 -0400 sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams commit af834d457d9ed69e14836b63d0da198fdd2ec706 Author: Dan Williams Date: Tue Jun 30 14:10:09 2015 -0400 libnvdimm: smatch cleanups in __nd_ioctl Drop use of access_ok() since we are already using copy_{to|from}_user() which do their own access_ok(). Reported-by: Dan Carpenter Signed-off-by: Dan Williams commit daa1dee405d7d3d3e816b84a692e838a5647a02a Author: Axel Lin Date: Sun Jun 28 17:00:57 2015 +0800 nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails Return proper error if class_create() fails. Signed-off-by: Axel Lin Signed-off-by: Dan Williams commit 193ccca43850d2355e7690a93ab9d7d78d38f905 Author: Dan Williams Date: Tue Jun 30 16:09:39 2015 -0400 nfit: fix smatch "use after null check" report drivers/acpi/nfit.c:1224 acpi_nfit_blk_region_enable() error: we previously assumed 'nfit_mem' could be null
Re: [PATCH 0/7] Add rcu_sync infrastructure to avoid _expedited() in percpu-rwsem
On Sat, Jul 11, 2015 at 4:35 PM, Oleg Nesterov wrote: > > Linus, I am mostly trying to convince you. Nobody else objected so far. > Could you please comment? I don't mind this part of the series. It's the whole "do we really want to put the effort into percpu-rwsem I worry about, as there just aren't that many users. The conversions made that "too damn special" thing go away, but the conversions (particularly the big _real_ user, namely fs/locks.c) seem to have serious performance problems that are quite possibly not fixable. So my objection isn't to your change, my objection is to the whole "right now there are two users, and they both use a global lock, so *of course* they scale like shit, and this is all just papering over that much more fundamental problem". I hate one-off locking. One-off locking with one global lock? Yeah, that just smells. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 0/3] special_mapping_fault() is broken
On 07/10, Andrew Morton wrote: > > On Fri, 10 Jul 2015 18:51:21 +0200 Oleg Nesterov wrote: > > > special_mapping_fault() is absolutely broken. It seems it was always > > wrong, but this didn't matter until vdso/vvar started to use more than > > one page. > > > > The patches are the same, just 1/3 was re-diffed on top of the recent > > 6b7339f4c31ad "mm: avoid setting up anonymous pages into file mapping" > > from Kirill. > > > > And after this change vma_is_anonymous() becomes really trivial, it > > simply checks vm_ops == NULL. However, I do think the helper makes > > sense. There are a lot of ->vm_ops != NULL checks, the helper makes > > the caller's code more understandable (self-documented) and this is > > more grep-friendly. > > I'm trying to work out which kernel version(s) this should go into, > without a lot of success. > > What do we think the worst-case effects of the bug? Ah, I should have mentioned this. And when I re-read my messages I see that "absolutely broken" looks like "should be fixed asap". Sorry for confusion. No, this bug is not serious. Nothing bad can happen from the kernel perspective. And I doubt that some application will ever unmap/remap the part of vdso or any other install_special_mapping() user. So this is just correctness fix. In fact, to me the main problem is that I was totally confused when I tried to read/understand this code ;) Oleg. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/7] percpu-rwsem: change it to rely on rss_sync infrastructure
Currently down_write/up_write calls synchronize_sched_expedited() twice which is evil. Change this code to rely on rcu-sync primitives. This avoids the _expedited "big hammer", and this can be faster in the contended case or even in the case when a single thread does down_write/up_write in a loop. Of course, a single down_write() will take more time, but otoh it will be much more friendly to the whole system. To simplify the review this patch doesn't update the comments, fixed by the next change. Signed-off-by: Oleg Nesterov --- include/linux/percpu-rwsem.h |3 ++- kernel/locking/percpu-rwsem.c | 18 +++--- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 3e88c9a..3e58226 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,11 +5,12 @@ #include #include #include +#include #include struct percpu_rw_semaphore { + struct rcu_sync_struct rss; unsigned int __percpu *fast_read_ctr; - atomic_twrite_ctr; struct rw_semaphore rw_sem; atomic_tslow_read_ctr; wait_queue_head_t write_waitq; diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 652a8ee..69a7314 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ __init_rwsem(>rw_sem, name, rwsem_key); - atomic_set(>write_ctr, 0); + rcu_sync_init(>rss, RCU_SCHED_SYNC); atomic_set(>slow_read_ctr, 0); init_waitqueue_head(>write_waitq); return 0; @@ -25,6 +25,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, void percpu_free_rwsem(struct percpu_rw_semaphore *brw) { + rcu_sync_dtor(>rss); free_percpu(brw->fast_read_ctr); brw->fast_read_ctr = NULL; /* catch use after free bugs */ } @@ -54,13 +55,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) */ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, 
unsigned int val) { - bool success = false; + bool success; preempt_disable(); - if (likely(!atomic_read(>write_ctr))) { + success = rcu_sync_is_idle(>rss); + if (likely(success)) __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } preempt_enable(); return success; @@ -126,8 +126,6 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(>write_ctr); /* * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read *so that update_fast_ctr() can't succeed. @@ -139,7 +137,7 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) *fast-path, it executes a full memory barrier before we return. *See R_W case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); + rcu_sync_enter(>rss); /* exclude other writers, and block the new readers completely */ down_write(>rw_sem); @@ -159,7 +157,5 @@ void percpu_up_write(struct percpu_rw_semaphore *brw) * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(>write_ctr); + rcu_sync_exit(>rss); } -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/7] rcusync: Introduce struct rcu_sync_ops
Add the new struct rcu_sync_ops which holds sync/call methods, and turn the function pointers in rcu_sync_struct into an array of struct rcu_sync_ops. This simplifies the "init" helpers, and this way it is simpler to add the new methods we need, especially ifdef'ed. Reviewed-by: Paul E. McKenney Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcusync.h | 60 ++- kernel/rcu/sync.c | 43 + 2 files changed, 45 insertions(+), 58 deletions(-) diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h index 7858491..988ec33 100644 --- a/include/linux/rcusync.h +++ b/include/linux/rcusync.h @@ -4,6 +4,8 @@ #include #include +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + struct rcu_sync_struct { int gp_state; int gp_count; @@ -12,53 +14,37 @@ struct rcu_sync_struct { int cb_state; struct rcu_head cb_head; - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + enum rcu_sync_type gp_type; }; -#define ___RCU_SYNC_INIT(name) \ - .gp_state = 0, \ - .gp_count = 0, \ - .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0 - -#define __RCU_SCHED_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_sched, \ - .call = call_rcu_sched, \ -} - -#define __RCU_BH_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu_bh, \ - .call = call_rcu_bh,\ -} - -#define __RCU_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu,\ - .call = call_rcu, \ -} - -#define DEFINE_RCU_SCHED_SYNC(name)\ - struct rcu_sync_struct name = __RCU_SCHED_SYNC_INIT(name) - -#define DEFINE_RCU_BH_SYNC(name) \ - struct rcu_sync_struct name = __RCU_BH_SYNC_INIT(name) - -#define DEFINE_RCU_SYNC(name) \ - struct rcu_sync_struct name = __RCU_SYNC_INIT(name) - static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return !rss->gp_state; /* GP_IDLE */ } -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - extern void 
rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync_struct *); extern void rcu_sync_exit(struct rcu_sync_struct *); +#define __RCU_SYNC_INITIALIZER(name, type) { \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0, \ + .gp_type = type,\ + } + +#define__DEFINE_RCU_SYNC(name, type) \ + struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) + +#define DEFINE_RCU_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SYNC) + +#define DEFINE_RCU_SCHED_SYNC(name)\ + __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) + +#define DEFINE_RCU_BH_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) + #endif /* _LINUX_RCUSYNC_H_ */ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index f84176a..99051b7 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -1,7 +1,24 @@ - #include #include +static const struct { + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +} gp_ops[] = { + [RCU_SYNC] = { + .sync = synchronize_rcu, + .call = call_rcu, + }, + [RCU_SCHED_SYNC] = { + .sync = synchronize_sched, + .call = call_rcu_sched, + }, + [RCU_BH_SYNC] = { + .sync = synchronize_rcu_bh, + .call = call_rcu_bh, + }, +}; + enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; @@ -11,23 +28,7 @@ void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type) { memset(rss, 0,
[PATCH 7/7] percpu-rwsem: cleanup the lockdep annotations in percpu_down_read()
Stolen from Peter's patch. Change percpu_down_read() to use __down_read(), this way we can do rwsem_acquire_read() unconditionally at the start to make this code more symmetric and clean. Signed-off-by: Oleg Nesterov --- kernel/locking/percpu-rwsem.c | 10 +- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 705aefd..2c54c64 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -62,14 +62,14 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) void percpu_down_read(struct percpu_rw_semaphore *brw) { might_sleep(); - if (likely(update_fast_ctr(brw, +1))) { - rwsem_acquire_read(>rw_sem.dep_map, 0, 0, _RET_IP_); + rwsem_acquire_read(>rw_sem.dep_map, 0, 0, _RET_IP_); + + if (likely(update_fast_ctr(brw, +1))) return; - } - down_read(>rw_sem); + /* Avoid rwsem_acquire_read() and rwsem_release() */ + __down_read(>rw_sem); atomic_inc(>slow_read_ctr); - /* avoid up_read()->rwsem_release() */ __up_read(>rw_sem); } -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 6/7] percpu-rwsem: fix the comments outdated by rcu_sync
Update the comments broken by the previous change. Signed-off-by: Oleg Nesterov --- kernel/locking/percpu-rwsem.c | 50 + 1 files changed, 11 insertions(+), 39 deletions(-) diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 69a7314..705aefd 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -31,27 +31,12 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) } /* - * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the - * fast per-cpu counter. The writer uses synchronize_sched_expedited() to - * serialize with the preempt-disabled section below. - * - * The nontrivial part is that we should guarantee acquire/release semantics - * in case when - * - * R_W: down_write() comes after up_read(), the writer should see all - * changes done by the reader - * or - * W_R: down_read() comes after up_write(), the reader should see all - * changes done by the writer + * This is the fast-path for down_read/up_read. If it succeeds we rely + * on the barriers provided by rcu_sync_enter/exit; see the comments in + * percpu_down_write() and percpu_up_write(). * * If this helper fails the callers rely on the normal rw_semaphore and * atomic_dec_and_test(), so in this case we have the necessary barriers. - * - * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or - * __this_cpu_add() below can be reordered with any LOAD/STORE done by the - * reader inside the critical section. See the comments in down_write and - * up_write below. */ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) { @@ -113,29 +98,15 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) return sum; } -/* - * A writer increments ->write_ctr to force the readers to switch to the - * slow mode, note the atomic_read() check in update_fast_ctr(). 
- * - * After that the readers can only inc/dec the slow ->slow_read_ctr counter, - * ->fast_read_ctr is stable. Once the writer moves its sum into the slow - * counter it represents the number of active readers. - * - * Finally the writer takes ->rw_sem for writing and blocks the new readers, - * then waits until the slow counter becomes zero. - */ void percpu_down_write(struct percpu_rw_semaphore *brw) { /* -* 1. Ensures that write_ctr != 0 is visible to any down_read/up_read -*so that update_fast_ctr() can't succeed. -* -* 2. Ensures we see the result of every previous this_cpu_add() in -*update_fast_ctr(). +* Make rcu_sync_is_idle() == F and thus disable the fast-path in +* percpu_down_read() and percpu_up_read(), and wait for gp pass. * -* 3. Ensures that if any reader has exited its critical section via -*fast-path, it executes a full memory barrier before we return. -*See R_W case in the comment above update_fast_ctr(). +* The latter synchronises us with the preceeding readers which used +* the fast-past, so we can not miss the result of __this_cpu_add() +* or anything else inside their criticial sections. */ rcu_sync_enter(>rss); @@ -154,8 +125,9 @@ void percpu_up_write(struct percpu_rw_semaphore *brw) /* release the lock, but the readers can't use the fast-path */ up_write(>rw_sem); /* -* Insert the barrier before the next fast-path in down_read, -* see W_R case in the comment above update_fast_ctr(). +* Enable the fast-path in percpu_down_read() and percpu_up_read() +* but only after another gp pass; this adds the necessary barrier +* to ensure the reader can't miss the changes done by us. */ rcu_sync_exit(>rss); } -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/7] rcusync: Introduce rcu_sync_dtor()
Add the new rcu_sync_ops->wait() method and the new helper, rcu_sync_dtor(). It is needed if you are going to, say, kfree(rcu_sync_object). It simply calls ops->wait() to "flush" the potentially pending rcu callback. Reviewed-by: Paul E. McKenney Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcusync.h |1 + kernel/rcu/sync.c | 22 ++ 2 files changed, 23 insertions(+), 0 deletions(-) diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h index a51e5c7..0135838 100644 --- a/include/linux/rcusync.h +++ b/include/linux/rcusync.h @@ -31,6 +31,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync_struct *); extern void rcu_sync_exit(struct rcu_sync_struct *); +extern void rcu_sync_dtor(struct rcu_sync_struct *); #define __RCU_SYNC_INITIALIZER(name, type) { \ .gp_state = 0, \ diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 32cdbb8..8835ad1 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -10,6 +10,7 @@ static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + void (*wait)(void); #ifdef CONFIG_PROVE_RCU int (*held)(void); #endif @@ -17,16 +18,19 @@ static const struct { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + .wait = rcu_barrier_sched, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + .wait = rcu_barrier_bh, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -128,3 +132,21 @@ void rcu_sync_exit(struct rcu_sync_struct *rss) } spin_unlock_irq(>rss_lock); } + +void rcu_sync_dtor(struct rcu_sync_struct *rss) +{ + int cb_state; + + BUG_ON(rss->gp_count); + + spin_lock_irq(>rss_lock); + if (rss->cb_state == CB_REPLAY) + rss->cb_state = 
CB_PENDING; + cb_state = rss->cb_state; + spin_unlock_irq(>rss_lock); + + if (cb_state != CB_IDLE) { + gp_ops[rss->gp_type].wait(); + BUG_ON(rss->cb_state != CB_IDLE); + } +} -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/7] rcusync: Add the CONFIG_PROVE_RCU checks
It would be nice to validate that the caller of rcu_sync_is_idle() holds the corresponding type of RCU read-side lock. Add the new rcu_sync_ops->held() method and change rcu_sync_is_idle() to WARN() if it returns false. This obviously penalizes the readers (fast-path), but only if CONFIG_PROVE_RCU. Reviewed-by: Paul E. McKenney Suggested-by: "Paul E. McKenney" Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcusync.h |6 ++ kernel/rcu/sync.c | 21 + 2 files changed, 27 insertions(+), 0 deletions(-) diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h index 988ec33..a51e5c7 100644 --- a/include/linux/rcusync.h +++ b/include/linux/rcusync.h @@ -17,9 +17,15 @@ struct rcu_sync_struct { enum rcu_sync_type gp_type; }; +extern bool __rcu_sync_is_idle(struct rcu_sync_struct *); + static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { +#ifdef CONFIG_PROVE_RCU + return __rcu_sync_is_idle(rss); +#else return !rss->gp_state; /* GP_IDLE */ +#endif } extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type); diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 99051b7..32cdbb8 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -1,21 +1,33 @@ #include #include +#ifdef CONFIG_PROVE_RCU +#define __INIT_HELD(func) .held = func, +#else +#define __INIT_HELD(func) +#endif + static const struct { void (*sync)(void); void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +#ifdef CONFIG_PROVE_RCU + int (*held)(void); +#endif } gp_ops[] = { [RCU_SYNC] = { .sync = synchronize_rcu, .call = call_rcu, + __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { .sync = synchronize_sched, .call = call_rcu_sched, + __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { .sync = synchronize_rcu_bh, .call = call_rcu_bh, + __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -24,6 +36,15 @@ enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; #definerss_lockgp_wait.lock +#ifdef CONFIG_PROVE_RCU +bool __rcu_sync_is_idle(struct 
rcu_sync_struct *rss) +{ + WARN_ON(!gp_ops[rss->gp_type].held()); + return rss->gp_state == GP_IDLE; +} +EXPORT_SYMBOL_GPL(__rcu_sync_is_idle); +#endif + void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type) { memset(rss, 0, sizeof(*rss)); -- 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/7] rcu: Create rcu_sync infrastructure
It is functionally equivalent to struct rcu_sync_struct { atomic_t counter; }; static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return atomic_read(>counter) == 0; } static inline void rcu_sync_enter(struct rcu_sync_struct *rss) { atomic_inc(>counter); synchronize_sched(); } static inline void rcu_sync_exit(struct rcu_sync_struct *rss) { synchronize_sched(); atomic_dec(>counter); } except: it records the state and synchronize_sched() is only called by rcu_sync_enter() and only if necessary. Reviewed-by: Paul E. McKenney Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) --- include/linux/rcusync.h | 64 kernel/rcu/Makefile |2 +- kernel/rcu/sync.c | 108 +++ 3 files changed, 173 insertions(+), 1 deletions(-) create mode 100644 include/linux/rcusync.h create mode 100644 kernel/rcu/sync.c diff --git a/include/linux/rcusync.h b/include/linux/rcusync.h new file mode 100644 index 000..7858491 --- /dev/null +++ b/include/linux/rcusync.h @@ -0,0 +1,64 @@ +#ifndef _LINUX_RCUSYNC_H_ +#define _LINUX_RCUSYNC_H_ + +#include +#include + +struct rcu_sync_struct { + int gp_state; + int gp_count; + wait_queue_head_t gp_wait; + + int cb_state; + struct rcu_head cb_head; + + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +}; + +#define ___RCU_SYNC_INIT(name) \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0 + +#define __RCU_SCHED_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_sched, \ + .call = call_rcu_sched, \ +} + +#define __RCU_BH_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu_bh, \ + .call = call_rcu_bh,\ +} + +#define __RCU_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu,\ + .call = call_rcu, \ +} + +#define DEFINE_RCU_SCHED_SYNC(name)\ + struct rcu_sync_struct name = __RCU_SCHED_SYNC_INIT(name) + +#define DEFINE_RCU_BH_SYNC(name) \ + struct rcu_sync_struct name = 
__RCU_BH_SYNC_INIT(name) + +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync_struct name = __RCU_SYNC_INIT(name) + +static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) +{ + return !rss->gp_state; /* GP_IDLE */ +} + +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + +extern void rcu_sync_init(struct rcu_sync_struct *, enum rcu_sync_type); +extern void rcu_sync_enter(struct rcu_sync_struct *); +extern void rcu_sync_exit(struct rcu_sync_struct *); + +#endif /* _LINUX_RCUSYNC_H_ */ + diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 50a8084..61a1656 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,4 +1,4 @@ -obj-y += update.o +obj-y += update.o sync.o obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c new file mode 100644 index 000..f84176a --- /dev/null +++ b/kernel/rcu/sync.c @@ -0,0 +1,108 @@ + +#include +#include + +enum { GP_IDLE = 0, GP_PENDING, GP_PASSED }; +enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY }; + +#definerss_lockgp_wait.lock + +void rcu_sync_init(struct rcu_sync_struct *rss, enum rcu_sync_type type) +{ + memset(rss, 0, sizeof(*rss)); + init_waitqueue_head(>gp_wait); + + switch (type) { + case RCU_SYNC: + rss->sync = synchronize_rcu; + rss->call = call_rcu; + break; + + case RCU_SCHED_SYNC: + rss->sync = synchronize_sched; + rss->call = call_rcu_sched; + break; + + case RCU_BH_SYNC: + rss->sync = synchronize_rcu_bh; + rss->call = call_rcu_bh; + break; + } +} +
[PATCH 0/7] Add rcu_sync infrastructure to avoid _expedited() in percpu-rwsem
Hello, Let me make another attempt to push rcu_sync and add a _simple_ improvement into percpu-rwsem. It already has another user (cgroups) and I think it can have more. Peter has some use-cases. sb->s_writers (which afaics is buggy btw) can be turned into percpu-rwsem too I think. Linus, I am mostly trying to convince you. Nobody else objected so far. Could you please comment? Peter, if you agree with 5-7, can I add your Signed-off-by's ? To me, the most annoying problem with percpu_rw_semaphore is synchronize_sched_expedited() which is called twice by every down_write/up_write. I think it would be really nice to avoid it. Let's start with the simple test-case, #!/bin/bash perf probe -x /lib/libc.so.6 syscall for i in {1..1000}; do echo 1 >| /sys/kernel/debug/tracing/events/probe_libc/syscall/enable echo 0 >| /sys/kernel/debug/tracing/events/probe_libc/syscall/enable done It needs ~ 13.5 seconds (2 CPUs, KVM). If we simply replace synchronize_sched_expedited() with synchronize_sched() it takes ~ 67.5 seconds. This is not good. With these patches it takes around 13.3 seconds again (a little bit faster), and it doesn't use _expedited. synchronize_sched() is called 1-2 (max 3) times on average. And now it does not disturb the whole system. And just in case, I also measured for (i = 0; i < 100; ++i) { percpu_down_write(_mmap_sem); percpu_up_write(_mmap_sem); } and it runs more than 1.5 times faster (to remind, only 2 CPUs), but this is not that interesting, I agree. And note that the actual change in percpu-rwsem is really simple, and imo it even makes the code simpler. (the last patch is off-topic cleanup). So the only complication is rcu_sync itself. But, rightly or not (I am obviously biased), I believe this new rcu infrastructure is natural and useful, and I think it can have more users too. And. We can do more improvements in rcu_sync and percpu-rwsem, and I don't only mean other optimizations from Peter. 
In particular, we can extract the "wait for gp pass" from rcu_sync_enter() into another helper, we can teach percpu_down_write() to allow multiple writers, and more. Oleg. include/linux/percpu-rwsem.h |3 +- include/linux/rcusync.h | 57 +++ kernel/locking/percpu-rwsem.c | 78 ++--- kernel/rcu/Makefile |2 +- kernel/rcu/sync.c | 152 + 5 files changed, 235 insertions(+), 57 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ARM: pxa: fix dm9000 platform data regression
Since dm9000 driver added support for a vcc regulator, platform data based platforms have their ethernet broken, as the regulator claiming returns -EPROBE_DEFER and prevents dm9000 loading. This patch fixes this for all pxa boards using dm9000, by using the specific regulator_has_full_constraints() function. This was discovered and tested on the cm-x300 board. Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/capc7117.c | 3 +++ arch/arm/mach-pxa/cm-x2xx.c| 3 +++ arch/arm/mach-pxa/cm-x300.c| 2 ++ arch/arm/mach-pxa/colibri-pxa270.c | 3 +++ arch/arm/mach-pxa/em-x270.c| 2 ++ arch/arm/mach-pxa/icontrol.c | 3 +++ arch/arm/mach-pxa/trizeps4.c | 3 +++ arch/arm/mach-pxa/vpac270.c| 3 +++ arch/arm/mach-pxa/zeus.c | 2 ++ 9 files changed, 24 insertions(+) diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c index c092730749b9..bf366b39fa61 100644 --- a/arch/arm/mach-pxa/capc7117.c +++ b/arch/arm/mach-pxa/capc7117.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -144,6 +145,8 @@ static void __init capc7117_init(void) capc7117_uarts_init(); capc7117_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(CAPC7117, diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c index bb99f59a36d8..a17a91eb8e9a 100644 --- a/arch/arm/mach-pxa/cm-x2xx.c +++ b/arch/arm/mach-pxa/cm-x2xx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -466,6 +467,8 @@ static void __init cmx2xx_init(void) cmx2xx_init_ac97(); cmx2xx_init_touchscreen(); cmx2xx_init_leds(); + + regulator_has_full_constraints(); } static void __init cmx2xx_init_irq(void) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 4d3588d26c2a..5851f4c254c1 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -835,6 +835,8 @@ static void __init cm_x300_init(void) cm_x300_init_ac97(); cm_x300_init_wi2wi(); 
cm_x300_init_bl(); + + regulator_has_full_constraints(); } static void __init cm_x300_fixup(struct tag *tags, char **cmdline) diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index 5f9d9303b346..3503826333c7 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void) printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n", colibri_pxa270_baseboard); } + + regulator_has_full_constraints(); } /* The "Income s.r.o. SH-Dmaster PXA270 SBC" board can be booted either diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 51531ecffca8..9d7072b04045 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -1306,6 +1306,8 @@ static void __init em_x270_init(void) em_x270_init_i2c(); em_x270_init_camera(); em_x270_userspace_consumers_init(); + + regulator_has_full_constraints(); } MACHINE_START(EM_X270, "Compulab EM-X270") diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index c98511c5abd1..9b0eb0252af6 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "generic.h" @@ -185,6 +186,8 @@ static void __init icontrol_init(void) mxm_8x10_mmc_init(); icontrol_can_init(); + + regulator_has_full_constraints(); } MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM") diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 872dcb20e757..066e3a250ee0 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -534,6 +535,8 @@ static void __init trizeps4_init(void) BCR_writew(trizeps_conxs_bcr); board_backlight_power(1); + + regulator_has_full_constraints(); } static void __init 
trizeps4_map_io(void) diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index aa89488f961e..54122a983ae3 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -711,6 +712,8 @@ static void __init vpac270_init(void) vpac270_ts_init(); vpac270_rtc_init(); vpac270_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(VPAC270, "Voipac PXA270") diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index ac2ae5c71ab4..6158566fa0f7 100644 ---
Loan Offer At 3% Interest Rate
Good Day Sir/Madam. I am Mr John Matthew, A private Money lender. I am Writing you to introduce a small and large business money lending service to you. I can service your financial need with less payback problem that is why we fund you for just 3%. Need business or a personal loan Fill the Short application below.Contact us today for that loan you need with this email address :johnmatthewloanlen...@gmail.com NAME. COUNTRY. STATE. ADDRESS... PHONE NUMBER... AMOUNT NEEDED AS LOAN.. LOAN DURATION.. MONTHLY INCOME. Your's Faithfully: Mr John Matthew: -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Lånetilbud Ved 3% rente
Good Day Sir / Fru. Jeg er John Matthew, En privat Money långiver. Jeg skriver dig til at introducere en lille og store virksomheder penge udlån service til dig. Jeg kan servicere dine finansielle behov med mindre tilbagebetalingstid problem, der er derfor, vi finansierer dig for kun 3%. Brug forretning eller et personligt lån Fyld Short ansøgning below.Contact os i dag for dette lån, du har brug for med denne e-mail-adresse: johnmatthewloanlen...@gmail.com NAME . LAND . STATE . ADRESSE ... TELEFONNUMMER ... Nødvendige beløb AS LÅN .. LÅN VARIGHED .. Månedlige indkomst . Dine er trofast: John Matthew: -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 3/3] arm64, mm: Use IPIs for TLB invalidation.
Hello. On 07/11/2015 11:25 PM, David Daney wrote: From: David Daney Most broadcast TLB invalidations are unnecessary. So when invalidating for a given mm/vma target the only the needed CPUs via The only the needed? and IPI. For global TLB invalidations, also use IPI. Tested on Cavium ThunderX. This change reduces 'time make -j48' on kernel from 139s to 116s (83% as long). The patch is needed because of a ThunderX Pass1 erratum: Exclusive store operations unreliable in the presence of broadcast TLB invalidations. The performance improvements shown make it compelling even without the erratum workaround need. Signed-off-by: David Daney WBR, Sergei -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] irqchip: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc
Commit-ID: 5b29264c659c31bada65582005d99adb3bb41fea Gitweb: http://git.kernel.org/tip/5b29264c659c31bada65582005d99adb3bb41fea Author: Jiang Liu AuthorDate: Thu, 4 Jun 2015 12:13:20 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:27 +0200 irqchip: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc while we already have a pointer to corresponding irq_desc. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: linux-arm-ker...@lists.infradead.org Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: Maxime Ripard Link: http://lkml.kernel.org/r/1433391238-19471-11-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/exynos-combiner.c | 4 ++-- drivers/irqchip/irq-armada-370-xp.c | 2 +- drivers/irqchip/irq-gic.c | 4 ++-- drivers/irqchip/irq-orion.c | 2 +- drivers/irqchip/irq-sunxi-nmi.c | 2 +- drivers/irqchip/spear-shirq.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index 6ad04ac..1a4a1b0 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -67,8 +67,8 @@ static void combiner_unmask_irq(struct irq_data *data) static void combiner_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) { - struct combiner_chip_data *chip_data = irq_get_handler_data(irq); - struct irq_chip *chip = irq_get_chip(irq); + struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq, combiner_irq; unsigned long status; diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 73b73ac..39b72da 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -450,7 +450,7 @@ 
static void armada_370_xp_handle_msi_irq(struct pt_regs *r, bool b) {} static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) { - struct irq_chip *chip = irq_get_chip(irq); + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned long irqmap, irqn, irqsrc, cpuid; unsigned int cascade_irq; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 2eaae9c..cadd862 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -288,8 +288,8 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) { - struct gic_chip_data *chip_data = irq_get_handler_data(irq); - struct irq_chip *chip = irq_get_chip(irq); + struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq, gic_irq; unsigned long status; diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c index 7fbae56..5ea999a 100644 --- a/drivers/irqchip/irq-orion.c +++ b/drivers/irqchip/irq-orion.c @@ -108,7 +108,7 @@ IRQCHIP_DECLARE(orion_intc, "marvell,orion-intc", orion_irq_init); static void orion_bridge_irq_handler(unsigned int irq, struct irq_desc *desc) { - struct irq_domain *d = irq_get_handler_data(irq); + struct irq_domain *d = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, 0); u32 stat = readl_relaxed(gc->reg_base + ORION_BRIDGE_IRQ_CAUSE) & diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 9186a11..772a82c 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -61,7 +61,7 @@ static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off) static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_get_chip(irq); + struct 
irq_chip *chip = irq_desc_get_chip(desc); unsigned int virq = irq_find_mapping(domain, 0); chained_irq_enter(chip, desc); diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 3df144f..6171855 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -184,7 +184,7 @@ static struct spear_shirq *spear320_shirq_blocks[] = { static void shirq_handler(unsigned irq, struct irq_desc *desc) { - struct spear_shirq *shirq = irq_get_handler_data(irq); + struct spear_shirq *shirq = irq_desc_get_handler_data(desc); u32 pend; pend = readl(shirq->base + shirq->status_reg) & shirq->mask; -- To
[tip:irq/core] genirq: Remove the irq argument from setup_affinity()
Commit-ID: a8a98eac7b238beb49b479c164303651d5a37eb6 Gitweb: http://git.kernel.org/tip/a8a98eac7b238beb49b479c164303651d5a37eb6 Author: Jiang Liu AuthorDate: Thu, 4 Jun 2015 12:13:30 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:25 +0200 genirq: Remove the irq argument from setup_affinity() Unused except for the alpha wrapper, which can retrieve it from the irq descriptor. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1433391238-19471-21-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f5b7742..886f115 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -361,8 +361,7 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); /* * Generic version of the affinity autoselector. 
*/ -static int -setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) +static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) { struct cpumask *set = irq_default_affinity; int node = irq_desc_get_node(desc); @@ -395,10 +394,10 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) return 0; } #else -static inline int -setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask) +/* Wrapper for ALPHA specific affinity selector magic */ +static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask) { - return irq_select_affinity(irq); + return irq_select_affinity(irq_desc_get_irq(d)); } #endif @@ -412,14 +411,14 @@ int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask) int ret; raw_spin_lock_irqsave(>lock, flags); - ret = setup_affinity(irq, desc, mask); + ret = setup_affinity(desc, mask); raw_spin_unlock_irqrestore(>lock, flags); return ret; } #else static inline int -setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) +setup_affinity(struct irq_desc *desc, struct cpumask *mask) { return 0; } @@ -1256,7 +1255,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } /* Set default affinity mask once everything is setup */ - setup_affinity(irq, desc, mask); + setup_affinity(desc, mask); } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] irqchip/mips-gic: Use access helper irq_data_get_affinity_mask()
Commit-ID: 72f86db4dd5eafbadd45c9092df73c49f320f638 Gitweb: http://git.kernel.org/tip/72f86db4dd5eafbadd45c9092df73c49f320f638 Author: Jiang Liu AuthorDate: Mon, 1 Jun 2015 16:05:38 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:27 +0200 irqchip/mips-gic: Use access helper irq_data_get_affinity_mask() Use access helper irq_data_get_affinity_mask() to hide implementation details of struct irq_desc. [ tglx: Verified with coccinelle ] Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Link: http://lkml.kernel.org/r/1433145945-789-30-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 42dbebc..e6c2df9 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -405,7 +405,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, clear_bit(irq, pcpu_masks[i].pcpu_mask); set_bit(irq, pcpu_masks[cpumask_first()].pcpu_mask); - cpumask_copy(d->affinity, cpumask); + cpumask_copy(irq_data_get_affinity_mask(d), cpumask); spin_unlock_irqrestore(_lock, flags); return IRQ_SET_MASK_OK_NOCOPY; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] genirq: Remove the irq argument from check_irq_resend()
Commit-ID: 0798abeb7eec37dcc20f252c2195fc31c41561f9 Gitweb: http://git.kernel.org/tip/0798abeb7eec37dcc20f252c2195fc31c41561f9 Author: Jiang Liu AuthorDate: Thu, 4 Jun 2015 12:13:27 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:24 +0200 genirq: Remove the irq argument from check_irq_resend() It's only used in the software resend case and can be retrieved from irq_desc if necessary. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1433391238-19471-18-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 2 +- kernel/irq/internals.h | 2 +- kernel/irq/manage.c| 2 +- kernel/irq/resend.c| 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f3c3d55..0cfbd15 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -187,7 +187,7 @@ int irq_startup(struct irq_desc *desc, bool resend) irq_enable(desc); } if (resend) - check_irq_resend(desc, desc->irq_data.irq); + check_irq_resend(desc); return ret; } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 3e03824..7054947e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -90,7 +90,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *act irqreturn_t handle_irq_event(struct irq_desc *desc); /* Resending of interrupts :*/ -void check_irq_resend(struct irq_desc *desc, unsigned int irq); +void check_irq_resend(struct irq_desc *desc); bool irq_wait_for_poll(struct irq_desc *desc); void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f974485..c2e835d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -516,7 +516,7 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq) /* Prevent probing on this irq: */ 
irq_settings_set_noprobe(desc); irq_enable(desc); - check_irq_resend(desc, irq); + check_irq_resend(desc); /* fall-through */ } default: diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 9065107..32fc47c 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -53,7 +53,7 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0); * * Is called with interrupts disabled and desc->lock held. */ -void check_irq_resend(struct irq_desc *desc, unsigned int irq) +void check_irq_resend(struct irq_desc *desc) { /* * We do not resend level type interrupts. Level type @@ -74,6 +74,8 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) if (!desc->irq_data.chip->irq_retrigger || !desc->irq_data.chip->irq_retrigger(>irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND + unsigned int irq = irq_desc_get_irq(desc); + /* * If the interrupt has a parent irq and runs * in the thread context of the parent irq, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] genirq: Remove the irq argument from note_interrupt()
Commit-ID: 0dcdbc97557fd8c297c4e38e9f66e304a64bae9d Gitweb: http://git.kernel.org/tip/0dcdbc97557fd8c297c4e38e9f66e304a64bae9d Author: Jiang Liu AuthorDate: Thu, 4 Jun 2015 12:13:28 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:25 +0200 genirq: Remove the irq argument from note_interrupt() Only required for the slow path. Retrieve it from irq descriptor if necessary. [ tglx: Split out from combo patch. Left [try_]misrouted_irq() untouched as there is no win in the slow path ] Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433391238-19471-19-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 3 +-- kernel/irq/chip.c | 2 +- kernel/irq/handle.c | 2 +- kernel/irq/spurious.c | 6 -- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 92188b0..429ac26 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -487,8 +487,7 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, #endif /* Handling of unhandled and spurious interrupts: */ -extern void note_interrupt(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret); +extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); /* Enable/disable irq debugging output: */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 310d658..76f199d 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -328,7 +328,7 @@ void handle_nested_irq(unsigned int irq) action_ret = action->thread_fn(action->irq, action->dev_id); if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + note_interrupt(desc, action_ret); raw_spin_lock_irq(>lock); irqd_clear(>irq_data, IRQD_IRQ_INPROGRESS); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 4d37b96..b6eeea8 100644 
--- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -176,7 +176,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) add_interrupt_randomness(irq, flags); if (!noirqdebug) - note_interrupt(irq, desc, retval); + note_interrupt(desc, retval); return retval; } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 5378c52..3214417 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -270,9 +270,10 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc, #define SPURIOUS_DEFERRED 0x8000 -void note_interrupt(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret) +void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) { + unsigned int irq; + if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc)) return; @@ -396,6 +397,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, desc->last_unhandled = jiffies; } + irq = irq_desc_get_irq(desc); if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { int ok = misrouted_irq(irq); if (action_ret == IRQ_NONE) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] genirq: Remove the parameter 'irq' of kstat_incr_irqs_this_cpu()
Commit-ID: b51bf95c583bba645974348666e9b5a14c7aa3ea Gitweb: http://git.kernel.org/tip/b51bf95c583bba645974348666e9b5a14c7aa3ea Author: Jiang Liu AuthorDate: Thu, 4 Jun 2015 12:13:25 +0800 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:24 +0200 genirq: Remove the parameter 'irq' of kstat_incr_irqs_this_cpu() The first parameter 'irq' is never used by kstat_incr_irqs_this_cpu(). Remove it. Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1433391238-19471-16-git-send-email-jiang@linux.intel.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 16 kernel/irq/handle.c| 2 +- kernel/irq/internals.h | 2 +- kernel/irq/irqdesc.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 27f4332..f3c3d55 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -315,7 +315,7 @@ void handle_nested_irq(unsigned int irq) raw_spin_lock_irq(>lock); desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); action = desc->action; if (unlikely(!action || irqd_irq_disabled(>irq_data))) { @@ -391,7 +391,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); if (unlikely(!desc->action || irqd_irq_disabled(>irq_data))) { desc->istate |= IRQS_PENDING; @@ -443,7 +443,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); /* * If its disabled or no action available @@ -515,7 +515,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) goto out; desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - 
kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); /* * If its disabled or no action available @@ -583,7 +583,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) goto out_unlock; } - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); /* Start handling the irq */ desc->irq_data.chip->irq_ack(>irq_data); @@ -646,7 +646,7 @@ void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) goto out_eoi; } - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); do { if (unlikely(!desc->action)) @@ -675,7 +675,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(>irq_data); @@ -705,7 +705,7 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) void *dev_id = raw_cpu_ptr(action->percpu_dev_id); irqreturn_t res; - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(>irq_data); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6354802..4d37b96 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -30,7 +30,7 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) { print_irq_desc(irq, desc); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irqs_this_cpu(desc); ack_bad_irq(irq); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee8..3e03824 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -191,7 +191,7 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) return __irqd_to_state(d) & mask; } -static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) +static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) { __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 
4afc457..0a2a4b6 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -582,7 +582,7 @@ int irq_set_percpu_devid(unsigned int irq) void kstat_incr_irq_this_cpu(unsigned int irq) { - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irqs_this_cpu(irq_to_desc(irq)); } /** -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo
[tip:irq/core] irqchip/sirfsoc: Fix generic chip allocation wreckage
Commit-ID: d452bca82d9ff4f220afa4234418912623db4fe6 Gitweb: http://git.kernel.org/tip/d452bca82d9ff4f220afa4234418912623db4fe6 Author: Thomas Gleixner AuthorDate: Mon, 6 Jul 2015 10:18:29 + Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:23 +0200 irqchip/sirfsoc: Fix generic chip allocation wreckage irq_alloc_domain_generic_chips() can only be called once for an irqdomain. The sirfsoc init calls it twice and because the return value is not checked it does not notice the wreckage. The code works by chance because the first call already allocates two chips and therefor the second call to sirfsoc_alloc_gc() operates on the proper generic chip instance. Use a single call and setup the two chips in the obvious correct way. Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Barry Song Cc: linux-arm-ker...@lists.infradead.org Cc: Olof Johansson Link: http://lkml.kernel.org/r/20150706101543.470696...@linutronix.de --- drivers/irqchip/irq-sirfsoc.c | 48 ++- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c index a469355..b930069 100644 --- a/drivers/irqchip/irq-sirfsoc.c +++ b/drivers/irqchip/irq-sirfsoc.c @@ -17,34 +17,38 @@ #include #include "irqchip.h" -#define SIRFSOC_INT_RISC_MASK0 0x0018 -#define SIRFSOC_INT_RISC_MASK1 0x001C -#define SIRFSOC_INT_RISC_LEVEL0 0x0020 -#define SIRFSOC_INT_RISC_LEVEL1 0x0024 +#define SIRFSOC_INT_RISC_MASK0 0x0018 +#define SIRFSOC_INT_RISC_MASK1 0x001C +#define SIRFSOC_INT_RISC_LEVEL00x0020 +#define SIRFSOC_INT_RISC_LEVEL10x0024 #define SIRFSOC_INIT_IRQ_ID0x0038 +#define SIRFSOC_INT_BASE_OFFSET0x0004 #define SIRFSOC_NUM_IRQS 64 +#define SIRFSOC_NUM_BANKS (SIRFSOC_NUM_IRQS / 32) static struct irq_domain *sirfsoc_irqdomain; -static __init void -sirfsoc_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) +static __init void sirfsoc_alloc_gc(void __iomem *base) { - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - int ret; 
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; unsigned int set = IRQ_LEVEL; - - ret = irq_alloc_domain_generic_chips(sirfsoc_irqdomain, num, 1, "irq_sirfsoc", - handle_level_irq, clr, set, IRQ_GC_INIT_MASK_CACHE); - - gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, irq_start); - gc->reg_base = base; - ct = gc->chip_types; - ct->chip.irq_mask = irq_gc_mask_clr_bit; - ct->chip.irq_unmask = irq_gc_mask_set_bit; - ct->regs.mask = SIRFSOC_INT_RISC_MASK0; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; + int i; + + irq_alloc_domain_generic_chips(sirfsoc_irqdomain, 32, 1, "irq_sirfsoc", + handle_level_irq, clr, set, + IRQ_GC_INIT_MASK_CACHE); + + for (i = 0; i < SIRFSOC_NUM_BANKS; i++) { + gc = irq_get_domain_generic_chip(sirfsoc_irqdomain, i * 32); + gc->reg_base = base + i * SIRFSOC_INT_BASE_OFFSET; + ct = gc->chip_types; + ct->chip.irq_mask = irq_gc_mask_clr_bit; + ct->chip.irq_unmask = irq_gc_mask_set_bit; + ct->regs.mask = SIRFSOC_INT_RISC_MASK0; + } } static void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) @@ -64,10 +68,8 @@ static int __init sirfsoc_irq_init(struct device_node *np, panic("unable to map intc cpu registers\n"); sirfsoc_irqdomain = irq_domain_add_linear(np, SIRFSOC_NUM_IRQS, - _generic_chip_ops, base); - - sirfsoc_alloc_gc(base, 0, 32); - sirfsoc_alloc_gc(base + 4, 32, SIRFSOC_NUM_IRQS - 32); + _generic_chip_ops, base); + sirfsoc_alloc_gc(base); writel_relaxed(0, base + SIRFSOC_INT_RISC_LEVEL0); writel_relaxed(0, base + SIRFSOC_INT_RISC_LEVEL1); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:irq/core] irqchip: Prepare for local stub header removal
Commit-ID: 41a83e06e2bb9ac46731681fd44d1e6ab184dac5 Gitweb: http://git.kernel.org/tip/41a83e06e2bb9ac46731681fd44d1e6ab184dac5 Author: Joel Porquet AuthorDate: Tue, 7 Jul 2015 17:11:46 -0400 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:23 +0200 irqchip: Prepare for local stub header removal The IRQCHIP_DECLARE macro moved to to 'include/linux/irqchip.h', so the local irqchip.h became an empty shell, which solely includes include/linux/irqchip.h Include the global header in all irqchip drivers instead of the local header, so we can remove it. Signed-off-by: Joel Porquet Cc: vgu...@synopsys.com Cc: mon...@monstr.eu Cc: r...@linux-mips.org Cc: ja...@lakedaemon.net Link: http://lkml.kernel.org/r/1882096.X39jVG8e0D@joel-zenbook Signed-off-by: Thomas Gleixner --- drivers/irqchip/exynos-combiner.c| 3 +-- drivers/irqchip/irq-armada-370-xp.c | 3 +-- drivers/irqchip/irq-atmel-aic.c | 2 +- drivers/irqchip/irq-atmel-aic5.c | 2 +- drivers/irqchip/irq-bcm2835.c| 3 +-- drivers/irqchip/irq-bcm7038-l1.c | 3 +-- drivers/irqchip/irq-bcm7120-l2.c | 3 +-- drivers/irqchip/irq-brcmstb-l2.c | 2 -- drivers/irqchip/irq-clps711x.c | 3 +-- drivers/irqchip/irq-crossbar.c | 3 +-- drivers/irqchip/irq-digicolor.c | 3 +-- drivers/irqchip/irq-dw-apb-ictl.c| 3 +-- drivers/irqchip/irq-gic-v3-its.c | 3 +-- drivers/irqchip/irq-gic-v3.c | 2 +- drivers/irqchip/irq-gic.c| 2 +- drivers/irqchip/irq-hip04.c | 2 +- drivers/irqchip/irq-ingenic.c| 3 +-- drivers/irqchip/irq-keystone.c | 3 +-- drivers/irqchip/irq-mips-cpu.c | 3 +-- drivers/irqchip/irq-mips-gic.c | 3 +-- drivers/irqchip/irq-mmp.c| 3 +-- drivers/irqchip/irq-moxart.c | 3 +-- drivers/irqchip/irq-mtk-sysirq.c | 3 +-- drivers/irqchip/irq-mxs.c| 3 +-- drivers/irqchip/irq-nvic.c | 3 +-- drivers/irqchip/irq-omap-intc.c | 3 +-- drivers/irqchip/irq-or1k-pic.c | 3 +-- drivers/irqchip/irq-orion.c | 3 +-- drivers/irqchip/irq-renesas-h8300h.c | 2 -- drivers/irqchip/irq-renesas-h8s.c| 2 +- drivers/irqchip/irq-s3c24xx.c| 3 +-- 
drivers/irqchip/irq-sirfsoc.c| 2 +- drivers/irqchip/irq-sun4i.c | 3 +-- drivers/irqchip/irq-sunxi-nmi.c | 2 +- drivers/irqchip/irq-tb10x.c | 2 +- drivers/irqchip/irq-tegra.c | 3 +-- drivers/irqchip/irq-versatile-fpga.c | 3 +-- drivers/irqchip/irq-vf610-mscm-ir.c | 3 +-- drivers/irqchip/irq-vic.c| 3 +-- drivers/irqchip/irq-vt8500.c | 3 +-- drivers/irqchip/irq-xtensa-mx.c | 3 +-- drivers/irqchip/irq-xtensa-pic.c | 3 +-- drivers/irqchip/irq-zevio.c | 3 +-- drivers/irqchip/spear-shirq.c| 3 +-- 44 files changed, 42 insertions(+), 79 deletions(-) diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index 5c82e3b..05cdccc 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -15,13 +15,12 @@ #include #include #include +#include #include #include #include #include -#include "irqchip.h" - #define COMBINER_ENABLE_SET0x0 #define COMBINER_ENABLE_CLEAR 0x4 #define COMBINER_INT_STATUS0xC diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 0d3b0fe..73b73ac 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -33,8 +34,6 @@ #include #include -#include "irqchip.h" - /* Interrupt Controller Registers Map */ #define ARMADA_370_XP_INT_SET_MASK_OFFS(0x48) #define ARMADA_370_XP_INT_CLEAR_MASK_OFFS (0x4C) diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index dae3604..dbbf30a 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,6 @@ #include #include "irq-atmel-aic-common.h" -#include "irqchip.h" /* Number of irq lines managed by AIC */ #define NR_AIC_IRQS32 diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 459bf44..ff2e832 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ 
b/drivers/irqchip/irq-atmel-aic5.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,6 @@ #include #include "irq-atmel-aic-common.h" -#include "irqchip.h" /* Number of irq lines managed by AIC */ #define NR_AIC5_IRQS 128 diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index e68c3b6..a36ba96 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -48,13 +48,12 @@ #include #include #include +#include #include #include #include
[tip:irq/core] irqchip/dw-apb-ictl: Fix generic domain chip wreckage
Commit-ID: b66231183a8542de1414e42326dd1c6bc4af75f4 Gitweb: http://git.kernel.org/tip/b66231183a8542de1414e42326dd1c6bc4af75f4 Author: Thomas Gleixner AuthorDate: Mon, 6 Jul 2015 15:32:25 +0200 Committer: Thomas Gleixner CommitDate: Sat, 11 Jul 2015 23:14:23 +0200 irqchip/dw-apb-ictl: Fix generic domain chip wreckage The num_ct argument of irq_alloc_domain_generic_chips() tells the core code how many chip types (for different control flows, e.g. edge/level) should be allocated. It does not control how many generic chip instances are created because that's determined from the irq domain size and the number of interrupts per chip. The dw-apb init abuses the num_ct argument for allocating one or two chip types depending on the number of interrupts. That's completely wrong because the alternate type is never used. This code was obviously never tested on a system which has more than 32 interrupts as that would have never worked due to the uninitialized second generic chip instance. Hand in the proper num_ct=1 and fix up the chip initialization along with the interrupt handler. 
Signed-off-by: Thomas Gleixner Tested-by: Jisheng Zhang Cc: Sebastian Hesselbarth Cc: Mark Rutland Cc: Jason Cooper Link: http://lkml.kernel.org/r/20150706101543.373582...@linutronix.de --- drivers/irqchip/irq-dw-apb-ictl.c | 53 --- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c index 53bb732..ca22f4e 100644 --- a/drivers/irqchip/irq-dw-apb-ictl.c +++ b/drivers/irqchip/irq-dw-apb-ictl.c @@ -25,24 +25,25 @@ #define APB_INT_MASK_H 0x0c #define APB_INT_FINALSTATUS_L 0x30 #define APB_INT_FINALSTATUS_H 0x34 +#define APB_INT_BASE_OFFSET0x04 static void dw_apb_ictl_handler(unsigned int irq, struct irq_desc *desc) { - struct irq_chip *chip = irq_get_chip(irq); - struct irq_chip_generic *gc = irq_get_handler_data(irq); - struct irq_domain *d = gc->private; - u32 stat; + struct irq_domain *d = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); int n; chained_irq_enter(chip, desc); - for (n = 0; n < gc->num_ct; n++) { - stat = readl_relaxed(gc->reg_base + -APB_INT_FINALSTATUS_L + 4 * n); + for (n = 0; n < d->revmap_size; n += 32) { + struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, n); + u32 stat = readl_relaxed(gc->reg_base + APB_INT_FINALSTATUS_L); + while (stat) { u32 hwirq = ffs(stat) - 1; - generic_handle_irq(irq_find_mapping(d, - gc->irq_base + hwirq + 32 * n)); + u32 virq = irq_find_mapping(d, gc->irq_base + hwirq); + + generic_handle_irq(virq); stat &= ~(1 << hwirq); } } @@ -73,7 +74,7 @@ static int __init dw_apb_ictl_init(struct device_node *np, struct irq_domain *domain; struct irq_chip_generic *gc; void __iomem *iobase; - int ret, nrirqs, irq; + int ret, nrirqs, irq, i; u32 reg; /* Map the parent interrupt for the chained handler */ @@ -128,35 +129,25 @@ static int __init dw_apb_ictl_init(struct device_node *np, goto err_unmap; } - ret = irq_alloc_domain_generic_chips(domain, 32, (nrirqs > 32) ? 
2 : 1, -np->name, handle_level_irq, clr, 0, -IRQ_GC_MASK_CACHE_PER_TYPE | + ret = irq_alloc_domain_generic_chips(domain, 32, 1, np->name, +handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); if (ret) { pr_err("%s: unable to alloc irq domain gc\n", np->full_name); goto err_unmap; } - gc = irq_get_domain_generic_chip(domain, 0); - gc->private = domain; - gc->reg_base = iobase; - - gc->chip_types[0].regs.mask = APB_INT_MASK_L; - gc->chip_types[0].regs.enable = APB_INT_ENABLE_L; - gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; - gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; - gc->chip_types[0].chip.irq_resume = dw_apb_ictl_resume; - - if (nrirqs > 32) { - gc->chip_types[1].regs.mask = APB_INT_MASK_H; - gc->chip_types[1].regs.enable = APB_INT_ENABLE_H; - gc->chip_types[1].chip.irq_mask = irq_gc_mask_set_bit; - gc->chip_types[1].chip.irq_unmask = irq_gc_mask_clr_bit; - gc->chip_types[1].chip.irq_resume = dw_apb_ictl_resume; + for (i = 0; i < DIV_ROUND_UP(nrirqs, 32); i++) { + gc = irq_get_domain_generic_chip(domain, i * 32); +
Re: V4.1-RC build error after commit 77a3c6f
The subject should say V4.2-RC. Sorry. Larry On 07/11/2015 04:27 PM, Larry Finger wrote: Beginning with the commit in the subject, I get the following build error: CC [M] drivers/media/v4l2-core/videobuf2-core.o drivers/media/v4l2-core/videobuf2-core.c: In function ‘vb2_warn_zero_bytesused’: drivers/media/v4l2-core/videobuf2-core.c:1253:2: error: implicit declaration of function ‘__WARN’ [-Werror=implicit-function-declaration] __WARN(); ^ cc1: some warnings being treated as errors My .config is attached. This problem has been bisected to the following commit: commit 77a3c6fd90c94f635edb00d4a65f485687538791 Author: Laurent Pinchart Date: Fri Jun 19 08:50:07 2015 -0300 [media] vb2: Don't WARN when v4l2_buffer.bytesused is 0 for multiplanar buffers Commit f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the vb2_queue struct") added a WARN_ONCE to catch usage of a deprecated API using a zero value for v4l2_buffer.bytesused. However, the condition is checked incorrectly, as the v4L2_buffer bytesused field is supposed to be ignored for multiplanar buffers. This results in spurious warnings when using the multiplanar API. Fix it by checking v4l2_buffer.bytesused for uniplanar buffers and v4l2_plane.bytesused for multiplanar buffers. Fixes: f61bf13b6a07 ("[media] vb2: add allow_zero_bytesused flag to the vb2_queue struct") Signed-off-by: Laurent Pinchart Cc: sta...@vger.kernel.org # for v4.0 Signed-off-by: Mauro Carvalho Chehab Thanks, Larry -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] x86,kvm: Add a kernel parameter to disable PV spinlock
Xen has an kernel command line argument "xen_nopvspin" to disable paravirtual spinlocks. This patch adds a similar "kvm_nopvspin" argument to disable paravirtual spinlocks for KVM. This can be useful for testing as well as allowing administrators to choose unfair lock for their KVM guests if they want to. Signed-off-by: Waiman Long --- Documentation/kernel-parameters.txt |7 ++- arch/x86/kernel/kvm.c | 15 +-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1d6f045..032d37d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1747,6 +1747,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. feature (tagged TLBs) on capable Intel chips. Default is 1 (enabled) + kvm_nopvspin[X86,KVM] + Disables the paravirtualized spinlock slowpath + optimizations for KVM. + + l2cr= [PPC] l3cr= [PPC] @@ -4091,7 +4096,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. never -- do not unplug even if version check succeeds xen_nopvspin[X86,XEN] - Disables the ticketlock slowpath using Xen PV + Disables the spinlock slowpath using Xen PV optimizations. 
xen_nopv[X86] diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 47190bd..6373dca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -584,6 +584,17 @@ static void kvm_kick_cpu(int cpu) kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid); } +static bool kvm_pvspin = true; + +/* + * Allow disabling of PV spinlock in kernel command line + */ +static __init int kvm_parse_nopvspin(char *arg) +{ + kvm_pvspin = false; + return 0; +} +early_param("kvm_nopvspin", kvm_parse_nopvspin); #ifdef CONFIG_QUEUED_SPINLOCKS @@ -857,7 +868,7 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket) */ void __init kvm_spinlock_init(void) { - if (!kvm_para_available()) + if (!kvm_para_available() || !kvm_pvspin) return; /* Does host kernel support KVM_FEATURE_PV_UNHALT? */ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) @@ -877,7 +888,7 @@ void __init kvm_spinlock_init(void) static __init int kvm_spinlock_init_jump(void) { - if (!kvm_para_available()) + if (!kvm_para_available() || !kvm_pvspin) return 0; if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return 0; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/7] locking/qspinlock: Enhance pvqspinlock & introduce queued unfair lock
This patchset consists of two parts: 1) Patches 1-5 enhance the performance of PV qspinlock especially for overcommitted guest. The first patch moves all the CPU kicking to the unlock code. The 2nd and 3rd patches implement a kick-ahead and wait-early mechanism that was shown to improve performance for overcommitted guest. They are inspired by the "Do Virtual Machines Really Scale?" blog from Sanidhya Kashyap. The 4th patch adds code to collect PV qspinlock statistics. The last patch adds the pending bit support to PV qspinlock to improve performance at light load. This is important as the PV queuing code has even higher overhead than the native queuing code. 2) Patch 6 introduces queued unfair lock as a replacement of the existing unfair byte lock. The queued unfair lock is fairer than the byte lock currently in the qspinlock while improving performance at high contention level. Patch 7 adds a kernel command line option to KVM for disabling PV spinlock, similar to the one in Xen, if the administrators choose to do so. The last patch adds statistics collection to the queued unfair lock code. Linux kernel builds were run in KVM guest on an 8-socket, 4 cores/socket Westmere-EX system and a 4-socket, 8 cores/socket Haswell-EX system. So both systems have 32 physical CPUs. VM guests (no NUMA pinning) were set up with 32, 48 and 60 vCPUs. 
The kernel build times (make -j <n>, where <n> was the number of vCPUs) on various configurations were as follows: Westmere-EX (8x4): Kernel32 vCPUs48 vCPUs60 vCPUs -- pvticketlock (4.1.1) 5m02.0s13m27.6s15m49.9s pvqspinlock (4.2-rc1) 3m39.9s11m17.8s12m19.9s patched pvqspinlock3m38.5s 9m27.8s 9m39.4s unfair byte lock 4m23.8s 7m14.7s 8m50.4s unfair queued lock 3m03.4s 3m29.7s 4m15.4s Haswell-EX (4x8): Kernel32 vCPUs48 vCPUs60 vCPUs -- pvticketlock (4.1.1) 1m58.9s18m57.0s20m46.1s pvqspinlock (4.2-rc1) 1m59.9s18m44.2s18m57.0s patched pvqspinlock2m01.7s 8m03.7s 8m29.5s unfair byte lock 2m04.5s 2m46.7s 3m15.6s unfair queued lock 1m59.4s 2m04.9s 2m18.6s It can be seen that queued unfair lock has the best performance in almost all the cases. As can be seen in patch 4, the overhead of PV kicking and waiting is quite high. Unfair locks avoid that overhead and spend the time on productive work instead. On the other hand, the pvqspinlock is fair while the byte lock is not. The queued unfair lock is kind of in the middle between those two. It is not as fair as the pvqspinlock, but is fairer than the byte lock. Looking at the PV locks, the pvqspinlock patch did increase performance in the overcommitted guests by about 20% in Westmere-EX and more than 2X in Haswell-EX. More investigation may be needed to find out why there was slowdown in Haswell-EX compared with Westmere-EX. In conclusion, unfair lock is actually better performance-wise when a VM guest is over-committed. If there is no over-commitment, PV locks work fine, too. When the VM guest was changed to NUMA pinned (direct mapping between physical and virtual CPUs) in the Westmere-EX system, the build times became: Kernel32 vCPUs -- pvticketlock (4.1.1) 2m47.1s pvqspinlock (4.2-rc1) 2m45.9s patched pvqspinlock2m45.2s unfair byte lock 2m45.4s unfair queued lock 2m44.9s It can be seen that the build times are virtually the same for all the configurations. 
Waiman Long (7): locking/pvqspinlock: Only kick CPU at unlock time locking/pvqspinlock: Allow vCPUs kick-ahead locking/pvqspinlock: Implement wait-early for overcommitted guest locking/pvqspinlock: Collect slowpath lock statistics locking/pvqspinlock: Add pending bit support locking/qspinlock: A fairer queued unfair lock locking/qspinlock: Collect queued unfair lock slowpath statistics arch/x86/Kconfig|8 + arch/x86/include/asm/qspinlock.h| 17 +- kernel/locking/qspinlock.c | 140 ++- kernel/locking/qspinlock_paravirt.h | 436 --- kernel/locking/qspinlock_unfair.h | 327 ++ 5 files changed, 880 insertions(+), 48 deletions(-) create mode 100644 kernel/locking/qspinlock_unfair.h -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/7] locking/pvqspinlock: Allow vCPUs kick-ahead
Frequent CPU halting (vmexit) and CPU kicking (vmenter) lengthens critical section and block forward progress. This patch implements a kick-ahead mechanism where the unlocker will kick the queue head vCPUs as well as up to two additional vCPUs next to the queue head if they were halted. The kickings are done after exiting the critical section to improve parallelism. The amount of kick-ahead allowed depends on the number of vCPUs in the VM guest. This change should improve overall system performance in a busy overcommitted guest. Signed-off-by: Waiman Long --- kernel/locking/qspinlock_paravirt.h | 71 ++- 1 files changed, 69 insertions(+), 2 deletions(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index d302c39..4c1a299 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -67,6 +67,12 @@ static struct pv_hash_entry *pv_lock_hash; static unsigned int pv_lock_hash_bits __read_mostly; /* + * Allow kick-ahead of vCPUs at unlock time + */ +#define PV_KICK_AHEAD_MAX 4 +static int pv_kick_ahead __read_mostly; + +/* * Allocate memory for the PV qspinlock hash buckets * * This function should be called from the paravirt spinlock initialization @@ -74,7 +80,16 @@ static unsigned int pv_lock_hash_bits __read_mostly; */ void __init __pv_init_lock_hash(void) { - int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); + int ncpus = num_possible_cpus(); + int pv_hash_size = ALIGN(4 * ncpus, PV_HE_PER_LINE); + int i; + + /* +* The minimum number of vCPUs required in each kick-ahead level +*/ + static const u8 kick_ahead_threshold[PV_KICK_AHEAD_MAX] = { + 4, 8, 16, 32 + }; if (pv_hash_size < PV_HE_MIN) pv_hash_size = PV_HE_MIN; @@ -88,6 +103,18 @@ void __init __pv_init_lock_hash(void) pv_hash_size, 0, HASH_EARLY, _lock_hash_bits, NULL, pv_hash_size, pv_hash_size); + /* +* Enable the unlock kick ahead mode according to the number of +* vCPUs available. 
+*/ + for (i = PV_KICK_AHEAD_MAX; i > 0; i--) + if (ncpus >= kick_ahead_threshold[i - 1]) { + pv_kick_ahead = i; + break; + } + if (pv_kick_ahead) + printk(KERN_INFO "PV unlock kick ahead level %d enabled\n", + pv_kick_ahead); } #define for_each_hash_entry(he, offset, hash) \ @@ -317,13 +344,33 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) } /* + * Helper to get the address of the next kickable node + * The node has to be in the halted state and is being transitioned to + * running state by this function. Otherwise, NULL will be returned. + */ +static inline struct pv_node *pv_get_kick_node(struct pv_node *node) +{ + struct pv_node *next = (struct pv_node *)READ_ONCE(node->mcs.next); + + if (!next) + return NULL; + + if ((READ_ONCE(next->state) != vcpu_halted) || + (cmpxchg(>state, vcpu_halted, vcpu_running) != vcpu_halted)) + next = NULL;/* No kicking is needed */ + + return next; +} + +/* * PV version of the unlock function to be used in stead of * queued_spin_unlock(). */ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { struct __qspinlock *l = (void *)lock; - struct pv_node *node; + struct pv_node *node, *nxt, *next[PV_KICK_AHEAD_MAX]; + int i, nr_kick; /* * We must not unlock if SLOW, because in that case we must first @@ -340,6 +387,20 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) node = pv_unhash(lock); /* +* Implement kick-ahead mode +* +* Access the next group of nodes, if available, and prepare to kick +* them after releasing the lock if they are in the halted state. This +* should improve performance on an overcommitted system. +*/ + for (nr_kick = 0, nxt = node; nr_kick < pv_kick_ahead; +nxt = next[nr_kick], nr_kick++) { + next[nr_kick] = pv_get_kick_node(nxt); + if (!next[nr_kick]) + break; + } + + /* * Now that we have a reference to the (likely) blocked pv_node, * release the lock. 
*/ @@ -354,6 +415,12 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) */ if (READ_ONCE(node->state) == vcpu_hashed) pv_kick(node->cpu); + + /* +* Kick the next group of vCPUs, if available. +*/ + for (i = 0; i < nr_kick; i++) + pv_kick(next[i]->cpu); }
[PATCH 4/7] locking/pvqspinlock: Collect slowpath lock statistics
This patch enables the accumulation of kicking and waiting related PV qspinlock statistics when the new QUEUED_LOCK_STAT configuration option is selected. It also enables the collection of kicking and wakeup latencies which have a heavy dependency on the CPUs being used. The measured latencies for different CPUs are: CPU Wakeup Kicking --- -- --- Haswell-EX 26.4us 9.2us Westmere-EX 99.4US 25.5us So Haswell is much faster than Westmere. The accumulated lock statistics will be reported in debugfs under the pv-qspinlock directory. Signed-off-by: Waiman Long --- arch/x86/Kconfig|7 ++ kernel/locking/qspinlock_paravirt.h | 173 ++- 2 files changed, 177 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55bced1..299a1c4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -679,6 +679,13 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. +config QUEUED_LOCK_STAT + bool "Paravirt queued lock statistics" + depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS + ---help--- + Enable the collection of statistical data on the behavior of + paravirtualized queued spinlocks and report them on debugfs. 
+ source "arch/x86/xen/Kconfig" config KVM_GUEST diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index b3fe5bb..efc9a72 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -60,6 +60,155 @@ struct pv_node { }; /* + * PV qspinlock statistics + */ +enum pv_qlock_stat { + pvstat_wait_head, + pvstat_wait_node, + pvstat_wait_early, + pvstat_kick_wake, + pvstat_kick_cpu, + pvstat_kick_ahead, + pvstat_no_kick, + pvstat_spurious, + pvstat_hash, + pvstat_hops, + pvstat_num /* Total number of statistics counts */ +}; + +#ifdef CONFIG_QUEUED_LOCK_STAT +/* + * Collect pvqspinlock statiatics + */ +#include +#include + +static const char * const stat_fsnames[pvstat_num] = { + [pvstat_wait_head] = "wait_head_count", + [pvstat_wait_node] = "wait_node_count", + [pvstat_wait_early] = "wait_early_count", + [pvstat_kick_wake] = "kick_wake_count", + [pvstat_kick_cpu]= "kick_cpu_count", + [pvstat_kick_ahead] = "kick_ahead_count", + [pvstat_no_kick] = "no_kick_count", + [pvstat_spurious]= "spurious_wakeup", + [pvstat_hash]= "hash_count", + [pvstat_hops]= "hash_hops_count", +}; + +static atomic_t pvstats[pvstat_num]; + +/* + * pv_kick_latencies = sum of all pv_kick latencies in ns + * pv_wake_latencies = sum of all wakeup latencies in ns + * + * Avg kick latency = pv_kick_latencies/(kick_cpu_count + kick_ahead_count) + * Avg wake latency = pv_wake_latencies/kick_wake_count + */ +static atomic64_t pv_kick_latencies, pv_wake_latencies; +static DEFINE_PER_CPU(u64, pv_kick_time); + +/* + * Reset all the statistics counts if set + */ +static bool reset_cnts __read_mostly; + +/* + * Initialize debugfs for the PV qspinlock statistics + */ +static int __init pv_qspinlock_debugfs(void) +{ + struct dentry *d_pvqlock = debugfs_create_dir("pv-qspinlock", NULL); + int i; + + if (!d_pvqlock) + printk(KERN_WARNING + "Could not create 'pv-qspinlock' debugfs directory\n"); + + for (i = 0; i < pvstat_num; i++) + 
debugfs_create_u32(stat_fsnames[i], 0444, d_pvqlock, + (u32 *)[i]); + debugfs_create_u64("kick_latencies", 0444, d_pvqlock, + (u64 *)_kick_latencies); + debugfs_create_u64("wake_latencies", 0444, d_pvqlock, + (u64 *)_wake_latencies); + debugfs_create_bool("reset_cnts", 0644, d_pvqlock, (u32 *)_cnts); + return 0; +} +fs_initcall(pv_qspinlock_debugfs); + +/* + * Reset all the counts + */ +static noinline void pvstat_reset(void) +{ + int i; + + for (i = 0; i < pvstat_num; i++) + atomic_set([i], 0); + atomic64_set(_kick_latencies, 0); + atomic64_set(_wake_latencies, 0); + reset_cnts = 0; +} + +/* + * Increment the PV qspinlock statistics counts + */ +static inline void pvstat_inc(enum pv_qlock_stat stat) +{ + atomic_inc([stat]); + if (unlikely(reset_cnts)) + pvstat_reset(); +} + +/* + * PV hash hop count + */ +static inline void pvstat_hop(int hopcnt) +{ + atomic_inc([pvstat_hash]); + atomic_add(hopcnt, [pvstat_hops]); +} + +/* + * Replacement function for pv_kick() + */ +static inline void __pv_kick(int cpu) +{ + u64 start = sched_clock(); + + *per_cpu_ptr(_kick_time, cpu) = start; + pv_kick(cpu); + atomic64_add(sched_clock() - start,
[PATCH 7/7] locking/qspinlock: Collect queued unfair lock slowpath statistics
This patch enables the accumulation of unfair qspinlock statistics when the CONFIG_QUEUED_LOCK_STAT configuration parameter is set. The accumulated lock statistics will be reported in debugfs under the unfair-qspinlock directory. On a KVM guest with 32 vCPUs, the statistics counts after bootup were: lsteal_cnts = 172219 2377 425 118 33 8 5 12 14 0 0 0 trylock_cnt = 1495372 So most of the lock stealing happened in the initial trylock before entering the queue. Once a vCPU is in the queue, the chance of getting the lock drop off significantly the further it is away from queue head. Signed-off-by: Waiman Long --- arch/x86/Kconfig |7 ++- kernel/locking/qspinlock.c|2 +- kernel/locking/qspinlock_unfair.h | 89 + 3 files changed, 94 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 299a1c4..aee6236 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -680,11 +680,12 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. config QUEUED_LOCK_STAT - bool "Paravirt queued lock statistics" - depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS + bool "Paravirt/Unfair queued lock statistics" + depends on DEBUG_FS && QUEUED_SPINLOCKS ---help--- Enable the collection of statistical data on the behavior of - paravirtualized queued spinlocks and report them on debugfs. + paravirtualized and unfair queued spinlocks and report them + on debugfs. 
source "arch/x86/xen/Kconfig" diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 65dead9..12e2e89 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -538,7 +538,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath); #ifdef queued_spin_trylock #undef queued_spin_trylock #endif -#define queued_spin_trylockqueued_spin_trylock_unfair +#define queued_spin_trylock__queued_spin_trylock_unfair /* * The unfair lock code is used internally and so don't need to be exported diff --git a/kernel/locking/qspinlock_unfair.h b/kernel/locking/qspinlock_unfair.h index 0e8a40f..fc94578 100644 --- a/kernel/locking/qspinlock_unfair.h +++ b/kernel/locking/qspinlock_unfair.h @@ -44,6 +44,93 @@ struct uf_node { u32 prev_tail; /* Previous node tail code */ }; +#ifdef CONFIG_QUEUED_LOCK_STAT + +#include + +/* + * Unfair qspinlock statistics + * + * All spinning CPUs are grouped into buckets depending on the most + * significant bit in their lock stealing period. The first entry in + * the array is for the queue head. 
+ */ +#define NR_LPERIOD_CNTS(LPERIOD_THRESHOLD_SHIFT - LPERIOD_MIN_SHIFT + 6) +static atomic_t lsteal_cnts[NR_LPERIOD_CNTS]; + +/* + * # of successful trylocks at beginning of slowpath + */ +static atomic_t trylock_cnt; + +/* + * Counts reset flag + */ +static bool reset_cnts __read_mostly; + +/* + * Initialize debugfs for the unfair qspinlock statistics + */ +static int __init unfair_qspinlock_debugfs(void) +{ + struct dentry *d_ufqlock = debugfs_create_dir("unfair-qspinlock", NULL); + + if (!d_ufqlock) + printk(KERN_WARNING + "Could not create 'unfair-qspinlock' debugfs directory\n"); + + debugfs_create_u32_array("lsteal_cnts", 0444, d_ufqlock, + (u32 *)lsteal_cnts, NR_LPERIOD_CNTS); + debugfs_create_u32("trylock_cnt", 0444, d_ufqlock, (u32 *)_cnt); + debugfs_create_bool("reset_cnts", 0644, d_ufqlock, (u32 *)_cnts); + return 0; +} +fs_initcall(unfair_qspinlock_debugfs); + +/* + * Reset all the statistics counts + */ +static noinline void reset_counts(void) +{ + int idx; + + reset_cnts = false; + atomic_set(_cnt, 0); + for (idx = 0 ; idx < NR_LPERIOD_CNTS; idx++) + atomic_set(_cnts[idx], 0); +} + +/* + * Increment the unfair qspinlock statistic count + */ +static inline void ustat_inc(struct uf_node *pn) +{ + /* +* fls() returns the most significant 1 bit position + 1 +*/ + int idx = fls(pn->lsteal_period) - LPERIOD_MIN_SHIFT; + + if (idx >= NR_LPERIOD_CNTS) + idx = NR_LPERIOD_CNTS - 1; + atomic_inc(_cnts[idx]); + if (unlikely(reset_cnts)) + reset_counts(); +} + +static inline bool __queued_spin_trylock_unfair(struct qspinlock *lock) +{ + bool ret = queued_spin_trylock_unfair(lock); + + if (ret) + atomic_inc(_cnt); + return ret; +} + +#else /* CONFIG_QUEUED_LOCK_STAT */ +static inline void ustat_inc(struct uf_node *pn) { } +#define __queued_spin_trylock_unfair queued_spin_trylock_unfair +#endif /* CONFIG_QUEUED_LOCK_STAT */ + /** * cmpxchg_tail - Put in the new tail code if it matches the old one * @lock : Pointer to queue spinlock structure @@ -125,6 +212,7 @@ 
static inline bool unfair_wait_node(struct
[PATCH 6/7] locking/qspinlock: A fairer queued unfair lock
For a virtual guest with the qspinlock patch, a simple unfair byte lock will be used if PV spinlock is not configured in or the hypervisor isn't either KVM or Xen. The byte lock works fine with small guest of just a few vCPUs. On a much larger guest, however, byte lock can have the following problems: 1) Lock starvation is a real possibility especially if the number of vCPUs is large. 2) The constant reading and occasionally writing to the lock word can put a lot of cacheline contention traffic on the affected cacheline. This patch introduces a queue-based unfair lock where all the vCPUs on the queue can opportunistically steal the lock, but the frequency of doing so decreases the further it is away from the queue head. It can encourage a more FIFO like order of getting the lock and hence greatly reduce the chance of lock starvation. It can also reduce cacheline contention problem and so improve the performance of the system. This patch has no impact on native qspinlock performance at all. The unfair lock code will only be compiled in if CONFIG_HYPERVISOR_GUEST is defined. A microbenchmark of running 1 million lock-unlock operation for various number of threads running on a KVM guest with 32 pinned vCPUs and 4 vCPUs per node (8 nodes). This microbenchmark is intended to measure the variability of the execution times. Kernel ThreadsMin/Avg/Max(ms)SD(ms) -- -------- Unfair byte lock 4 133.1/386.0/509.0 153.48 8 720.5/939.5/1,068.0 117.08 162,237.8/6,045.8/7,550.3 1747.37 325,880.2/37,028.2/44,668.7 10136.30 Unfair qspinlock 4 326.1/453.7/523.0 80.44 8 681.6/1,126.4/1,486.5304.85 161,543.0/3,633.4/4,568.1 1000.47 322,356.8/7,103.3/7,894.9 1231.11 With small number of contending threads, both the performance and variability of both types of unfair lock are similar. However, when the number of contending threads increases, the byte lock has a much higher variability than the unfair qspinlock. 
Signed-off-by: Waiman Long --- arch/x86/include/asm/qspinlock.h | 17 ++-- kernel/locking/qspinlock.c| 98 ++- kernel/locking/qspinlock_unfair.h | 238 + 3 files changed, 340 insertions(+), 13 deletions(-) create mode 100644 kernel/locking/qspinlock_unfair.h diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae..bc82ace 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,18 +39,19 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock +#ifdef CONFIG_HYPERVISOR_GUEST +#ifndef static_cpu_has_hypervisor +#define static_cpu_has_hypervisor static_cpu_has(X86_FEATURE_HYPERVISOR) +#endif -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +#define queued_spin_trylock_unfair queued_spin_trylock_unfair +static inline bool queued_spin_trylock_unfair(struct qspinlock *lock) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return false; - - while (atomic_cmpxchg(>val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + u8 *l = (u8 *)lock; - return true; + return !READ_ONCE(*l) && (xchg(l, _Q_LOCKED_VAL) == 0); } +#endif #include diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5a25e89..65dead9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -19,7 +19,11 @@ * Peter Zijlstra */ -#ifndef _GEN_PV_LOCK_SLOWPATH +#if defined(_GEN_PV_LOCK_SLOWPATH) || defined(_GEN_UNFAIR_LOCK_SLOWPATH) +#define _GEN_LOCK_SLOWPATH +#endif + +#ifndef _GEN_LOCK_SLOWPATH #include #include @@ -68,7 +72,7 @@ #include "mcs_spinlock.h" -#ifdef CONFIG_PARAVIRT_SPINLOCKS +#ifdef CONFIG_HYPERVISOR_GUEST #define MAX_NODES 8 #else #define MAX_NODES 4 @@ -81,6 +85,7 @@ * Exactly fits one 64-byte cacheline on a 64-bit architecture. * * PV doubles the storage and uses the second cacheline for PV state. + * Unfair lock (mutually exclusive to PV) also uses the second cacheline. 
*/ static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); @@ -277,7 +282,18 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif -#endif /* _GEN_PV_LOCK_SLOWPATH */ +#ifdef CONFIG_HYPERVISOR_GUEST +static void unfair_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#else +static __always_inline void +unfair_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { } +#endif + +#ifndef static_cpu_has_hypervisor +#define
[PATCH 3/7] locking/pvqspinlock: Implement wait-early for overcommitted guest
In an overcommitted guest where some vCPUs have to be halted to make forward progress in other areas, it is highly likely that a vCPU later in the spinlock queue will be spinning while the ones earlier in the queue would have been halted already. The spinning in the later vCPUs is then just a waste of precious CPU cycles because they are not going to get the lock soon as the earlier ones have to be woken up and take their turn to get the lock. This patch implements a wait-early mechanism where the vCPU will call pv_wait() earlier if the previous vCPU is in the halted state already. In this case, it will spin less before calling pv_wait(). On the other hand, if the previous vCPU was running and then becomes halted, the current vCPU will call pv_wait() immmediately in this case. This patch also separates the spin threshold for queue head and queue nodes. It favors the queue head by allowing it to spin longer before calling pv_wait(). Signed-off-by: Waiman Long --- kernel/locking/qspinlock.c |5 ++- kernel/locking/qspinlock_paravirt.h | 52 +-- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d2e0fc1..782bc18 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -238,7 +238,8 @@ static __always_inline void set_locked(struct qspinlock *lock) */ static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } -static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { } +static __always_inline void __pv_wait_node(struct mcs_spinlock *node, + struct mcs_spinlock *prev) { } static __always_inline void __pv_scan_next(struct qspinlock *lock, struct mcs_spinlock *node) { } static __always_inline void __pv_wait_head(struct qspinlock *lock, @@ -391,7 +392,7 @@ queue: prev = decode_tail(old); WRITE_ONCE(prev->next, node); - pv_wait_node(node); + pv_wait_node(node, prev); arch_mcs_spin_lock_contended(>locked); } diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h index 4c1a299..b3fe5bb 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -22,6 +22,26 @@ #define _Q_SLOW_VAL(3U << _Q_LOCKED_OFFSET) /* + * Queued Spinlock Spin Thresholds + * --- + * Because of the cacheline contention effect of the ticket spinlock, the + * same spin threshold for queued spinlock will run a bit faster. So we set + * a slight larger threshold for the queue head (1.25X) while the other queue + * nodes will keep the same threshold. + * + * A queue node vCPU will spin less if the vCPU in the previous node is halted. + * The queue node vCPU will also monitor the state of the previous node + * periodically if it is not halted. When the previous node vCPU transitions + * from active to halted, the current one will go to halted state too. It is + * because it takes quite a lot of cycles for a vCPU to perform vmexit and + * vmenter. So it is better for the current vCPU to go be halted too. + */ +#define QHEAD_SPIN_THRESHOLD (SPIN_THRESHOLD + (SPIN_THRESHOLD/4)) +#define QNODE_SPIN_THRESHOLD SPIN_THRESHOLD +#define QNODE_SPIN_THRESHOLD_SHORT (QNODE_SPIN_THRESHOLD >> 4) +#define QNODE_SPIN_CHECK_MASK 0xff + +/* * Queue node uses: vcpu_running & vcpu_halted. * Queue head uses: vcpu_running & vcpu_hashed. */ @@ -187,15 +207,41 @@ static void pv_init_node(struct mcs_spinlock *node) * pv_scan_next() is used to set _Q_SLOW_VAL and fill in hash table on its * behalf. */ -static void pv_wait_node(struct mcs_spinlock *node) +static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) { struct pv_node *pn = (struct pv_node *)node; + struct pv_node *pp = (struct pv_node *)prev; + bool prev_halted; int loop; for (;;) { - for (loop = SPIN_THRESHOLD; loop; loop--) { + /* +* Spin less if the previous vCPU was in the halted state +*/ + prev_halted = (READ_ONCE(pp->state) != vcpu_running); + loop = prev_halted ? 
QNODE_SPIN_THRESHOLD_SHORT + : QNODE_SPIN_THRESHOLD; + while (loop--) { if (READ_ONCE(node->locked)) return; + /* +* Look for state transition at previous node. +* +* running => halted: +* call pv_wait() now to halt current vCPU +* halted => running: +* reset spin threshold to QNODE_SPIN_THRESHOLD +*/ +
[PATCH 5/7] locking/pvqspinlock: Add pending bit support
Like the native qspinlock, using the pending bit when it is lightly loaded to acquire the lock is faster than going through the PV queuing process which is even slower than the native queuing process. It also avoids loading two additional cachelines (the MCS and PV nodes). This patch adds the pending bit support for PV qspinlock. The pending bit code has a smaller spin threshold. It will default back to the queuing method if it cannot acquired the lock within a certain time limit. Signed-off-by: Waiman Long --- kernel/locking/qspinlock.c | 27 +++- kernel/locking/qspinlock_paravirt.h | 61 +++ 2 files changed, 87 insertions(+), 1 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 782bc18..5a25e89 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -162,6 +162,17 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL); } +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->pending, 0); +} + /* * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure @@ -193,6 +204,15 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) } /** + * clear_pending - clear the pending bit. 
+ * @lock: Pointer to queued spinlock structure + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_add(-_Q_PENDING_VAL, >val); +} + +/** * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure * @tail : The new queue tail code word @@ -246,6 +266,7 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) { } #define pv_enabled() false +#define pv_pending_lock(l, v) false #define pv_init_node __pv_init_node #define pv_wait_node __pv_wait_node @@ -287,8 +308,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); - if (pv_enabled()) + if (pv_enabled()) { + if (pv_pending_lock(lock, val)) + return; /* Got the lock via pending bit */ goto queue; + } if (virt_queued_spin_lock(lock)) return; @@ -464,6 +488,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath); #undef pv_wait_node #undef pv_scan_next #undef pv_wait_head +#undef pv_pending_lock #undef queued_spin_lock_slowpath #define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index efc9a72..d770694 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -40,6 +40,7 @@ #define QNODE_SPIN_THRESHOLD SPIN_THRESHOLD #define QNODE_SPIN_THRESHOLD_SHORT (QNODE_SPIN_THRESHOLD >> 4) #define QNODE_SPIN_CHECK_MASK 0xff +#define PENDING_SPIN_THRESHOLD QNODE_SPIN_THRESHOLD_SHORT /* * Queue node uses: vcpu_running & vcpu_halted. 
@@ -70,6 +71,8 @@ enum pv_qlock_stat { pvstat_kick_cpu, pvstat_kick_ahead, pvstat_no_kick, + pvstat_pend_lock, + pvstat_pend_fail, pvstat_spurious, pvstat_hash, pvstat_hops, @@ -91,6 +94,8 @@ static const char * const stat_fsnames[pvstat_num] = { [pvstat_kick_cpu]= "kick_cpu_count", [pvstat_kick_ahead] = "kick_ahead_count", [pvstat_no_kick] = "no_kick_count", + [pvstat_pend_lock] = "pending_lock_count", + [pvstat_pend_fail] = "pending_fail_count", [pvstat_spurious]= "spurious_wakeup", [pvstat_hash]= "hash_count", [pvstat_hops]= "hash_hops_count", @@ -355,6 +360,62 @@ static void pv_init_node(struct mcs_spinlock *node) } /* + * Try to acquire the lock and wait using the pending bit + */ +static int pv_pending_lock(struct qspinlock *lock, u32 val) +{ + int loop = PENDING_SPIN_THRESHOLD; + u32 new, old; + + /* +* wait for in-progress pending->locked hand-overs +*/ + if (val == _Q_PENDING_VAL) { + while (((val = atomic_read(>val)) == _Q_PENDING_VAL) && + loop--) + cpu_relax(); + } + + /* +* trylock || pending +*/ + for (;;) { + if (val & ~_Q_LOCKED_MASK) + goto queue; + new = _Q_LOCKED_VAL; + if (val == new) + new |= _Q_PENDING_VAL; + old =
[PATCH 1/7] locking/pvqspinlock: Only kick CPU at unlock time
For an over-committed guest with more vCPUs than physical CPUs available, it is possible that a vCPU may be kicked twice before getting the lock - one before it becomes queue head and once before it gets the lock. All these CPU kicking and halting (VMEXIT) can be expensive and slow down system performance. This patch adds a new vCPU state (vcpu_hashed) which enables the code to delay CPU kicking until at unlock time. Once this state is set, the new lock holder will set _Q_SLOW_VAL and fill in the hash table on behalf of the halted queue head vCPU. The original vcpu_halted state will be used by pv_wait_node() only to differentiate other queue nodes from the qeue head. Signed-off-by: Waiman Long --- kernel/locking/qspinlock.c | 10 ++-- kernel/locking/qspinlock_paravirt.h | 83 ++- 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 38c4920..d2e0fc1 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock *lock) static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { } -static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { } - +static __always_inline void __pv_scan_next(struct qspinlock *lock, + struct mcs_spinlock *node) { } static __always_inline void __pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) { } @@ -248,7 +248,7 @@ static __always_inline void __pv_wait_head(struct qspinlock *lock, #define pv_init_node __pv_init_node #define pv_wait_node __pv_wait_node -#define pv_kick_node __pv_kick_node +#define pv_scan_next __pv_scan_next #define pv_wait_head __pv_wait_head #ifdef CONFIG_PARAVIRT_SPINLOCKS @@ -440,7 +440,7 @@ queue: cpu_relax(); arch_mcs_spin_unlock_contended(>locked); - pv_kick_node(next); + pv_scan_next(lock, next); release: /* @@ -461,7 +461,7 @@ 
EXPORT_SYMBOL(queued_spin_lock_slowpath); #undef pv_init_node #undef pv_wait_node -#undef pv_kick_node +#undef pv_scan_next #undef pv_wait_head #undef queued_spin_lock_slowpath diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 04ab181..d302c39 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -21,9 +21,14 @@ #define _Q_SLOW_VAL(3U << _Q_LOCKED_OFFSET) +/* + * Queue node uses: vcpu_running & vcpu_halted. + * Queue head uses: vcpu_running & vcpu_hashed. + */ enum vcpu_state { vcpu_running = 0, - vcpu_halted, + vcpu_halted,/* Used only in pv_wait_node */ + vcpu_hashed,/* = pv_hash'ed + vcpu_halted */ }; struct pv_node { @@ -152,7 +157,8 @@ static void pv_init_node(struct mcs_spinlock *node) /* * Wait for node->locked to become true, halt the vcpu after a short spin. - * pv_kick_node() is used to wake the vcpu again. + * pv_scan_next() is used to set _Q_SLOW_VAL and fill in hash table on its + * behalf. */ static void pv_wait_node(struct mcs_spinlock *node) { @@ -171,9 +177,9 @@ static void pv_wait_node(struct mcs_spinlock *node) * * [S] pn->state = vcpu_halted[S] next->locked = 1 * MB MB -* [L] pn->locked [RmW] pn->state = vcpu_running +* [L] pn->locked [RmW] pn->state = vcpu_hashed * -* Matches the xchg() from pv_kick_node(). +* Matches the cmpxchg() from pv_scan_next(). */ smp_store_mb(pn->state, vcpu_halted); @@ -181,9 +187,9 @@ static void pv_wait_node(struct mcs_spinlock *node) pv_wait(>state, vcpu_halted); /* -* Reset the vCPU state to avoid unncessary CPU kicking +* Reset the state except when vcpu_hashed is set. */ - WRITE_ONCE(pn->state, vcpu_running); + cmpxchg(>state, vcpu_halted, vcpu_running); /* * If the locked flag is still not set after wakeup, it is a @@ -193,6 +199,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * MCS lock will be released soon. */ } + /* * By now our node->locked should be 1 and our caller will not actually * spin-wait for it. 
We do however rely on our caller to do a @@ -201,24 +208,32 @@ static void pv_wait_node(struct mcs_spinlock *node) } /* - * Called after setting next->locked = 1, used to wake those stuck in - * pv_wait_node(). + * Called after setting
Re: [PATCH] MIPS: ath79: irq: Remove the include of drivers/irqchip/irqchip.h
On Wed, 8 Jul 2015, Alban Bedel wrote: > We shouldn't include irqchip.h from outside of the drivers/irqchip > directory. The irq driver should idealy be there, however this not > trivial at the moment. We still need to support platforms without DT > support and the interface to the DDR controller still use a custom > arch specific API. > > For now just redefine the IRQCHIP_DECLARE macro to avoid the cross > tree include. The macro has been moved to linux/irqchip.h. But even if it would still be in drivers/irqchip such a redefine is even worse than the ../../... include. And the proper solution from the very beginning would have been to move the macro to the global header instead of this horrible include. Sigh, tglx -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/3] arm64, mm: Use flush_tlb_all_local() in flush_context().
From: David Daney When CONFIG_SMP, we end up calling flush_context() on each CPU (indirectly) from __new_context(). Because of this, doing a broadcast TLB invalidate is overkill, as all CPUs will be doing a local invalidation. Change the scope of the TLB invalidation operation to be local, resulting in nr_cpus invalidations, rather than nr_cpus^2. On CPUs with a large ASID space this operation is not often done. But, when it is, this reduces the overhead. Benchmarked "time make -j48" kernel build with and without the patch on Cavium ThunderX system, one run to warm up the caches, and then five runs measured: original with-patch 139.299s 139.0766s S.D. 0.321S.D. 0.159 Probably a little faster, but could be measurement noise. Signed-off-by: David Daney --- arch/arm64/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 76c1e6c..ab5b8d3 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -48,7 +48,7 @@ static void flush_context(void) { /* set the reserved TTBR0 before flushing the TLB */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + flush_tlb_all_local(); if (icache_is_aivivt()) __flush_icache_all(); } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/3] arm64, mm: Use IPIs for TLB invalidation.
From: David Daney This patch set (or something like it) is needed for the Cavium ThunderX, but its performance improvements may make it compelling on its own merits. Summary: On ThunderX we cannot use broadcast TLB invalidation, so we use IPIs where necessary. The funny thing is that it also happens to make workloads similar to kernel builds much faster. David Daney (3): arm64, mm: Add flush_tlb_all_local() function. arm64, mm: Use flush_tlb_all_local() in flush_context(). arm64, mm: Use IPIs for TLB invalidation. arch/arm64/include/asm/tlbflush.h | 64 --- arch/arm64/mm/context.c | 2 +- arch/arm64/mm/flush.c | 46 3 files changed, 59 insertions(+), 53 deletions(-) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3] arm64, mm: Add flush_tlb_all_local() function.
From: David Daney To be used in follow-on patch. Signed-off-by: David Daney --- arch/arm64/include/asm/tlbflush.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 934815d..42c09ec 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -105,6 +105,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } +static inline void flush_tlb_all_local(void) +{ + dsb(ishst); + asm("tlbi vmalle1"); + isb(); +} + static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3] arm64, mm: Use IPIs for TLB invalidation.
From: David Daney Most broadcast TLB invalidations are unnecessary. So when invalidating for a given mm/vma target the only the needed CPUs via and IPI. For global TLB invalidations, also use IPI. Tested on Cavium ThunderX. This change reduces 'time make -j48' on kernel from 139s to 116s (83% as long). The patch is needed because of a ThunderX Pass1 erratum: Exclusive store operations unreliable in the presence of broadcast TLB invalidations. The performance improvements shown make it compelling even without the erratum workaround need. Signed-off-by: David Daney --- arch/arm64/include/asm/tlbflush.h | 67 ++- arch/arm64/mm/flush.c | 46 +++ 2 files changed, 56 insertions(+), 57 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 42c09ec..2c132b0 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -63,46 +63,22 @@ * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ -static inline void flush_tlb_all(void) -{ - dsb(ishst); - asm("tlbi vmalle1is"); - dsb(ish); - isb(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - unsigned long asid = (unsigned long)ASID(mm) << 48; +void flush_tlb_all(void); - dsb(ishst); - asm("tlbi aside1is, %0" : : "r" (asid)); - dsb(ish); -} +void flush_tlb_mm(struct mm_struct *mm); static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | - ((unsigned long)ASID(vma->vm_mm) << 48); - - dsb(ishst); - asm("tlbi vae1is, %0" : : "r" (addr)); - dsb(ish); + /* Simplify to entire mm. 
*/ + flush_tlb_mm(vma->vm_mm); } static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; - unsigned long addr; - start = asid | (start >> 12); - end = asid | (end >> 12); - - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vae1is, %0" : : "r"(addr)); - dsb(ish); + /* Simplify to entire mm. */ + flush_tlb_mm(vma->vm_mm); } static inline void flush_tlb_all_local(void) @@ -112,40 +88,17 @@ static inline void flush_tlb_all_local(void) isb(); } -static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - unsigned long addr; - start >>= 12; - end >>= 12; - - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vaae1is, %0" : : "r"(addr)); - dsb(ish); - isb(); -} - -/* - * This is meant to avoid soft lock-ups on large TLB flushing ranges and not - * necessarily a performance improvement. - */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) - static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_range(vma, start, end); - else - flush_tlb_mm(vma->vm_mm); + /* Simplify to entire mm. */ + flush_tlb_mm(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_kernel_range(start, end); - else - flush_tlb_all(); + /* Simplify to all. 
*/ + flush_tlb_all(); } /* diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 4dfa397..45f24d3 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,51 @@ #include "mm.h" +static void flush_tlb_local(void *info) +{ + asm volatile("\n" +" tlbivmalle1\n" +" isb sy" + ); +} + +static void flush_tlb_mm_local(void *info) +{ + unsigned long asid = (unsigned long)info; + + asm volatile("\n" +" tlbiaside1, %0\n" +" isb sy" +: : "r" (asid) + ); +} + +void flush_tlb_all(void) +{ + /* Make sure page table modifications are visible. */ + dsb(ishst); + /* IPI to all CPUs to do local flush. */ + on_each_cpu(flush_tlb_local, NULL, 1); + +} +EXPORT_SYMBOL(flush_tlb_all); + +void flush_tlb_mm(struct mm_struct *mm) +{ + if (!mm) { + flush_tlb_all(); + } else { + unsigned long asid = (unsigned long)ASID(mm) << 48; + /* Make sure page table
Re: [BUG] mellanox IB driver fails to load on large config
On Fri, Jul 10, 2015 at 10:15 PM, andrew banman wrote: > I'm seeing a large number of allocation errors originating from the Mellanox > IB > driver when booting the 4.2-rc1 kernel on a 4096cpu 32TB memory system: Just to make sure, mlx4 works fine on this small (...) system with 4.1 and 4.2-rc1 breaks, or 4.2-rc1 is the 1st time you're trying that config? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] selinux: fix mprotect PROT_EXEC regression caused by mm change
On Fri, 10 Jul 2015, Stephen Smalley wrote: > commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip > security check and lockdep conflict with XFS") caused a regression for > SELinux by disabling any SELinux checking of mprotect PROT_EXEC on > shared anonymous mappings. However, even before that regression, the > checking on such mprotect PROT_EXEC calls was inconsistent with the > checking on a mmap PROT_EXEC call for a shared anonymous mapping. On a > mmap, the security hook is passed a NULL file and knows it is dealing with > an anonymous mapping and therefore applies an execmem check and no file > checks. On a mprotect, the security hook is passed a vma with a > non-NULL vm_file (as this was set from the internally-created shmem > file during mmap) and therefore applies the file-based execute check and > no execmem check. Since the aforementioned commit now marks the shmem > zero inode with the S_PRIVATE flag, the file checks are disabled and > we have no checking at all on mprotect PROT_EXEC. Add a test to > the mprotect hook logic for such private inodes, and apply an execmem > check in that case. This makes the mmap and mprotect checking consistent > for shared anonymous mappings, as well as for /dev/zero and ashmem. > > Signed-off-by: Stephen Smalley Thank you for correcting that, Stephen (and for the nicely detailed commit description): it looks right to me so I'll say Acked-by: Hugh Dickins but I know far too little of SElinux, and its defaults, to confirm whether it actually does all you need - I'll trust you on that. (There being various other references to the file in file_map_prot_check() and selinux_file_mprotect(), and I couldn't tell if they should or should not be modified by IS_PRIVATE(file_inode(file)) checks too: my best guess was that they wouldn't matter.) 
> --- > security/selinux/hooks.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c > index 6231081..564079c 100644 > --- a/security/selinux/hooks.c > +++ b/security/selinux/hooks.c > @@ -3283,7 +3283,8 @@ static int file_map_prot_check(struct file *file, > unsigned long prot, int shared > int rc = 0; > > if (default_noexec && > - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE { > + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || > +(!shared && (prot & PROT_WRITE { > /* >* We are making executable an anonymous mapping or a >* private file mapping that will also be writable. > -- > 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 3/3] usb: dwc3: gadget: return error if command sent to DEPCMD register fails
Hi, On Sat, Jul 11, 2015 at 05:17:32PM +, Subbaraya Sundeep Bhatta wrote: > > > >> Hi Felipe, > > > >> > > > >> Just an update on this. > > > >> > > > >> I'm trying to get this working with our latest IP with dwc3 from > > > >> your testing/next branch. It fails the usbtest with a problem > > > >> unrelated to this patch. > > > >>. > > > >> It passes on 4.1.1. > > > >> > > > >> I'll have to look into the failure but I won't get to it until next > > > >> week as I'm off the rest of this week. > > > > > > > > interesting... If you could post failure signature, I can help > > > > looking at it, but I guess it's too late to ask :-) > > > > > > > > thanks for helping though > > > > > > > > > > > > > Hi Felipe, > > > > > > Nevermind about my issue, it ended up being a setup-related problem. > > > > > > I actually do see the same error as you due to this series of patches. > > > Except I see it happening before even the first iteration. I get a > > > completion status of 1 for the Set Endpoint Transfer Resources > > > command. I'm not sure why this is. > > > > > > I don't see any conflict with any previous Transfer Complete. > > Same behavior at my end too. Fails before first iteration and I get > completion status of 1 for Set Endpoint Resource command. Attached the > logs of testing done with this patch and without this patch. > Without this patch I often see completion status of 1 for Set Endpoint > Transfer Resources command for Bulk and Isoc endpoints but test > proceeds because driver just logs command completion status and moves > on. We can revert this patch for time being. IP version is 2.90a. yeah, that's what I mean, it really seems like it's the IP misbehaving. John, let's try to figure out what's the root cause of this, we really want to use command completion status at some point, but for now we need to revert the patch :-( Let me know if you want me to log STARS ticket on your solvnet system. cheers -- balbi signature.asc Description: Digital signature
Re: [PATCH RESEND] iio: adc: rockchip_saradc: add missing MODULE_* data
Hi Jonathan, Am Samstag, 11. Juli 2015, 18:32:42 schrieb Jonathan Cameron: > On 08/07/15 15:17, Heiko Stuebner wrote: > > The module-data is currently missing. This includes the > > license-information > > which makes the driver taint the kernel and miss symbols when compiled as > > module. > > > > Fixes: 44d6f2ef94f9 ("iio: adc: add driver for Rockchip saradc") > > Signed-off-by: Heiko Stuebner > > Sorry Heiko, > > Not entirely sure why I haven't picked this up before. > > Anyhow, now applied to the fixes-for-4.2 branch of iio.git > and marked for stable. I need to catch up with a bit of a > backlog, but should get a pull request out to Greg sometime > early next week. really no problem. I track my patches and generally simply keep pestering people for as long as it takes ;-) Heiko -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re:
0001-Fix-redundant-check-against-unsigned-int-in-broken-a.patch Description: Binary data
Re: [PATCH v2] net: dsa: mv88e6xxx: add write access to debugfs regs file
Hi David, On Jul 11, 2015, at 2:01 AM, David da...@davemloft.net wrote: > From: Vivien Didelot > Date: Thu, 9 Jul 2015 17:13:29 -0400 > >> Allow write access to the regs file in the debugfs interface, with the >> following parameters: >> >> echo> regs >> >> Where "name" is the register name (as shown in the header row), "reg" is >> the register address (as shown in the first column) and "value" is the >> 16-bit value. e.g.: >> >> echo GLOBAL 1a 5550 > regs >> >> Signed-off-by: Vivien Didelot > > I don't know about this. > > This starts to smell like a back door for proprietary userspace SDKs to > program the switch hardware. > > Yes, they can do it via other mechanisms, but we don't have to make it > any easier for them either. I agree with you and I wouldn't want that either. > If you want to poke registers, hack the module just like any other > person with appropriate privileges can do. I'm not sure what you mean. Keeping some custom patches in our local tree? > Frankly, all of this debugfs crap in the DSA drivers smells like poo. > I don't like it _AT_ _ALL_, and I shouldn't have allowed any of it > into the tree in the first place. > > I might just remove it all myself, it bothers me so much. > > Fetching information should be done by well typed, generic, interfaces > that apply to any similar device or object. All of this debugfs stuff > smells of hacks and special case crap that's only usable for one > device type and that makes it the single most terrible interface to > give to users. In the meantime, this is really useful for development. i.e. ensuring a good switchdev/DSA interaction without being able to read and write directly the hardware VLAN table, is a bit of a PITA. A dynamic debugfs looked appropriate. On the other hand, the mv88e6xxx driver gets cluttered with all this code. 
I'd gladly move all this code in a mv88e6xxx-debugfs.c file, and conditionally compile it with: mv88e6xxx_drv-$(CONFIG_DEBUG_FS) += mv88e6xxx-debugfs.o similar to what the i2400m driver does. Would that be appreciated? Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341
On 7/11/2015 11:26 AM, Porteus Kiosk wrote: Hello Arjan, We need it for setting up the time in the hardware clock through the 'hwclock' command. Thank you. hmm thinking about it after coffee... there is an RTC that can be exposed to userspace. hrmpf. Wonder why it's not there for you -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341
On 7/11/2015 11:21 AM, Arjan van de Ven wrote: On 7/11/2015 10:59 AM, Larry Finger wrote: On a Winbook TW100 BayTrail tablet, kernel 4.0 and later do not create /dev/rtc0 when CONFIG_PINCTRL_BAYTRAIL is set in the configuration. Removing this option from the config creates a real-time clock; however, it is no longer possible to get the tablet to sleep using the power button. Only complete shutdown works. This problem was bisected to the following commit: in "hardware reduced mode" (e.g. tablets) on Baytrail the RTC is not actually enabled/initialized by the firmware; talking to it may appear to work but it's really not a good idea (and breaks things likes suspend/resume etc). (or in other words, many of the legacy PC things are not supposed to be there) what did you want to use rtc0 for? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: V4.0.x fails to create /dev/rtc0 on Winbook TW100 when CONFIG_PINCTRL_BAYTRAIL is set, bisected to commit 7486341
On 7/11/2015 10:59 AM, Larry Finger wrote: On a Winbook TW100 BayTrail tablet, kernel 4.0 and later do not create /dev/rtc0 when CONFIG_PINCTRL_BAYTRAIL is set in the configuration. Removing this option from the config creates a real-time clock; however, it is no longer possible to get the tablet to sleep using the power button. Only complete shutdown works. This problem was bisected to the following commit: in "hardware reduced mode" (e.g. tablets) on Baytrail the RTC is not actually enabled/initialized by the firmware; talking to it may appear to work but it's really not a good idea (and breaks things like suspend/resume etc). -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] drm/atomic: fix null dereference
On Sat, Jul 11, 2015 at 1:24 PM, Sudip Mukherjee wrote: > We are checking the size of e->event but we were doing it when e is > known to be NULL. nak, this will leak event_space.. since it is a sizeof, it isn't actually deref'ing e, but rather just using the static type info, so it's ok (although perhaps funny looking) BR, -R > Signed-off-by: Sudip Mukherjee > --- > drivers/gpu/drm/drm_atomic.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c > index acebd16..51d3a85 100644 > --- a/drivers/gpu/drm/drm_atomic.c > +++ b/drivers/gpu/drm/drm_atomic.c > @@ -1311,7 +1311,6 @@ static struct drm_pending_vblank_event > *create_vblank_event( > e = kzalloc(sizeof *e, GFP_KERNEL); > if (e == NULL) { > spin_lock_irqsave(>event_lock, flags); > - file_priv->event_space += sizeof e->event; > spin_unlock_irqrestore(>event_lock, flags); > goto out; > } > -- > 1.8.1.2 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2] xen/blkfront: convert to blk-mq APIs
On 07/11/2015 07:30 AM, Bob Liu wrote: Note: This patch is based on original work of Arianna's internship for GNOME's Outreach Program for Women. Great to see this finally get prepped to go in! Only one hardware queue is used now, so there is no performance change. I would hope that the blk-mq path, even with one queue, is a perf win over the old interface. So I'm not sure that is correct. But the bigger win will be with more queues, of course. The legacy non-mq code is deleted completely which is the same as other drivers like virtio, mtip, and nvme. Also dropped one unnecessary holding of info->io_lock when calling blk_mq_stop_hw_queues(). Changes in v2: - Reorganized blk_mq_queue_rq() - Restored most io_locks in place Looks good to me. The most common error case is the busy-out not stopping queues, or not restarting them at completion. But that all looks fine. I would, however, rename blk_mq_queue_rq(). It sounds like a core function. blkif_queue_rq() would be more appropriate. Signed-off-by: Arianna Avanzini Signed-off-by: Bob Liu Acked-by: Jens Axboe -- Jens Axboe -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 02/11] blkcg: use blkg_free() in blkcg_init_queue() failure path
When blkcg_init_queue() fails midway after creating a new blkg, it performs kfree() directly; however, this doesn't free the policy data areas. Make it use blkg_free() instead. In turn, blkg_free() is updated to handle root request_list special case. While this fixes a possible memory leak, it's on an unlikely failure path of an already cold path and the size leaked per occurrence is miniscule too. I don't think it needs to be tagged for -stable. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fbb0b65..64cc48f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -70,7 +70,8 @@ static void blkg_free(struct blkcg_gq *blkg) for (i = 0; i < BLKCG_MAX_POLS; i++) kfree(blkg->pd[i]); - blk_exit_rl(>rl); + if (blkg->blkcg != _root) + blk_exit_rl(>rl); kfree(blkg); } @@ -934,7 +935,7 @@ int blkcg_init_queue(struct request_queue *q) radix_tree_preload_end(); if (IS_ERR(blkg)) { - kfree(new_blkg); + blkg_free(new_blkg); return PTR_ERR(blkg); } -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHSET v3 block/for-4.3] blkcg: blkcg policy methods and data handling cleanup
This is v3 of blkcg_policy methods cleanup patchset. Changes from the last take [L] are * Rebased on top of block/for-linus. * 0003-blkcg-remove-unnecessary-blkcg_root-handling-from-cs.patch and 0004-blkcg-restructure-blkg_policy_data-allocation-in-blk.patch added. These are follow-up cleanups for the blkcg_policy_data handling fixes which went into block/for-linus. * 0010-blkcg-cosmetic-updates-about-blkcg_policy_data.patch and 0011-blkcg-replace-blkcg_policy-cpd_size-with-cpd_alloc-f.patch added so that blkcg_policy_data handling is consistent with blkg_policy_data handling. This patchset contains assorted cleanups for blkcg_policy methods and blk[c]g_policy_data handling. * alloc/free added for blkg_policy_data. exit dropped. * alloc/free added for blkcg_policy_data. * blk-throttle's async percpu allocation is replaced with direct allocation. * all methods now take blk[c]g_policy_data instead of blkcg_gq or blkcg. This patchset contains the following 11 patches. 0001-blkcg-remove-unnecessary-request_list-blkg-NULL-test.patch 0002-blkcg-use-blkg_free-in-blkcg_init_queue-failure-path.patch 0003-blkcg-remove-unnecessary-blkcg_root-handling-from-cs.patch 0004-blkcg-restructure-blkg_policy_data-allocation-in-blk.patch 0005-blkcg-make-blkcg_activate_policy-allow-NULL-pd_init_.patch 0006-blkcg-replace-blkcg_policy-pd_size-with-pd_alloc-fre.patch 0007-blk-throttle-remove-asynchrnous-percpu-stats-allocat.patch 0008-blk-throttle-clean-up-blkg_policy_data-alloc-init-ex.patch 0009-blkcg-make-blkcg_policy-methods-take-a-pointer-to-bl.patch 0010-blkcg-cosmetic-updates-about-blkcg_policy_data.patch 0011-blkcg-replace-blkcg_policy-cpd_size-with-cpd_alloc-f.patch 0001-0005 are misc cleanups. 0006-0008 add alloc/free methods and remove blk-throttle's async percpu allocation mechanism. 0009 makes all methods take blkcg_policy_data. 0010-0011 apply similar cleanups to blkcg_policy_data handling. This patchset is also available in the following git branch. 
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git review-blkcg-methods-cleanup and is on top of block/for-linus 06b285bd1125 ("blkcg: fix blkcg_policy_data allocation bug") + [1] [PATCHSET block/for-4.3] writeback: cgroup writeback updates + [2] [PATCHSET v2 block/for-4.3] block, cgroup: make cfq charge async IOs to the appropriate blkcgs diffstat follows, thanks. block/blk-cgroup.c | 171 +++- block/blk-throttle.c | 173 + block/cfq-iosched.c| 68 + include/linux/blk-cgroup.h | 65 4 files changed, 214 insertions(+), 263 deletions(-) -- tejun [L] http://lkml.kernel.org/g/1436284293-4666-1-git-send-email...@kernel.org [1] http://lkml.kernel.org/g/1436281823-1947-1-git-send-email...@kernel.org [2] http://lkml.kernel.org/g/1436283361-3889-1-git-send-email...@kernel.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 04/11] blkcg: restructure blkg_policy_data allocation in blkcg_activate_policy()
When a policy gets activated, it needs to allocate and install its policy data on all existing blkg's (blkcg_gq's). Because blkg iteration is protected by a spinlock, it currently counts the total number of blkg's in the system, allocates the matching number of policy data on a list and installs them during a single iteration. This can be simplified by using speculative GFP_NOWAIT allocations while iterating and falling back to a preallocated policy data on failure. If the preallocated one has already been consumed, it releases the lock, preallocate with GFP_KERNEL and then restarts the iteration. This can be a bit more expensive than before but policy activation is a very cold path and shouldn't matter. Signed-off-by: Tejun Heo --- block/blk-cgroup.c | 55 ++ include/linux/blk-cgroup.h | 3 --- 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 2a493ce..5dbbacd 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1043,65 +1043,52 @@ EXPORT_SYMBOL_GPL(blkio_cgrp_subsys); int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { - LIST_HEAD(pds); + struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg; - struct blkg_policy_data *pd, *nd; - int cnt = 0, ret; + int ret; if (blkcg_policy_enabled(q, pol)) return 0; - /* count and allocate policy_data for all existing blkgs */ blk_queue_bypass_start(q); - spin_lock_irq(q->queue_lock); - list_for_each_entry(blkg, >blkg_list, q_node) - cnt++; - spin_unlock_irq(q->queue_lock); - - /* allocate per-blkg policy data for all existing blkgs */ - while (cnt--) { - pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); - if (!pd) { +pd_prealloc: + if (!pd_prealloc) { + pd_prealloc = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); + if (!pd_prealloc) { ret = -ENOMEM; - goto out_free; + goto out_bypass_end; } - list_add_tail(>alloc_node, ); } - /* -* Install the allocated pds and cpds. 
With @q bypassing, no new blkg -* should have been created while the queue lock was dropped. -*/ spin_lock_irq(q->queue_lock); list_for_each_entry(blkg, >blkg_list, q_node) { - if (WARN_ON(list_empty())) { - /* umm... this shouldn't happen, just abort */ - ret = -ENOMEM; - goto out_unlock; - } - pd = list_first_entry(, struct blkg_policy_data, alloc_node); - list_del_init(>alloc_node); + struct blkg_policy_data *pd; - /* grab blkcg lock too while installing @pd on @blkg */ - spin_lock(>blkcg->lock); + if (blkg->pd[pol->plid]) + continue; + + pd = kzalloc_node(pol->pd_size, GFP_NOWAIT, q->node); + if (!pd) + swap(pd, pd_prealloc); + if (!pd) { + spin_unlock_irq(q->queue_lock); + goto pd_prealloc; + } blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; pol->pd_init_fn(blkg); - - spin_unlock(>blkcg->lock); } __set_bit(pol->plid, q->blkcg_pols); ret = 0; -out_unlock: + spin_unlock_irq(q->queue_lock); -out_free: +out_bypass_end: blk_queue_bypass_end(q); - list_for_each_entry_safe(pd, nd, , alloc_node) - kfree(pd); + kfree(pd_prealloc); return ret; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 9711fc2..db82288 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -80,9 +80,6 @@ struct blkg_policy_data { /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; int plid; - - /* used during policy activation */ - struct list_headalloc_node; }; /* -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 07/11] blk-throttle: remove asynchronous percpu stats allocation mechanism
Because percpu allocator couldn't do non-blocking allocations, blk-throttle was forced to implement an ad-hoc asynchronous allocation mechanism for its percpu stats for cases where blkg's (blkcg_gq's) are allocated from an IO path without sleepable context. Now that percpu allocator can handle gfp_mask and blkg_policy_data alloc / free are handled by policy methods, the ad-hoc asynchronous allocation mechanism can be replaced with direct allocation from tg_stats_alloc_fn(). Rit it out. This ensures that an active throtl_grp always has valid non-NULL ->stats_cpu. Remove checks on it. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-throttle.c | 112 --- 1 file changed, 25 insertions(+), 87 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f1dd691..3c86976 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -144,9 +144,6 @@ struct throtl_grp { /* Per cpu stats pointer */ struct tg_stats_cpu __percpu *stats_cpu; - - /* List of tgs waiting for per cpu stats memory to be allocated */ - struct list_head stats_alloc_node; }; struct throtl_data @@ -168,13 +165,6 @@ struct throtl_data struct work_struct dispatch_work; }; -/* list and work item to allocate percpu group stats */ -static DEFINE_SPINLOCK(tg_stats_alloc_lock); -static LIST_HEAD(tg_stats_alloc_list); - -static void tg_stats_alloc_fn(struct work_struct *); -static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); - static void throtl_pending_timer_fn(unsigned long arg); static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) @@ -256,53 +246,6 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq) } \ } while (0) -static void tg_stats_init(struct tg_stats_cpu *tg_stats) -{ - blkg_rwstat_init(_stats->service_bytes); - blkg_rwstat_init(_stats->serviced); -} - -/* - * Worker for allocating per cpu stat for tgs. This is scheduled on the - * system_wq once there are some groups on the alloc_list waiting for - * allocation. 
- */ -static void tg_stats_alloc_fn(struct work_struct *work) -{ - static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ - struct delayed_work *dwork = to_delayed_work(work); - bool empty = false; - -alloc_stats: - if (!stats_cpu) { - int cpu; - - stats_cpu = alloc_percpu(struct tg_stats_cpu); - if (!stats_cpu) { - /* allocation failed, try again after some time */ - schedule_delayed_work(dwork, msecs_to_jiffies(10)); - return; - } - for_each_possible_cpu(cpu) - tg_stats_init(per_cpu_ptr(stats_cpu, cpu)); - } - - spin_lock_irq(_stats_alloc_lock); - - if (!list_empty(_stats_alloc_list)) { - struct throtl_grp *tg = list_first_entry(_stats_alloc_list, -struct throtl_grp, -stats_alloc_node); - swap(tg->stats_cpu, stats_cpu); - list_del_init(>stats_alloc_node); - } - - empty = list_empty(_stats_alloc_list); - spin_unlock_irq(_stats_alloc_lock); - if (!empty) - goto alloc_stats; -} - static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(>node); @@ -405,7 +348,27 @@ static void throtl_service_queue_exit(struct throtl_service_queue *sq) static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) { - return kzalloc_node(sizeof(struct throtl_grp), gfp, node); + struct throtl_grp *tg; + int cpu; + + tg = kzalloc_node(sizeof(*tg), gfp, node); + if (!tg) + return NULL; + + tg->stats_cpu = alloc_percpu_gfp(struct tg_stats_cpu, gfp); + if (!tg->stats_cpu) { + kfree(tg); + return NULL; + } + + for_each_possible_cpu(cpu) { + struct tg_stats_cpu *stats_cpu = per_cpu_ptr(tg->stats_cpu, cpu); + + blkg_rwstat_init(_cpu->service_bytes); + blkg_rwstat_init(_cpu->serviced); + } + + return >pd; } static void throtl_pd_init(struct blkcg_gq *blkg) @@ -413,7 +376,6 @@ static void throtl_pd_init(struct blkcg_gq *blkg) struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_data *td = blkg->q->td; struct throtl_service_queue *parent_sq; - unsigned long flags; int rw; /* @@ -448,16 +410,6 @@ static void throtl_pd_init(struct 
blkcg_gq *blkg) tg->bps[WRITE] = -1; tg->iops[READ] = -1; tg->iops[WRITE] = -1; - - /* -* Ugh... We need to perform per-cpu allocation for tg->stats_cpu -* but percpu
[PATCH 06/11] blkcg: replace blkcg_policy->pd_size with ->pd_alloc/free_fn() methods
A blkg (blkcg_gq) represents the relationship between a cgroup and request_queue. Each active policy has a pd (blkg_policy_data) on each blkg. The pd's were allocated by blkcg core and each policy could request to allocate extra space at the end by setting blkcg_policy->pd_size larger than the size of pd. This is a bit unusual but was done this way mostly to simplify error handling and all the existing use cases could be handled this way; however, this is becoming too restrictive now that percpu memory can be allocated without blocking. This introduces two new mandatory blkcg_policy methods - pd_alloc_fn() and pd_free_fn() - which are used to allocate and release pd for a given policy. As pd allocation is now done from policy side, it can simply allocate a larger area which embeds pd at the beginning. This change makes ->pd_size pointless. Removed. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 21 +++-- block/blk-throttle.c | 13 - block/cfq-iosched.c| 13 - include/linux/blk-cgroup.h | 18 +- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b558705..9d83623 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -68,7 +68,8 @@ static void blkg_free(struct blkcg_gq *blkg) return; for (i = 0; i < BLKCG_MAX_POLS; i++) - kfree(blkg->pd[i]); + if (blkg->pd[i]) + blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->blkcg != _root) blk_exit_rl(>rl); @@ -114,7 +115,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, continue; /* alloc per-policy data and attach it to blkg */ - pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); + pd = pol->pd_alloc_fn(gfp_mask, q->node); if (!pd) goto err_free; @@ -1053,7 +1054,7 @@ int blkcg_activate_policy(struct request_queue *q, blk_queue_bypass_start(q); pd_prealloc: if (!pd_prealloc) { - pd_prealloc = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); + pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); if (!pd_prealloc) 
{ ret = -ENOMEM; goto out_bypass_end; @@ -1068,7 +1069,7 @@ int blkcg_activate_policy(struct request_queue *q, if (blkg->pd[pol->plid]) continue; - pd = kzalloc_node(pol->pd_size, GFP_NOWAIT, q->node); + pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node); if (!pd) swap(pd, pd_prealloc); if (!pd) { @@ -1089,7 +1090,8 @@ int blkcg_activate_policy(struct request_queue *q, spin_unlock_irq(q->queue_lock); out_bypass_end: blk_queue_bypass_end(q); - kfree(pd_prealloc); + if (pd_prealloc) + pol->pd_free_fn(pd_prealloc); return ret; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); @@ -1124,8 +1126,10 @@ void blkcg_deactivate_policy(struct request_queue *q, if (pol->pd_exit_fn) pol->pd_exit_fn(blkg); - kfree(blkg->pd[pol->plid]); - blkg->pd[pol->plid] = NULL; + if (blkg->pd[pol->plid]) { + pol->pd_free_fn(blkg->pd[pol->plid]); + blkg->pd[pol->plid] = NULL; + } spin_unlock(>blkcg->lock); } @@ -1147,9 +1151,6 @@ int blkcg_policy_register(struct blkcg_policy *pol) struct blkcg *blkcg; int i, ret; - if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) - return -EINVAL; - mutex_lock(_pol_register_mutex); mutex_lock(_pol_mutex); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b231935..f1dd691 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -403,6 +403,11 @@ static void throtl_service_queue_exit(struct throtl_service_queue *sq) del_timer_sync(>pending_timer); } +static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) +{ + return kzalloc_node(sizeof(struct throtl_grp), gfp, node); +} + static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -493,6 +498,11 @@ static void throtl_pd_exit(struct blkcg_gq *blkg) throtl_service_queue_exit(>service_queue); } +static void throtl_pd_free(struct blkg_policy_data *pd) +{ + kfree(pd); +} + static void throtl_pd_reset_stats(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -1468,12 +1478,13 @@ static void throtl_shutdown_wq(struct request_queue 
*q) } static struct blkcg_policy blkcg_policy_throtl = { - .pd_size= sizeof(struct throtl_grp),
[PATCH 05/11] blkcg: make blkcg_activate_policy() allow NULL ->pd_init_fn
blkg_create() allows NULL ->pd_init_fn() but blkcg_activate_policy() doesn't. As both in-kernel policies implement ->pd_init_fn, it currently doesn't break anything. Update blkcg_activate_policy() so that its behavior is consistent with blkg_create(). Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5dbbacd..b558705 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1079,7 +1079,8 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; - pol->pd_init_fn(blkg); + if (pol->pd_init_fn) + pol->pd_init_fn(blkg); } __set_bit(pol->plid, q->blkcg_pols); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 09/11] blkcg: make blkcg_policy methods take a pointer to blkcg_policy_data
The newly added ->pd_alloc_fn() and ->pd_free_fn() deal with pd (blkg_policy_data) while the older ones use blkg (blkcg_gq). As using blkg doesn't make sense for ->pd_alloc_fn() and after allocation pd can always be mapped to blkg and given that these are policy-specific methods, it makes sense to converge on pd. This patch makes all methods deal with pd instead of blkg. Most conversions are trivial. In blk-cgroup.c, a couple method invocation sites now test whether pd exists instead of policy state for consistency. This shouldn't cause any behavioral differences. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 18 -- block/blk-throttle.c | 13 +++-- block/cfq-iosched.c| 14 +++--- include/linux/blk-cgroup.h | 8 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e509bc8..d18cdb6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -242,7 +242,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && pol->pd_init_fn) - pol->pd_init_fn(blkg); + pol->pd_init_fn(blkg->pd[i]); } /* insert */ @@ -256,7 +256,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && pol->pd_online_fn) - pol->pd_online_fn(blkg); + pol->pd_online_fn(blkg->pd[i]); } } blkg->online = true; @@ -347,7 +347,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && pol->pd_offline_fn) - pol->pd_offline_fn(blkg); + pol->pd_offline_fn(blkg->pd[i]); } blkg->online = false; @@ -468,9 +468,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; - if (blkcg_policy_enabled(blkg->q, pol) && - pol->pd_reset_stats_fn) - pol->pd_reset_stats_fn(blkg); + if (blkg->pd[i] && pol->pd_reset_stats_fn) + pol->pd_reset_stats_fn(blkg->pd[i]); } } @@ -1072,7 
+1071,7 @@ int blkcg_activate_policy(struct request_queue *q, pd->blkg = blkg; pd->plid = pol->plid; if (pol->pd_init_fn) - pol->pd_init_fn(blkg); + pol->pd_init_fn(pd); } __set_bit(pol->plid, q->blkcg_pols); @@ -1112,10 +,9 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(>blkcg->lock); - if (pol->pd_offline_fn) - pol->pd_offline_fn(blkg); - if (blkg->pd[pol->plid]) { + if (pol->pd_offline_fn) + pol->pd_offline_fn(blkg->pd[pol->plid]); pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c3a235b..c2c7547 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -377,9 +377,10 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) return >pd; } -static void throtl_pd_init(struct blkcg_gq *blkg) +static void throtl_pd_init(struct blkg_policy_data *pd) { - struct throtl_grp *tg = blkg_to_tg(blkg); + struct throtl_grp *tg = pd_to_tg(pd); + struct blkcg_gq *blkg = tg_to_blkg(tg); struct throtl_data *td = blkg->q->td; struct throtl_service_queue *sq = >service_queue; @@ -417,13 +418,13 @@ static void tg_update_has_rules(struct throtl_grp *tg) (tg->bps[rw] != -1 || tg->iops[rw] != -1); } -static void throtl_pd_online(struct blkcg_gq *blkg) +static void throtl_pd_online(struct blkg_policy_data *pd) { /* * We don't want new groups to escape the limits of its ancestors. * Update has_rules[] after a new group is brought online. */ - tg_update_has_rules(blkg_to_tg(blkg)); + tg_update_has_rules(pd_to_tg(pd)); } static void throtl_pd_free(struct blkg_policy_data *pd) @@ -435,9 +436,9 @@ static void throtl_pd_free(struct blkg_policy_data *pd) kfree(tg); } -static void throtl_pd_reset_stats(struct blkcg_gq *blkg) +static void throtl_pd_reset_stats(struct blkg_policy_data *pd) { - struct throtl_grp *tg = blkg_to_tg(blkg); +
[PATCH 08/11] blk-throttle: clean up blkg_policy_data alloc/init/exit/free methods
With the recent addition of alloc and free methods, things became messier. This patch reorganizes them according to the followings. * ->pd_alloc_fn() Responsible for allocation and static initializations - the ones which can be done independent of where the pd might be attached. * ->pd_init_fn() Initializations which require the knowledge of where the pd is attached. * ->pd_free_fn() The counter part of pd_alloc_fn(). Static de-init and freeing. This leaves ->pd_exit_fn() without any users. Removed. While at it, collapse an one liner function throtl_pd_exit(), which has only one user, into its user. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 11 - block/blk-throttle.c | 57 -- block/cfq-iosched.c| 15 include/linux/blk-cgroup.h | 2 -- 4 files changed, 31 insertions(+), 54 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9d83623..e509bc8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -402,15 +402,6 @@ static void blkg_destroy_all(struct request_queue *q) void __blkg_release_rcu(struct rcu_head *rcu_head) { struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); - int i; - - /* tell policies that this one is being freed */ - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - - if (blkg->pd[i] && pol->pd_exit_fn) - pol->pd_exit_fn(blkg); - } /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(>blkcg->css); @@ -1123,8 +1114,6 @@ void blkcg_deactivate_policy(struct request_queue *q, if (pol->pd_offline_fn) pol->pd_offline_fn(blkg); - if (pol->pd_exit_fn) - pol->pd_exit_fn(blkg); if (blkg->pd[pol->plid]) { pol->pd_free_fn(blkg->pd[pol->plid]); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3c86976..c3a235b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -330,26 +330,19 @@ static struct bio *throtl_pop_queued(struct list_head *queued, } /* init a service_queue, assumes the caller zeroed it */ -static 
void throtl_service_queue_init(struct throtl_service_queue *sq, - struct throtl_service_queue *parent_sq) +static void throtl_service_queue_init(struct throtl_service_queue *sq) { INIT_LIST_HEAD(>queued[0]); INIT_LIST_HEAD(>queued[1]); sq->pending_tree = RB_ROOT; - sq->parent_sq = parent_sq; setup_timer(>pending_timer, throtl_pending_timer_fn, (unsigned long)sq); } -static void throtl_service_queue_exit(struct throtl_service_queue *sq) -{ - del_timer_sync(>pending_timer); -} - static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) { struct throtl_grp *tg; - int cpu; + int rw, cpu; tg = kzalloc_node(sizeof(*tg), gfp, node); if (!tg) @@ -361,6 +354,19 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) return NULL; } + throtl_service_queue_init(>service_queue); + + for (rw = READ; rw <= WRITE; rw++) { + throtl_qnode_init(>qnode_on_self[rw], tg); + throtl_qnode_init(>qnode_on_parent[rw], tg); + } + + RB_CLEAR_NODE(>rb_node); + tg->bps[READ] = -1; + tg->bps[WRITE] = -1; + tg->iops[READ] = -1; + tg->iops[WRITE] = -1; + for_each_possible_cpu(cpu) { struct tg_stats_cpu *stats_cpu = per_cpu_ptr(tg->stats_cpu, cpu); @@ -375,8 +381,7 @@ static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_data *td = blkg->q->td; - struct throtl_service_queue *parent_sq; - int rw; + struct throtl_service_queue *sq = >service_queue; /* * If on the default hierarchy, we switch to properly hierarchical @@ -391,25 +396,10 @@ static void throtl_pd_init(struct blkcg_gq *blkg) * Limits of a group don't interact with limits of other groups * regardless of the position of the group in the hierarchy. 
*/ - parent_sq = >service_queue; - + sq->parent_sq = >service_queue; if (cgroup_on_dfl(blkg->blkcg->css.cgroup) && blkg->parent) - parent_sq = _to_tg(blkg->parent)->service_queue; - - throtl_service_queue_init(>service_queue, parent_sq); - - for (rw = READ; rw <= WRITE; rw++) { - throtl_qnode_init(>qnode_on_self[rw], tg); - throtl_qnode_init(>qnode_on_parent[rw], tg); - } - - RB_CLEAR_NODE(>rb_node); + sq->parent_sq = _to_tg(blkg->parent)->service_queue; tg->td = td; - - tg->bps[READ] = -1; -
[PATCH 11/11] blkcg: replace blkcg_policy->cpd_size with ->cpd_alloc/free_fn() methods
Each active policy has a cpd (blkcg_policy_data) on each blkcg. The cpd's were allocated by blkcg core and each policy could request to allocate extra space at the end by setting blkcg_policy->cpd_size larger than the size of cpd. This is a bit unusual but blkg (blkcg_gq) policy data used to be handled this way too so it made sense to be consistent; however, blkg policy data switched to alloc/free callbacks. This patch makes similar changes to cpd handling. blkcg_policy->cpd_alloc/free_fn() are added to replace ->cpd_size. As cpd allocation is now done from policy side, it can simply allocate a larger area which embeds cpd at the beginning. As ->cpd_alloc_fn() may be able to perform all necessary initializations, this patch makes ->cpd_init_fn() optional. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini --- block/blk-cgroup.c | 39 --- block/cfq-iosched.c| 19 ++- include/linux/blk-cgroup.h | 17 ++--- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8173e06..48d95ca 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -813,11 +813,15 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) int i; mutex_lock(_pol_mutex); + list_del(>all_blkcgs_node); - mutex_unlock(_pol_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) - kfree(blkcg->cpd[i]); + if (blkcg->cpd[i]) + blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]); + + mutex_unlock(_pol_mutex); + kfree(blkcg); } @@ -850,18 +854,18 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) * check if the policy requires any specific per-cgroup * data: if it does, allocate and initialize it. 
*/ - if (!pol || !pol->cpd_size) + if (!pol || !pol->cpd_alloc_fn) continue; - BUG_ON(blkcg->cpd[i]); - cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + cpd = pol->cpd_alloc_fn(GFP_KERNEL); if (!cpd) { ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } blkcg->cpd[i] = cpd; cpd->plid = i; - pol->cpd_init_fn(cpd); + if (pol->cpd_init_fn) + pol->cpd_init_fn(cpd); } spin_lock_init(>lock); @@ -877,7 +881,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) free_pd_blkcg: for (i--; i >= 0; i--) - kfree(blkcg->cpd[i]); + if (blkcg->cpd[i]) + blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]); free_blkcg: kfree(blkcg); mutex_unlock(_pol_mutex); @@ -1154,11 +1159,11 @@ int blkcg_policy_register(struct blkcg_policy *pol) blkcg_policy[pol->plid] = pol; /* allocate and install cpd's */ - if (pol->cpd_size) { + if (pol->cpd_alloc_fn) { list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) { struct blkcg_policy_data *cpd; - cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + cpd = pol->cpd_alloc_fn(GFP_KERNEL); if (!cpd) { mutex_unlock(_pol_mutex); goto err_free_cpds; @@ -1180,10 +1185,12 @@ int blkcg_policy_register(struct blkcg_policy *pol) return 0; err_free_cpds: - if (pol->cpd_size) { + if (pol->cpd_alloc_fn) { list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) { - kfree(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; + if (blkcg->cpd[pol->plid]) { + pol->cpd_free_fn(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; + } } } blkcg_policy[pol->plid] = NULL; @@ -1216,10 +1223,12 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) /* remove cpds and unregister */ mutex_lock(_pol_mutex); - if (pol->cpd_size) { + if (pol->cpd_alloc_fn) { list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) { - kfree(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; + if (blkcg->cpd[pol->plid]) { + pol->cpd_free_fn(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; + } } } blkcg_policy[pol->plid] = NULL; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index dd6ea9e..a4429b3 
100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1568,6 +1568,16 @@ static void
[PATCH 03/11] blkcg: remove unnecessary blkcg_root handling from css_alloc/free paths
blkcg_css_alloc() bypasses policy data allocation and blkcg_css_free() bypasses policy data and blkcg freeing for blkcg_root. There's no reason to treat policy data any differently for blkcg_root. If the root css gets allocated after policies are registered, policy registration path will add policy data; otherwise, the alloc path will. The free path is never invoked for root csses. This patch removes the unnecessary special handling of blkcg_root from css_alloc/free paths. Signed-off-by: Tejun Heo Cc: Vivek Goyal --- block/blk-cgroup.c | 25 ++--- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 64cc48f..2a493ce 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -819,18 +819,15 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + int i; mutex_lock(_pol_mutex); list_del(>all_blkcgs_node); mutex_unlock(_pol_mutex); - if (blkcg != _root) { - int i; - - for (i = 0; i < BLKCG_MAX_POLS; i++) - kfree(blkcg->pd[i]); - kfree(blkcg); - } + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkcg->pd[i]); + kfree(blkcg); } static struct cgroup_subsys_state * @@ -844,13 +841,12 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) if (!parent_css) { blkcg = _root; - goto done; - } - - blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); - if (!blkcg) { - ret = ERR_PTR(-ENOMEM); - goto free_blkcg; + } else { + blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); + if (!blkcg) { + ret = ERR_PTR(-ENOMEM); + goto free_blkcg; + } } for (i = 0; i < BLKCG_MAX_POLS ; i++) { @@ -877,7 +873,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } -done: spin_lock_init(>lock); INIT_RADIX_TREE(>blkg_tree, GFP_NOWAIT); INIT_HLIST_HEAD(>blkg_list); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More 
majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 10/11] blkcg: cosmetic updates about blkcg_policy_data
* Rename blkcg->pd[] to blkcg->cpd[] so that cpd is consistently used for blkcg_policy_data. * Make blkcg_policy->cpd_init_fn() take blkcg_policy_data instead of blkcg. This makes it consistent with blkg_policy_data methods and to-be-added cpd alloc/free methods. * blkcg_policy_data->blkcg and cpd_to_blkcg() added so that cpd_init_fn() can determine the associated blkcg from blkcg_policy_data. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini --- block/blk-cgroup.c | 22 +++--- block/cfq-iosched.c| 11 +-- include/linux/blk-cgroup.h | 14 ++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index d18cdb6..8173e06 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -817,7 +817,7 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) mutex_unlock(_pol_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) - kfree(blkcg->pd[i]); + kfree(blkcg->cpd[i]); kfree(blkcg); } @@ -853,15 +853,15 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) if (!pol || !pol->cpd_size) continue; - BUG_ON(blkcg->pd[i]); + BUG_ON(blkcg->cpd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } - blkcg->pd[i] = cpd; + blkcg->cpd[i] = cpd; cpd->plid = i; - pol->cpd_init_fn(blkcg); + pol->cpd_init_fn(cpd); } spin_lock_init(>lock); @@ -877,7 +877,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) free_pd_blkcg: for (i--; i >= 0; i--) - kfree(blkcg->pd[i]); + kfree(blkcg->cpd[i]); free_blkcg: kfree(blkcg); mutex_unlock(_pol_mutex); @@ -1164,9 +1164,9 @@ int blkcg_policy_register(struct blkcg_policy *pol) goto err_free_cpds; } - blkcg->pd[pol->plid] = cpd; + blkcg->cpd[pol->plid] = cpd; cpd->plid = pol->plid; - pol->cpd_init_fn(blkcg); + pol->cpd_init_fn(cpd); } } @@ -1182,8 +1182,8 @@ int blkcg_policy_register(struct blkcg_policy *pol) err_free_cpds: if (pol->cpd_size) { list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) { - kfree(blkcg->pd[pol->plid]); - 
blkcg->pd[pol->plid] = NULL; + kfree(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; } } blkcg_policy[pol->plid] = NULL; @@ -1218,8 +1218,8 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) if (pol->cpd_size) { list_for_each_entry(blkcg, _blkcgs, all_blkcgs_node) { - kfree(blkcg->pd[pol->plid]); - blkcg->pd[pol->plid] = NULL; + kfree(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; } } blkcg_policy[pol->plid] = NULL; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 95e6b0c..dd6ea9e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -220,7 +220,7 @@ struct cfqg_stats { /* Per-cgroup data */ struct cfq_group_data { /* must be the first member */ - struct blkcg_policy_data pd; + struct blkcg_policy_data cpd; unsigned int weight; unsigned int leaf_weight; @@ -612,7 +612,7 @@ static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) static struct cfq_group_data *cpd_to_cfqgd(struct blkcg_policy_data *cpd) { - return cpd ? container_of(cpd, struct cfq_group_data, pd) : NULL; + return cpd ? container_of(cpd, struct cfq_group_data, cpd) : NULL; } static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) @@ -1568,12 +1568,11 @@ static void cfqg_stats_init(struct cfqg_stats *stats) #endif } -static void cfq_cpd_init(const struct blkcg *blkcg) +static void cfq_cpd_init(struct blkcg_policy_data *cpd) { - struct cfq_group_data *cgd = - cpd_to_cfqgd(blkcg->pd[blkcg_policy_cfq.plid]); + struct cfq_group_data *cgd = cpd_to_cfqgd(cpd); - if (blkcg == _root) { + if (cpd_to_blkcg(cpd) == _root) { cgd->weight = 2 * CFQ_WEIGHT_DEFAULT; cgd->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; } else { diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index ddd4b8b..7988d47 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -45,7 +45,7 @@ struct blkcg { struct blkcg_gq *blkg_hint; struct
[PATCH 01/11] blkcg: remove unnecessary request_list->blkg NULL test in blk_put_rl()
Since ec13b1d6f0a0 ("blkcg: always create the blkcg_gq for the root blkcg"), a request_list always has its blkg associated. Drop unnecessary rl->blkg NULL test from blk_put_rl(). Signed-off-by: Tejun Heo Cc: Vivek Goyal --- include/linux/blk-cgroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1b62d76..9711fc2 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -394,8 +394,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, */ static inline void blk_put_rl(struct request_list *rl) { - /* root_rl may not have blkg set */ - if (rl->blkg && rl->blkg->blkcg != _root) + if (rl->blkg->blkcg != _root) blkg_put(rl->blkg); } -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] iio: Drop owner assignment from i2c_driver
On 10/07/15 06:54, Krzysztof Kozlowski wrote: > i2c_driver does not need to set an owner because i2c_register_driver() > will set it. > > Signed-off-by: Krzysztof Kozlowski > Applied to the togreg branch of iio.git Thanks, Jonathan > --- > > The coccinelle script which generated the patch was sent here: > http://www.spinics.net/lists/kernel/msg2029903.html > --- > drivers/iio/accel/bma180.c | 1 - > drivers/iio/accel/st_accel_i2c.c | 1 - > drivers/iio/adc/mcp3422.c | 1 - > drivers/iio/adc/ti-adc081c.c | 1 - > drivers/iio/dac/ad5064.c | 1 - > drivers/iio/dac/ad5380.c | 1 - > drivers/iio/dac/ad5446.c | 1 - > drivers/iio/dac/max5821.c | 1 - > drivers/iio/gyro/itg3200_core.c| 1 - > drivers/iio/gyro/st_gyro_i2c.c | 1 - > drivers/iio/humidity/si7005.c | 1 - > drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 1 - > drivers/iio/light/apds9300.c | 1 - > drivers/iio/light/bh1750.c | 1 - > drivers/iio/light/cm32181.c| 1 - > drivers/iio/light/cm3232.c | 1 - > drivers/iio/light/cm36651.c| 1 - > drivers/iio/light/gp2ap020a00f.c | 1 - > drivers/iio/light/isl29125.c | 1 - > drivers/iio/light/jsa1212.c| 1 - > drivers/iio/light/ltr501.c | 1 - > drivers/iio/light/tcs3414.c| 1 - > drivers/iio/light/tcs3472.c| 1 - > drivers/iio/light/tsl4531.c| 1 - > drivers/iio/light/vcnl4000.c | 1 - > drivers/iio/magnetometer/st_magn_i2c.c | 1 - > drivers/iio/pressure/ms5611_i2c.c | 1 - > drivers/iio/pressure/st_pressure_i2c.c | 1 - > drivers/iio/temperature/mlx90614.c | 1 - > drivers/iio/temperature/tmp006.c | 1 - > 30 files changed, 30 deletions(-) > > diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c > index 75c6d2103e07..f04b88406995 100644 > --- a/drivers/iio/accel/bma180.c > +++ b/drivers/iio/accel/bma180.c > @@ -846,7 +846,6 @@ MODULE_DEVICE_TABLE(i2c, bma180_ids); > static struct i2c_driver bma180_driver = { > .driver = { > .name = "bma180", > - .owner = THIS_MODULE, > .pm = BMA180_PM_OPS, > }, > .probe = bma180_probe, > diff --git a/drivers/iio/accel/st_accel_i2c.c > 
b/drivers/iio/accel/st_accel_i2c.c > index d4ad72ca4a3d..a2f1c20319eb 100644 > --- a/drivers/iio/accel/st_accel_i2c.c > +++ b/drivers/iio/accel/st_accel_i2c.c > @@ -122,7 +122,6 @@ MODULE_DEVICE_TABLE(i2c, st_accel_id_table); > > static struct i2c_driver st_accel_driver = { > .driver = { > - .owner = THIS_MODULE, > .name = "st-accel-i2c", > .of_match_table = of_match_ptr(st_accel_of_match), > }, > diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c > index b96c636470ef..3555122008b4 100644 > --- a/drivers/iio/adc/mcp3422.c > +++ b/drivers/iio/adc/mcp3422.c > @@ -404,7 +404,6 @@ MODULE_DEVICE_TABLE(of, mcp3422_of_match); > static struct i2c_driver mcp3422_driver = { > .driver = { > .name = "mcp3422", > - .owner = THIS_MODULE, > .of_match_table = of_match_ptr(mcp3422_of_match), > }, > .probe = mcp3422_probe, > diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c > index b3a82b4d1a75..2c8374f86252 100644 > --- a/drivers/iio/adc/ti-adc081c.c > +++ b/drivers/iio/adc/ti-adc081c.c > @@ -140,7 +140,6 @@ MODULE_DEVICE_TABLE(of, adc081c_of_match); > static struct i2c_driver adc081c_driver = { > .driver = { > .name = "adc081c", > - .owner = THIS_MODULE, > .of_match_table = of_match_ptr(adc081c_of_match), > }, > .probe = adc081c_probe, > diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c > index f03b92fd3803..c067e6821496 100644 > --- a/drivers/iio/dac/ad5064.c > +++ b/drivers/iio/dac/ad5064.c > @@ -630,7 +630,6 @@ MODULE_DEVICE_TABLE(i2c, ad5064_i2c_ids); > static struct i2c_driver ad5064_i2c_driver = { > .driver = { > .name = "ad5064", > -.owner = THIS_MODULE, > }, > .probe = ad5064_i2c_probe, > .remove = ad5064_i2c_remove, > diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c > index 9de4c4d38280..130de9b3e0bf 100644 > --- a/drivers/iio/dac/ad5380.c > +++ b/drivers/iio/dac/ad5380.c > @@ -593,7 +593,6 @@ MODULE_DEVICE_TABLE(i2c, ad5380_i2c_ids); > static struct i2c_driver ad5380_i2c_driver = { > .driver = { > 
.name = "ad5380", > -.owner = THIS_MODULE, > }, > .probe = ad5380_i2c_probe, > .remove = ad5380_i2c_remove, > diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c > index 46bb62a5c1d4..07e17d72a3f3 100644
Re: [PATCH 1/3] staging: iio: Drop owner assignment from i2c_driver
On 10/07/15 07:34, Krzysztof Kozlowski wrote: > i2c_driver does not need to set an owner because i2c_register_driver() > will set it. > > Signed-off-by: Krzysztof Kozlowski Applied to the togreg branch of iio.git Thanks, Jonathan > > --- > > The coccinelle script which generated the patch was sent here: > http://www.spinics.net/lists/kernel/msg2029903.html > --- > drivers/staging/iio/addac/adt7316-i2c.c | 1 - > drivers/staging/iio/light/isl29018.c| 1 - > drivers/staging/iio/light/isl29028.c| 1 - > 3 files changed, 3 deletions(-) > > diff --git a/drivers/staging/iio/addac/adt7316-i2c.c > b/drivers/staging/iio/addac/adt7316-i2c.c > index 75ddd4f801a3..78fe0b557280 100644 > --- a/drivers/staging/iio/addac/adt7316-i2c.c > +++ b/drivers/staging/iio/addac/adt7316-i2c.c > @@ -124,7 +124,6 @@ static struct i2c_driver adt7316_driver = { > .driver = { > .name = "adt7316", > .pm = ADT7316_PM_OPS, > - .owner = THIS_MODULE, > }, > .probe = adt7316_i2c_probe, > .id_table = adt7316_i2c_id, > diff --git a/drivers/staging/iio/light/isl29018.c > b/drivers/staging/iio/light/isl29018.c > index e646c5d24004..019ba5245c23 100644 > --- a/drivers/staging/iio/light/isl29018.c > +++ b/drivers/staging/iio/light/isl29018.c > @@ -838,7 +838,6 @@ static struct i2c_driver isl29018_driver = { > .name = "isl29018", > .acpi_match_table = ACPI_PTR(isl29018_acpi_match), > .pm = ISL29018_PM_OPS, > - .owner = THIS_MODULE, > .of_match_table = isl29018_of_match, > }, > .probe = isl29018_probe, > diff --git a/drivers/staging/iio/light/isl29028.c > b/drivers/staging/iio/light/isl29028.c > index e5b2fdc2334b..cd6f2727aa58 100644 > --- a/drivers/staging/iio/light/isl29028.c > +++ b/drivers/staging/iio/light/isl29028.c > @@ -547,7 +547,6 @@ static struct i2c_driver isl29028_driver = { > .class = I2C_CLASS_HWMON, > .driver = { > .name = "isl29028", > - .owner = THIS_MODULE, > .of_match_table = isl29028_of_match, > }, > .probe = isl29028_probe, > -- To unsubscribe from this list: send the line "unsubscribe 
linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Staging: iio: dummy: Fix blank line warnings
On 10/07/15 15:10, Cristina Opriceana wrote: > Multiple blank lines should not be used as indicated by checkpatch.pl. > Also, a line should be used after a function/structure declaration. > > Signed-off-by: Cristina Opriceana Applied to the togreg branch of iio.git Thanks, Jonathan > --- > drivers/staging/iio/iio_dummy_evgen.c | 1 + > drivers/staging/iio/iio_simple_dummy.c| 2 -- > drivers/staging/iio/iio_simple_dummy.h| 1 + > drivers/staging/iio/iio_simple_dummy_buffer.c | 2 +- > 4 files changed, 3 insertions(+), 3 deletions(-) > > diff --git a/drivers/staging/iio/iio_dummy_evgen.c > b/drivers/staging/iio/iio_dummy_evgen.c > index c54d5b5..6d38854 100644 > --- a/drivers/staging/iio/iio_dummy_evgen.c > +++ b/drivers/staging/iio/iio_dummy_evgen.c > @@ -214,6 +214,7 @@ static struct device iio_evgen_dev = { > .groups = iio_evgen_groups, > .release = _evgen_release, > }; > + > static __init int iio_dummy_evgen_init(void) > { > int ret = iio_dummy_evgen_create(); > diff --git a/drivers/staging/iio/iio_simple_dummy.c > b/drivers/staging/iio/iio_simple_dummy.c > index 1629a8a..381f90f 100644 > --- a/drivers/staging/iio/iio_simple_dummy.c > +++ b/drivers/staging/iio/iio_simple_dummy.c > @@ -611,7 +611,6 @@ static int iio_dummy_probe(int index) >*/ > iio_dummy_devs[index] = indio_dev; > > - > /* >* Set the device name. 
>* > @@ -675,7 +674,6 @@ static void iio_dummy_remove(int index) >*/ > struct iio_dev *indio_dev = iio_dummy_devs[index]; > > - > /* Unregister the device */ > iio_device_unregister(indio_dev); > > diff --git a/drivers/staging/iio/iio_simple_dummy.h > b/drivers/staging/iio/iio_simple_dummy.h > index e877a99..8d00224 100644 > --- a/drivers/staging/iio/iio_simple_dummy.h > +++ b/drivers/staging/iio/iio_simple_dummy.h > @@ -119,6 +119,7 @@ static inline int > iio_simple_dummy_configure_buffer(struct iio_dev *indio_dev) > { > return 0; > }; > + > static inline > void iio_simple_dummy_unconfigure_buffer(struct iio_dev *indio_dev) > {}; > diff --git a/drivers/staging/iio/iio_simple_dummy_buffer.c > b/drivers/staging/iio/iio_simple_dummy_buffer.c > index a651b89..00ed774 100644 > --- a/drivers/staging/iio/iio_simple_dummy_buffer.c > +++ b/drivers/staging/iio/iio_simple_dummy_buffer.c > @@ -32,6 +32,7 @@ static const s16 fakedata[] = { > [diffvoltage3m4] = -2, > [accelx] = 344, > }; > + > /** > * iio_simple_dummy_trigger_h() - the trigger handler function > * @irq: the interrupt number > @@ -178,7 +179,6 @@ error_free_buffer: > iio_kfifo_free(indio_dev->buffer); > error_ret: > return ret; > - > } > > /** > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 1/8] i2c: core: Add support for best effort block read emulation
On 10/07/15 18:14, Tirdea, Irina wrote: > > >> -Original Message- >> From: Jonathan Cameron [mailto:ji...@kernel.org] >> Sent: 05 July, 2015 14:59 >> To: Tirdea, Irina; Wolfram Sang; linux-...@vger.kernel.org; >> linux-...@vger.kernel.org >> Cc: linux-kernel@vger.kernel.org; Pandruvada, Srinivas; Peter Meerwald >> Subject: Re: [PATCH v3 1/8] i2c: core: Add support for best effort block >> read emulation >> >> On 03/07/15 10:33, Irina Tirdea wrote: >>> There are devices that need to handle block transactions >>> regardless of the capabilities exported by the adapter. >>> For performance reasons, they need to use i2c read blocks >>> if available, otherwise emulate the block transaction with word >>> or byte transactions. >>> >>> Add support for a helper function that would read a data block >>> using the best transfer available: I2C_FUNC_SMBUS_READ_I2C_BLOCK, >>> I2C_FUNC_SMBUS_READ_WORD_DATA or I2C_FUNC_SMBUS_READ_BYTE_DATA. >>> >>> Signed-off-by: Irina Tirdea >> Looks good to me - I vaguely wondered if it would make sense to use >> an endian conversion in the word case, but as we have possible odd >> numbers of bytes that gets fiddly. >> > > Thanks for the review, Jonathan! > >> I wonder what devices do if you do a word read beyond their end address? >> Perhaps in odd cases we should always fall back to byte reads? > > In my tests I can read beyond the end address, but I cannot be sure if this > is OK for all > devices. This was actually a suggestion from Wolfram for v1, but maybe I'm > missing > something. > > Wolfram, is it safe to read one byte beyond the end address or should I > better use > only byte reads for odd lengths? 
> >> >>> --- >>> drivers/i2c/i2c-core.c | 60 >>> ++ >>> include/linux/i2c.h| 3 +++ >>> 2 files changed, 63 insertions(+) >>> >>> diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c >>> index 96771ea..55a3455 100644 >>> --- a/drivers/i2c/i2c-core.c >>> +++ b/drivers/i2c/i2c-core.c >>> @@ -2914,6 +2914,66 @@ trace: >>> } >>> EXPORT_SYMBOL(i2c_smbus_xfer); >>> >>> +/** >>> + * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate >>> + * @client: Handle to slave device >>> + * @command: Byte interpreted by slave >>> + * @length: Size of data block; SMBus allows at most 32 bytes >>> + * @values: Byte array into which data will be read; big enough to hold >>> + * the data returned by the slave. SMBus allows at most 32 bytes. >>> + * >>> + * This executes the SMBus "block read" protocol if supported by the >>> adapter. >>> + * If block read is not supported, it emulates it using either word or byte >>> + * read protocols depending on availability. >>> + * >>> + * Before using this function you must double-check if the I2C slave does >>> + * support exchanging a block transfer with a byte transfer. >> Add something here about addressing assumptions. You get odd devices which >> will give bulk reads of addresses not mapped to a nice linear region when >> you do byte reads. > > OK, I'll add this to the comment above: > "The addresses of the I2C slave device that are accessed with this function > must be mapped to a linear region, so that a block read will have the same > effect as a byte read." > Works for me. 
> Thanks, > Irina > >>> + */ >>> +s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client >>> *client, >>> + u8 command, u8 length, u8 *values) >>> +{ >>> + u8 i; >>> + int status; >>> + >>> + if (length > I2C_SMBUS_BLOCK_MAX) >>> + length = I2C_SMBUS_BLOCK_MAX; >>> + >>> + if (i2c_check_functionality(client->adapter, >>> + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { >>> + return i2c_smbus_read_i2c_block_data(client, command, >>> +length, values); >>> + } else if (i2c_check_functionality(client->adapter, >>> + I2C_FUNC_SMBUS_READ_WORD_DATA)) { >>> + for (i = 0; i < length; i += 2) { >>> + status = i2c_smbus_read_word_data(client, command + i); >>> + if (status < 0) >>> + return status; >>> + values[i] = status & 0xff; >>> + if ((i + 1) < length) >>> + values[i + 1] = status >> 8; >>> + } >>> + if (i > length) >>> + return length; >>> + return i; >>> + } else if (i2c_check_functionality(client->adapter, >>> + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { >>> + for (i = 0; i < length; i++) { >>> + status = i2c_smbus_read_byte_data(client, command + i); >>> + if (status < 0) >>> + return status; >>> + values[i] = status; >>> + } >>> + return i; >>> + } >>> + >>> +
Re: [PATCH v2 2/2] ARM: dts: vfxxx: Add property for minimum sample time
On 10/07/15 19:06, maitysancha...@gmail.com wrote: > Hello Shawn, > > On 15-07-10 16:53:24, Shawn Guo wrote: >> On Wed, Jun 24, 2015 at 02:03:41PM +0530, Sanchayan Maity wrote: >>> Add a device tree property which allows to specify the minimum sample >>> time which can be used to calculate the actual ADC cycles required >>> depending on the hardware. >>> >>> Signed-off-by: Sanchayan Maity >>> --- >>> arch/arm/boot/dts/vfxxx.dtsi | 2 ++ >>> 1 file changed, 2 insertions(+) >>> >>> diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi >>> index 90a03d5..71d9c08 100644 >>> --- a/arch/arm/boot/dts/vfxxx.dtsi >>> +++ b/arch/arm/boot/dts/vfxxx.dtsi >>> @@ -229,6 +229,7 @@ >>> status = "disabled"; >>> fsl,adck-max-frequency = <3000>, <4000>, >>> <2000>; >>> + min-sample-time = <1000>; >>> }; >>> >>> wdoga5: wdog@4003e000 { >>> @@ -447,6 +448,7 @@ >>> status = "disabled"; >>> fsl,adck-max-frequency = <3000>, <4000>, >>> <2000>; >>> + min-sample-time = <1000>; >> >> Can we code 1000 as the default in kernel driver, so that only boards >> requiring different value need to have this property? Doing so makes >> the property optional rather than required. >> > > Not sure if hardcoding it in the driver is the right approach. If it is a true feature of the device (i.e. if in the case of perfect front end electronics) this is the right option, then a default makes a lot of sense. If that isn't the case (I suspect not) then if we drop it be optional chances are no one will bother thinking about it or trying to tune this at all. Hence seems wrong to put a fairly arbitrary default value on it. However, we do need to still work with old device trees and new kernels so need to cope without it. Hence to my mind, if we had started out with this in the first driver version, then the default would be a bad idea. 
As we didn't then we really need to cope with nothing specified (as best we can) and so we do need a sensible default (or perhaps even sensible worst case default) in there. > > However if the maintainers and others agree on doing this, I will do > the necessary change. > > Thanks. > > Regards, > Sanchayan. > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH RESEND] iio: adc: rockchip_saradc: add missing MODULE_* data
On 08/07/15 15:17, Heiko Stuebner wrote: > The module-data is currently missing. This includes the license-information > which makes the driver taint the kernel and miss symbols when compiled as > module. > > Fixes: 44d6f2ef94f9 ("iio: adc: add driver for Rockchip saradc") > Signed-off-by: Heiko Stuebner Sorry Heiko, Not entirely sure why I haven't picked this up before. Anyhow, now applied to the fixes-for-4.2 branch of iio.git and marked for stable. I need to catch up with a bit of a backlog, but should get a pull request out to Greg sometime early next week. Jonathan > --- > drivers/iio/adc/rockchip_saradc.c | 4 > 1 file changed, 4 insertions(+) > > diff --git a/drivers/iio/adc/rockchip_saradc.c > b/drivers/iio/adc/rockchip_saradc.c > index 8d4e019..9c311c1 100644 > --- a/drivers/iio/adc/rockchip_saradc.c > +++ b/drivers/iio/adc/rockchip_saradc.c > @@ -349,3 +349,7 @@ static struct platform_driver rockchip_saradc_driver = { > }; > > module_platform_driver(rockchip_saradc_driver); > + > +MODULE_AUTHOR("Heiko Stuebner "); > +MODULE_DESCRIPTION("Rockchip SARADC driver"); > +MODULE_LICENSE("GPL v2"); > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] iio: frequency: adf4350: Delete blank line
On 08/07/15 15:04, Ana Calinov wrote: > This patch removes an unnecessary blank line > found by checkpatch.pl --strict: > Blank lines aren't necessary after an open brace '{'. > > Signed-off-by: Ana Calinov Applied to the togreg branch of iio.git. Initially pushed out as testing for the autobuilders to play with it. Thanks, Jonathan > --- > drivers/iio/frequency/adf4350.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c > index 10a0dfc..9890c81 100644 > --- a/drivers/iio/frequency/adf4350.c > +++ b/drivers/iio/frequency/adf4350.c > @@ -72,7 +72,6 @@ static int adf4350_sync_config(struct adf4350_state *st) > for (i = ADF4350_REG5; i >= ADF4350_REG0; i--) { > if ((st->regs_hw[i] != st->regs[i]) || > ((i == ADF4350_REG0) && doublebuf)) { > - > switch (i) { > case ADF4350_REG1: > case ADF4350_REG4: > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] iio: accel: kxcjk-1013: Remove blank lines
On 08/07/15 13:56, Daniel Baluta wrote: > On Wed, Jul 8, 2015 at 3:44 PM, Ana Calinov wrote: >> This patch fixes the the following errors given by >> checkpatch.pl with --strict: >> Please don't use multiple blank lines. >> Blank lines aren't necessary after an open brace '{'. >> >> Signed-off-by: Ana Calinov > > > Looks good to me. > > Reviewed-by: Daniel Baluta > > Thanks Ana! > > Daniel. > Applied to the togreg branch of iio.git. Will push out as testing for the autobuilders to play with it. A sensible checkpatch based cleanup. Thanks Jonathan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] sysctl: fix int -> unsigned long assignments in INT_MIN case
The following if (val < 0) *lvalp = (unsigned long)-val; is incorrect because the compiler is free to assume -val to be positive and use a sign-extend instruction for extending the bit pattern. This is a problem if val == INT_MIN: # echo -2147483648 >/proc/sys/dev/scsi/logging_level # cat /proc/sys/dev/scsi/logging_level -18446744071562067968 Cast to unsigned long before negation - that way we first sign-extend and then negate an unsigned, which is well defined. With this: # cat /proc/sys/dev/scsi/logging_level -2147483648 Signed-off-by: Ilya Dryomov --- kernel/sysctl.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 19b62b522158..0b4092b9fa00 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1995,7 +1995,7 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int val = *valp; if (val < 0) { *negp = true; - *lvalp = (unsigned long)-val; + *lvalp = -(unsigned long)val; } else { *negp = false; *lvalp = (unsigned long)val; @@ -2201,7 +2201,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, int val = *valp; if (val < 0) { *negp = true; - *lvalp = (unsigned long)-val; + *lvalp = -(unsigned long)val; } else { *negp = false; *lvalp = (unsigned long)val; @@ -2436,7 +2436,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, unsigned long lval; if (val < 0) { *negp = true; - lval = (unsigned long)-val; + lval = -(unsigned long)val; } else { *negp = false; lval = (unsigned long)val; @@ -2459,7 +2459,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp unsigned long lval; if (val < 0) { *negp = true; - lval = (unsigned long)-val; + lval = -(unsigned long)val; } else { *negp = false; lval = (unsigned long)val; @@ -2484,7 +2484,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, unsigned long lval; if (val < 0) { *negp = true; - lval = (unsigned long)-val; + lval = -(unsigned 
long)val; } else { *negp = false; lval = (unsigned long)val; -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 04/45] clk: efm32gg: Remove clk.h include
On Fri, Jul 10, 2015 at 04:33:02PM -0700, Stephen Boyd wrote: > Clock provider drivers generally shouldn't include clk.h because > it's the consumer API. Remove the include here because this is a > provider driver. > > Cc: Uwe Kleine-König > Signed-off-by: Stephen Boyd Still compiles with your change, so fine. Acked-by: Uwe Kleine-König Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | http://www.pengutronix.de/ | -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] drm/atomic: fix null dereference
We are checking the size of e->event but we were doing it when e is known to be NULL. Signed-off-by: Sudip Mukherjee --- drivers/gpu/drm/drm_atomic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index acebd16..51d3a85 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1311,7 +1311,6 @@ static struct drm_pending_vblank_event *create_vblank_event( e = kzalloc(sizeof *e, GFP_KERNEL); if (e == NULL) { spin_lock_irqsave(&dev->event_lock, flags); - file_priv->event_space += sizeof e->event; spin_unlock_irqrestore(&dev->event_lock, flags); goto out; } -- 1.8.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] sysctl: Fix conversion of INT_MIN for LP64 systems
On Sat, Jul 11, 2015 at 10:35 AM, Robert Xiao wrote: > On LP64 systems, reading a sysctl file containing an INT_MIN (-2147483648) > could incorrectly show -18446744071562067968 due to an incorrect conversion > in do_proc_dointvec_conv. This patch fixes the edge case by converting to > unsigned int first to avoid sign extending INT_MIN to unsigned long. > > Test: > > root:/proc/sys/kernel# echo -2147483648 0 0 0 > printk > root:/proc/sys/kernel# cat printk > > Without patch, produces -18446744071562067968 0 0 0. > With patch, should produce -2147483648 0 0 0. > > Signed-off-by: Robert Xiao > --- > kernel/sysctl.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 19b62b5..464df36 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -1995,10 +1995,10 @@ static int do_proc_dointvec_conv(bool *negp, unsigned > long *lvalp, > int val = *valp; > if (val < 0) { > *negp = true; > - *lvalp = (unsigned long)-val; > + *lvalp = (unsigned int)-val; > } else { > *negp = false; > - *lvalp = (unsigned long)val; > + *lvalp = (unsigned int)val; > } > } > return 0; I don't know why am I CC'ed on this - CC'ing Andrew along with Eric and Kees who seem to have worked directly on sysctl.c not too long ago. That said, I took a look at this and I think this patch is wrong. Casting to unsigned int instead of unsigned long *after* the negation is bogus, because we have if (val < 0) ... *lvalp = (unsigned long)-val; and the compiler is free to assume -val to be positive and use the sign-extend instruction. On gcc (GCC) 4.8.3 20140911 (Red Hat 4.8.3-7) that I have here the cast to unsigned int works only with -O1, with -O2 it goes to town and uses cltq which sign-extends: neg%eax movb $0x1,(%rdi) cltq IMO the right way to do this would be to first cast to unsigned long and then negate - that way we will first sign-extend and then negate an unsigned, which is well defined. 
Also, this needs to be done not just for do_proc_dointvec_conv(), but for do_proc_dointvec_minmax_conv() and jiffies functions as well (although it's probably virtually impossible to set val to exactly INT_MIN through jiffies write branches). Speaking of write branches, only do_proc_dointvec_conv() does check its input properly, so that's something to look at. Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: PCI devices (buses?) and 3GB of RAM lost with 4.2rc1
Am 08.07.2015 um 22:09 schrieb Stefan Seyfried: > this is on a Thinkpad X200s, 5 years old and working fine, until 4.2rc1 > came along. > > With that booted, I do not have a WiFi card anymore, it doesn't even > appear in "lspci" output. > From diffing the dmesg's, it also looks like I lost some of my RAM: > > -Memory: 8050048K/8280176K available (6401K kernel code, 980K rwdata, > 4864K rodata, 1532K init, 1516K bss, 230128K reserved, 0K cma-reserved) > +Memory: 5104620K/8280176K available (6823K kernel code, 1096K rwdata, > 3220K rodata, 1556K init, 1520K bss, 227792K reserved, 0K cma-reserved) This was only a one-off thing, it looks like the hardware was confused when first booting 4.2-rc1 (I found out when I wanted to bisect it, all the kernels I built did just work, and then I finally booted the distro-kernel again and it also worked :-) So everything is fine, sorry for the noise. -- -- Stefan Seyfried Linux Consultant & Developer Mail: seyfr...@b1-systems.de GPG Key: 0x731B665B B1 Systems GmbH Osterfeldstraße 7 / 85088 Vohburg / http://www.b1-systems.de GF: Ralph Dehner / Unternehmenssitz: Vohburg / AG: Ingolstadt,HRB 3537 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] net: switchdev: don't abort unsupported operations
On Fri, Jul 10, 2015 at 4:48 PM, Vivien Didelot wrote: > There is no need to abort attribute setting or object addition, if the > prepare phase returned operation not supported. > > Thus, abort these two transactions only if the error is not -EOPNOTSUPP. > > Signed-off-by: Vivien Didelot Acked-by: Scott Feldman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: ipw2100: fix timeout bug - always evaluated to 0
> commit 2c86c275015c ("Add ipw2100 wireless driver.") introduced > HW_PHY_OFF_LOOP_DELAY (HZ / 5000) which always evaluated to 0. Clarified > by Stanislav Yakovlev that it should be 50 > milliseconds thus fixed up to msecs_to_jiffies(50). > > Signed-off-by: Nicholas Mc Guire > Acked-by: Stanislav Yakovlev Thanks, applied to wireless-drivers-next.git. Kalle Valo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] hwmon: (nct7802) Add autopoint attributes
Introduced pwm[1..3]_auto_point[1..5]_temp, pwm[1..3]_auto_point[1..5]_pwm, nct7802_auto_point_attrs, nct7802_auto_point_group. nct7802_auto_point_is_visible, --- Changed in v2: - Removed PWM_REG, TEMP_REG - auto_point[1..4]_temp, auto_point[1..4]_pwm and auto_point_crit_temp expanded and replaced with pwm[1..3]_auto_point[1..5]_temp and pwm[1..3]_auto_point[1..5]_pwm. - Introduced nct7802_auto_point_is_visible - added used sysfs_update_group in store_pwm_enable Signed-off-by: Constantine Shulyupin --- drivers/hwmon/nct7802.c | 179 +++- 1 file changed, 162 insertions(+), 17 deletions(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 2f6bbe5..e1bc7a6 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -130,6 +130,9 @@ static ssize_t show_pwm(struct device *dev, struct device_attribute *devattr, unsigned int val; int ret; + if (!attr->index) + return sprintf(buf, "255\n"); + ret = regmap_read(data->regmap, attr->index, ); if (ret < 0) return ret; @@ -170,23 +173,7 @@ static ssize_t show_pwm_enable(struct device *dev, static ssize_t store_pwm_enable(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct nct7802_data *data = dev_get_drvdata(dev); - struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr); - u8 val; - int ret; - - ret = kstrtou8(buf, 0, ); - if (ret < 0) - return ret; - if (val < 1 || val > 2) - return -EINVAL; - ret = regmap_update_bits(data->regmap, REG_SMARTFAN_EN(sattr->index), -1 << SMARTFAN_EN_SHIFT(sattr->index), -(val - 1) << SMARTFAN_EN_SHIFT(sattr->index)); - return ret ? : count; -} + const char *buf, size_t count); static int nct7802_read_temp(struct nct7802_data *data, u8 reg_temp, u8 reg_temp_low, int *temp) @@ -890,11 +877,169 @@ static struct attribute_group nct7802_pwm_group = { .attrs = nct7802_pwm_attrs, }; +/* 7.2.115... 
0x80-0x83, 0x84 Temperature (X-axis) transition */ +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x80, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x81, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point3_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x82, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x83, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point5_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x84, 0); + +/* 7.2.120... 0x85-0x88 PWM (Y-axis) transition */ +static SENSOR_DEVICE_ATTR(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x85); +static SENSOR_DEVICE_ATTR(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x86); +static SENSOR_DEVICE_ATTR(pwm1_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x87); +static SENSOR_DEVICE_ATTR(pwm1_auto_point4_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x88); +static SENSOR_DEVICE_ATTR(pwm1_auto_point5_pwm, S_IRUGO | S_IWUSR, + show_pwm, NULL, 0); + +/* 7.2.124 Table 2 X-axis Transition Point 1 Register */ +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x90, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x91, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point3_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x92, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x93, 0); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point5_temp, S_IRUGO | S_IWUSR, + show_temp, store_temp, 0x94, 0); + +/* 7.2.129 Table 2 Y-axis Transition Point 1 Register */ +static SENSOR_DEVICE_ATTR(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x95); +static SENSOR_DEVICE_ATTR(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x96); +static 
SENSOR_DEVICE_ATTR(pwm2_auto_point3_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x97); +static SENSOR_DEVICE_ATTR(pwm2_auto_point4_pwm, S_IRUGO | S_IWUSR, + show_pwm, store_pwm, 0x98); +static SENSOR_DEVICE_ATTR(pwm2_auto_point5_pwm, S_IRUGO | S_IWUSR, +
Re: [PATCH 1/5] x86/vm86: Move userspace accesses to do_sys_vm86()
On Fri, Jul 10, 2015 at 10:09 PM, Brian Gerst wrote: > Move the userspace accesses down into the common function in > preparation for the next set of patches. > One thing I don't like about the current code that makes these patches harder to review is the bizarre approach to copying. If you changed this: > - tmp = copy_vm86_regs_from_user(, >regs, > - offsetof(struct kernel_vm86_struct, > vm86plus) - > - sizeof(info.regs)); into a normal field-by-field get_user / copy_from_user (the latter for the big regs struct) then it would be clear what the ABI is and it would be much easier to read the patches and confirm that you aren't accidentally changing the ABI. You could also get rid of the constraint that certain fields in apparently kernel-internal structs had to be in a certain order. Other than that, patches 1-4 look good on cursory inspection. I'll look more carefully later. I need to think about patch 5 more. --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 0/2] kconfig: warn of unhandled characters in Kconfig commands
On Fri, Jul 10, 2015 at 10:25:31AM +0200, Andreas Ruprecht wrote: > This patchset changes the lexer file to emit a warning if any unhandled > characters are found in the input. So far, Kconfig options like > > +config FOO > bool > [...] > > (note the wrong '+'!) were parsed without a warning. As simply adding a > warning for '.' produces lots of warnings as occasionally '---help---' > is used instead of 'help' (and thus '-' is recognized as an unhandled > character), we need to handle '---help---' separately. > > Changes to v1: > - add '---help---' in zconf.gperf instead of special casing > it in zconf.l > > Changes to v2: > - Do no constify char parameter to warn_ignored_character > - Shorten rule definitions for '.' > > Andreas Ruprecht (2): > kconfig: warn of unhandled characters in Kconfig commands > kconfig: Regenerate shipped zconf.{hash,lex}.c files > > scripts/kconfig/zconf.gperf | 1 + > scripts/kconfig/zconf.hash.c_shipped | 58 --- > scripts/kconfig/zconf.l | 20 ++- > scripts/kconfig/zconf.lex.c_shipped | 325 > +-- > 4 files changed, 204 insertions(+), 200 deletions(-) > > -- > 1.9.1 > Looks good to me. I ran the Kconfiglib test suite on it too. Since it simply compares the output of Kconfiglib and the C implementation, it doubles as a good regression test for the C implementation. Cheers, Ulf -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4] smpboot: Allow to pass the cpumask on per-cpu thread registration
It makes the registration cheaper and simpler for the smpboot per-cpu kthread users that don't need to always update the cpumask after threads creation. Reviewed-by: Chris Metcalf Cc: Andrew Morton Cc: Chris Metcalf Cc: Don Zickus Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ulrich Obergfell Signed-off-by: Frederic Weisbecker --- include/linux/smpboot.h | 11 ++- kernel/smpboot.c| 12 kernel/watchdog.c | 9 +++-- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index da3c593..e6109a6 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -48,7 +48,16 @@ struct smp_hotplug_thread { const char *thread_comm; }; -int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); +int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, + const struct cpumask *cpumask); + +static inline int +smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) +{ + return smpboot_register_percpu_thread_cpumask(plug_thread, + cpu_possible_mask); +} + void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, const struct cpumask *); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 60aa858..d99a41d 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -273,19 +273,22 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) } /** - * smpboot_register_percpu_thread - Register a per_cpu thread related to hotplug + * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related + * to hotplug * @plug_thread: Hotplug thread descriptor + * @cpumask: The cpumask where threads run * * Creates and starts the threads on all online cpus. 
*/ -int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) +int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, + const struct cpumask *cpumask) { unsigned int cpu; int ret = 0; if (!alloc_cpumask_var(_thread->cpumask, GFP_KERNEL)) return -ENOMEM; - cpumask_copy(plug_thread->cpumask, cpu_possible_mask); + cpumask_copy(plug_thread->cpumask, cpumask); get_online_cpus(); mutex_lock(_threads_lock); @@ -296,7 +299,8 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) free_cpumask_var(plug_thread->cpumask); goto out; } - smpboot_unpark_thread(plug_thread, cpu); + if (cpumask_test_cpu(cpu, cpumask)) + smpboot_unpark_thread(plug_thread, cpu); } list_add(_thread->list, _threads); out: diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a6ffa43..e5bb86f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -713,15 +713,12 @@ static int watchdog_enable_all_cpus(void) int err = 0; if (!watchdog_running) { - err = smpboot_register_percpu_thread(_threads); + err = smpboot_register_percpu_thread_cpumask(_threads, +_cpumask); if (err) pr_err("Failed to create watchdog threads, disabled\n"); - else { - if (smpboot_update_cpumask_percpu_thread( - _threads, _cpumask)) - pr_err("Failed to set cpumask for watchdog threads\n"); + else watchdog_running = 1; - } } else { /* * Enable/disable the lockup detectors or -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/