[patch] sched: minimalist select_idle_sibling() bouncing cow syndrome fix

2013-01-26 Thread Mike Galbraith

If the previous CPU is cache affine and idle, select it.
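
For quick reference, a minimal sketch of the fast path this change produces,
assuming the usual kernel/sched/fair.c context (task_cpu(), idle_cpu(),
cpus_share_cache()); the helper name is illustrative only and the domain scan
that follows in the real function is elided:

static int fast_path_sketch(struct task_struct *p, int target)
{
	int prev = task_cpu(p);

	if (idle_cpu(target))		/* wake target is already idle */
		return target;

	/* previous CPU shares cache with target and is idle: go back home */
	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
		return prev;

	return -1;	/* fall through to the sched_domain scan for an idle CPU */
}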

Signed-off-by: Mike Galbraith 
---
 kernel/sched/fair.c |   21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3270,25 +3270,18 @@ find_idlest_cpu(struct sched_group *grou
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
-   int cpu = smp_processor_id();
-   int prev_cpu = task_cpu(p);
struct sched_domain *sd;
struct sched_group *sg;
-   int i;
+   int i = task_cpu(p);
 
-   /*
-* If the task is going to be woken-up on this cpu and if it is
-* already idle, then it is the right target.
-*/
-   if (target == cpu && idle_cpu(cpu))
-   return cpu;
+   if (idle_cpu(target))
+   return target;
 
/*
-* If the task is going to be woken-up on the cpu where it previously
-* ran and if it is currently idle, then it the right target.
+* If the previous cpu is cache affine and idle, don't be stupid.
 */
-   if (target == prev_cpu && idle_cpu(prev_cpu))
-   return prev_cpu;
+   if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+   return i;
 
/*
* Otherwise, iterate the domains and find an eligible idle cpu.
@@ -3302,7 +3295,7 @@ static int select_idle_sibling(struct ta
goto next;
 
for_each_cpu(i, sched_group_cpus(sg)) {
-   if (!idle_cpu(i))
+   if (i == target || !idle_cpu(i))
goto next;
}
 




[PATCH] regulator: Add missing of_node_put()

2013-01-26 Thread Axel Lin
of_find_node_by_name() returns a node pointer with its refcount incremented;
use of_node_put() on it when done.
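
As a rough illustration of the pattern these one-liners enforce (the helper and
the "regulators" node name are placeholders, not code from the drivers below):
the reference returned by of_find_node_by_name() has to be dropped with
of_node_put() once the node is no longer needed.

#include <linux/of.h>

static int example_count_regulators(struct device_node *parent)
{
	struct device_node *np, *child;
	int num = 0;

	/* takes a reference on the result (and drops the one on @parent) */
	np = of_find_node_by_name(parent, "regulators");
	if (!np)
		return -ENODEV;

	for_each_child_of_node(np, child)
		num++;

	of_node_put(np);	/* drop the reference taken above */
	return num;
}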

Signed-off-by: Axel Lin 
Cc: Haojian Zhuang 
Cc: David Dajun Chen 
Cc: Gyungoh Yoo 
Cc: MyungJoo Ham 
Cc: Graeme Gregory 
Cc: Laxman Dewangan 
Cc: Shawn Guo 
---
Hi,
I don't have this hardware, so these are only compile tested.
Axel
 drivers/regulator/88pm8607.c   |1 +
 drivers/regulator/da9052-regulator.c   |1 +
 drivers/regulator/max8907-regulator.c  |1 +
 drivers/regulator/max8925-regulator.c  |1 +
 drivers/regulator/max8997.c|2 ++
 drivers/regulator/mc13xxx-regulator-core.c |1 +
 drivers/regulator/palmas-regulator.c   |1 +
 drivers/regulator/tps65910-regulator.c |2 ++
 8 files changed, 10 insertions(+)

diff --git a/drivers/regulator/88pm8607.c b/drivers/regulator/88pm8607.c
index a957e8c..a84048a 100644
--- a/drivers/regulator/88pm8607.c
+++ b/drivers/regulator/88pm8607.c
@@ -363,6 +363,7 @@ static int pm8607_regulator_dt_init(struct platform_device 
*pdev,
break;
}
}
+   of_node_put(nproot);
return 0;
 }
 #else
diff --git a/drivers/regulator/da9052-regulator.c 
b/drivers/regulator/da9052-regulator.c
index c6d8651..fe06ff6 100644
--- a/drivers/regulator/da9052-regulator.c
+++ b/drivers/regulator/da9052-regulator.c
@@ -385,6 +385,7 @@ static int da9052_regulator_probe(struct platform_device 
*pdev)
break;
}
}
+   of_node_put(nproot);
 #endif
}
 
diff --git a/drivers/regulator/max8907-regulator.c 
b/drivers/regulator/max8907-regulator.c
index d40cf7f..dccf9ff 100644
--- a/drivers/regulator/max8907-regulator.c
+++ b/drivers/regulator/max8907-regulator.c
@@ -239,6 +239,7 @@ static int max8907_regulator_parse_dt(struct 
platform_device *pdev)
 
ret = of_regulator_match(&pdev->dev, regulators, max8907_matches,
 ARRAY_SIZE(max8907_matches));
+   of_node_put(regulators);
if (ret < 0) {
dev_err(&pdev->dev, "Error parsing regulator init data: %d\n",
ret);
diff --git a/drivers/regulator/max8925-regulator.c 
b/drivers/regulator/max8925-regulator.c
index 446a854..f4f52f0 100644
--- a/drivers/regulator/max8925-regulator.c
+++ b/drivers/regulator/max8925-regulator.c
@@ -263,6 +263,7 @@ static int max8925_regulator_dt_init(struct platform_device 
*pdev,
 
rcount = of_regulator_match(&pdev->dev, np,
&max8925_regulator_matches[ridx], 1);
+   of_node_put(np);
if (rcount < 0)
return -ENODEV;
config->init_data = max8925_regulator_matches[ridx].init_data;
diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997.c
index 5556a15..7d2b1b5 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997.c
@@ -937,6 +937,7 @@ static int max8997_pmic_dt_parse_pdata(struct 
platform_device *pdev,
rdata = devm_kzalloc(&pdev->dev, sizeof(*rdata) *
pdata->num_regulators, GFP_KERNEL);
if (!rdata) {
+   of_node_put(regulators_np);
dev_err(&pdev->dev, "could not allocate memory for regulator 
data\n");
return -ENOMEM;
}
@@ -959,6 +960,7 @@ static int max8997_pmic_dt_parse_pdata(struct 
platform_device *pdev,
rdata->reg_node = reg_np;
rdata++;
}
+   of_node_put(regulators_np);
 
if (of_get_property(pmic_np, "max8997,pmic-buck1-uses-gpio-dvs", NULL))
pdata->buck1_gpiodvs = true;
diff --git a/drivers/regulator/mc13xxx-regulator-core.c 
b/drivers/regulator/mc13xxx-regulator-core.c
index 2ecf1d8..04cf962 100644
--- a/drivers/regulator/mc13xxx-regulator-core.c
+++ b/drivers/regulator/mc13xxx-regulator-core.c
@@ -175,6 +175,7 @@ int mc13xxx_get_num_regulators_dt(struct platform_device 
*pdev)
for_each_child_of_node(parent, child)
num++;
 
+   of_node_put(parent);
return num;
 }
 EXPORT_SYMBOL_GPL(mc13xxx_get_num_regulators_dt);
diff --git a/drivers/regulator/palmas-regulator.c 
b/drivers/regulator/palmas-regulator.c
index c9e912f..bb6ea5e 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -535,6 +535,7 @@ static void palmas_dt_to_pdata(struct device *dev,
 
ret = of_regulator_match(dev, regulators, palmas_matches,
PALMAS_NUM_REGS);
+   of_node_put(regulators);
if (ret < 0) {
dev_err(dev, "Error parsing regulator init data: %d\n", ret);
return;
diff --git a/drivers/regulator/tps65910-regulator.c 
b/drivers/regulator/tps65910-regulator.c
index b0e4c0b..32ca2b0 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -994,11 +994,13 @@ static struct tps65910_board *tps65910_parse_dt_reg_data(

Boot crashes due to global async pending list

2013-01-26 Thread Mark Brown
Booting current -next causes crashes during regulator_bulk_enable() in
the async code (the function uses an async domain to run multiple calls
to regulator_enable() in parallel, synchronising the domain to finish
them).  Reverting commit 9fdb04 (async: replace list of active domains
with global list of pending items) causes the problem to go away.

I've not got any sensible diagnostics at the minute, the crash was in
async_entry_function() in one of the list_del_init() calls suggesting a
locking issue but everything looks in order there.  Will try to
investigate properly when I get time, I appreciate that this isn't an
awesome bug report.  If there's any diagnostics that'd be helpful please
let me know.
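
For context, a rough sketch of the pattern regulator_bulk_enable() relies on
(names here are illustrative, not the regulator core's actual code): each call
is scheduled into a private async domain and only that domain is then
synchronised.

#include <linux/async.h>

static ASYNC_DOMAIN_EXCLUSIVE(example_domain);

static void example_async_work(void *data, async_cookie_t cookie)
{
	/* one regulator_enable() call would run here */
}

static void example_bulk_enable(void *consumers[], int num)
{
	int i;

	for (i = 0; i < num; i++)
		async_schedule_domain(example_async_work, consumers[i],
				      &example_domain);

	/* wait for the work scheduled above, and only that work */
	async_synchronize_full_domain(&example_domain);
}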




Re: block: optionally snapshot page contents to provide stable pages during write

2013-01-26 Thread Darrick J. Wong
On Sat, Jan 26, 2013 at 01:39:46PM +0100, Sedat Dilek wrote:
> Hi Darrick,
> 
> can you tell me why you do not put your help text where it normally
> belongs ("help" Kconfig item)?

Sure -- the non-ISA bounce pool is only used by a small number of specific
parts of the kernel that require it.  If those parts aren't built, then forcing
it on causes a useless memory pool to be created, wasting memory.  Since kbuild
can figure out when we need it and when we don't, there's no need to present
the user with a config option that they can only use to do the wrong thing.

--D
> 
> 273 # We also use the bounce pool to provide stable page writes for jbd.  jbd
> 274 # initiates buffer writeback without locking the page or setting
> PG_writeback,
> 275 # and fixing that behavior (a second time; jbd2 doesn't have this
> problem) is
> 276 # a major rework effort.  Instead, use the bounce buffer to snapshot pages
> 277 # (until jbd goes away).  The only jbd user is ext3.
> 278 config NEED_BOUNCE_POOL
> 279 bool
> 280 default y if (TILE && USB_OHCI_HCD) || (BLK_DEV_INTEGRITY && JBD)
> 281 help
> 282 line #273..277
> 
> Noticed while hunting a culprit commit in Linux-Next as my
> kernel-config got changed between next-20130123..next-20130124.
> 
> Regards,
> - Sedat -
> 
> [1] 
> http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git;a=commitdiff;h=3f1c22e#patch5


Re: [PATCH 02/14] atm/nicstar: don't use idr_remove_all()

2013-01-26 Thread David Miller
From: Tejun Heo 
Date: Fri, 25 Jan 2013 17:31:00 -0800

> idr_destroy() can destroy idr by itself and idr_remove_all() is being
> deprecated.  Drop its usage.
> 
> Signed-off-by: Tejun Heo 
> Cc: Chas Williams 
> Cc: net...@vger.kernel.org
> ---
> This patch depends on an earlier idr patch and given the trivial
> nature of the patch, I think it would be best to route these together
> through -mm.  Please holler if there's any objection.

Please do:

Acked-by: David S. Miller 


Re: pull request: wireless 2013-01-23

2013-01-26 Thread David Miller
From: "John W. Linville" 
Date: Wed, 23 Jan 2013 15:16:06 -0500

> This is a batch of fixes intended for the 3.8 stream.
> 
> Regarding the iwlwifi bits, Johannes says this:
> 
> "Please pull to get a single fix from Emmanuel for a bug I introduced due
> to misunderstanding the code."
> 
> Regarding the mac80211 bits, Johannes says this:
> 
> "I have a few small fixes for you:
>  * some mesh frames would cause encryption warnings -- fixes from Bob
>  * scanning would pretty much break an association if we transmitted
>anything to the AP while scanning -- fix from Stanislaw
>  * mode injection was broken by channel contexts -- fix from Felix
>  * FT roaming was broken: hardware crypto would get disabled by it"
> 
> Along with that, a handful of other fixes confined to specific drivers.
> 
> Avinash Patil fixes a typo in a NULL check in mwifiex.
> 
> Larry Finger fixes a build warning in rtlwifi.  Seems safe...
> 
> Stanislaw Gruszka fixes iwlegacy to prevent microcode errors when
> switching from IBSS mode to STA mode.
> 
> Felix Fietkau provides a trio of ath9k fixes related to proper tuning.
> 
> Please let me know if there are problems!

Pulled, thanks John.


[PATCH] spi: Ensure memory used for spi_write_then_read() is DMA safe

2013-01-26 Thread Mark Brown
Use GFP_DMA in order to ensure that the memory we allocate for transfers
in spi_write_then_read() can be DMAed. On most platforms this will have
no effect.

Signed-off-by: Mark Brown 
---
 drivers/spi/spi.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901..14d0fba 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1656,7 +1656,8 @@ int spi_write_then_read(struct spi_device *spi,
 * using the pre-allocated buffer or the transfer is too large.
 */
if ((n_tx + n_rx) > SPI_BUFSIZ || !mutex_trylock(&lock)) {
-   local_buf = kmalloc(max((unsigned)SPI_BUFSIZ, n_tx + n_rx), 
GFP_KERNEL);
+   local_buf = kmalloc(max((unsigned)SPI_BUFSIZ, n_tx + n_rx),
+   GFP_KERNEL | GFP_DMA);
if (!local_buf)
return -ENOMEM;
} else {
-- 
1.7.10.4



Re: [PATCH V8 2/3] virtio-net: split out clean affinity function

2013-01-26 Thread David Miller
From: Wanlong Gao 
Date: Fri, 25 Jan 2013 17:51:30 +0800

> Split out the clean affinity function to virtnet_clean_affinity().
> 
> Cc: Rusty Russell 
> Cc: "Michael S. Tsirkin" 
> Cc: Jason Wang 
> Cc: Eric Dumazet 
> Cc: "David S. Miller" 
> Cc: virtualizat...@lists.linux-foundation.org
> Cc: net...@vger.kernel.org
> Signed-off-by: Wanlong Gao 
> Acked-by: Michael S. Tsirkin 

Applied.


Re: [PATCH V8 3/3] virtio-net: reset virtqueue affinity when doing cpu hotplug

2013-01-26 Thread David Miller
From: Wanlong Gao 
Date: Fri, 25 Jan 2013 17:51:31 +0800

> Add a cpu notifier to virtio-net, so that we can reset the
> virtqueue affinity if cpu hotplug happens. It improves
> performance by enabling or disabling the virtqueue
> affinity after cpu hotplug.
> 
> Cc: Rusty Russell 
> Cc: "Michael S. Tsirkin" 
> Cc: Jason Wang 
> Cc: Eric Dumazet 
> Cc: "David S. Miller" 
> Cc: virtualizat...@lists.linux-foundation.org
> Cc: net...@vger.kernel.org
> Signed-off-by: Wanlong Gao 
> Acked-by: Michael S. Tsirkin 

Applied.


Re: [PATCH V8 1/3] virtio-net: fix the set affinity bug when CPU IDs are not consecutive

2013-01-26 Thread David Miller
From: Wanlong Gao 
Date: Fri, 25 Jan 2013 17:51:29 +0800

> As Michael mentioned, set affinity and select queue will not work very
> well when CPU IDs are not consecutive; this can happen with hot unplug.
> Fix this bug by traversing the online CPUs and creating a per cpu variable
> to find the mapping from CPU to the preferable virtual-queue.
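
A minimal sketch of that idea (hypothetical names, not the actual virtio-net
code): walk only the online CPUs and record a CPU-to-queue mapping in a
per-CPU variable, so holes in the CPU ID space no longer matter.

#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(int, example_txq);	/* preferred queue for this CPU */

static void example_build_cpu_queue_map(int num_queue_pairs)
{
	int cpu, i = 0;

	for_each_online_cpu(cpu)
		per_cpu(example_txq, cpu) = i++ % num_queue_pairs;
}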
> 
> Cc: Rusty Russell 
> Cc: "Michael S. Tsirkin" 
> Cc: Jason Wang 
> Cc: Eric Dumazet 
> Cc: "David S. Miller" 
> Cc: virtualizat...@lists.linux-foundation.org
> Cc: net...@vger.kernel.org
> Signed-off-by: Wanlong Gao 
> Acked-by: Michael S. Tsirkin 

Applied.


Re: [PATCH 11/11] ksm: stop hotremove lockdep warning

2013-01-26 Thread Simon Jeons
On Fri, 2013-01-25 at 18:10 -0800, Hugh Dickins wrote:
> Complaints are rare, but lockdep still does not understand the way
> ksm_memory_callback(MEM_GOING_OFFLINE) takes ksm_thread_mutex, and
> holds it until the ksm_memory_callback(MEM_OFFLINE): that appears
> to be a problem because notifier callbacks are made under down_read
> of blocking_notifier_head->rwsem (so first the mutex is taken while
> holding the rwsem, then later the rwsem is taken while still holding
> the mutex); but is not in fact a problem because mem_hotplug_mutex
> is held throughout the dance.
> 
> There was an attempt to fix this with mutex_lock_nested(); but if that
> happened to fool lockdep two years ago, apparently it does so no longer.
> 
> I had hoped to eradicate this issue in extending KSM page migration not
> to need the ksm_thread_mutex.  But then realized that although the page
> migration itself is safe, we do still need to lock out ksmd and other
> users of get_ksm_page() while offlining memory - at some point between
> MEM_GOING_OFFLINE and MEM_OFFLINE, the struct pages themselves may
> vanish, and get_ksm_page()'s accesses to them become a violation.
> 
> So, give up on holding ksm_thread_mutex itself from MEM_GOING_OFFLINE to
> MEM_OFFLINE, and add a KSM_RUN_OFFLINE flag, and wait_while_offlining()
> checks, to achieve the same lockout without being caught by lockdep.
> This is less elegant for KSM, but it's more important to keep lockdep
> useful to other users - and I apologize for how long it took to fix.
> 
> Reported-by: Gerald Schaefer 
> Signed-off-by: Hugh Dickins 
> ---
>  mm/ksm.c |   55 +++--
>  1 file changed, 41 insertions(+), 14 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:06.880206290 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:38:53.984208836 -0800
> @@ -226,7 +226,9 @@ static unsigned int ksm_merge_across_nod
>  #define KSM_RUN_STOP 0
>  #define KSM_RUN_MERGE1
>  #define KSM_RUN_UNMERGE  2
> -static unsigned int ksm_run = KSM_RUN_STOP;
> +#define KSM_RUN_OFFLINE  4
> +static unsigned long ksm_run = KSM_RUN_STOP;
> +static void wait_while_offlining(void);
>  
>  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
>  static DEFINE_MUTEX(ksm_thread_mutex);
> @@ -1700,6 +1702,7 @@ static int ksm_scan_thread(void *nothing
>  
>   while (!kthread_should_stop()) {
>   mutex_lock(&ksm_thread_mutex);
> + wait_while_offlining();
>   if (ksmd_should_run())
>   ksm_do_scan(ksm_thread_pages_to_scan);
>   mutex_unlock(&ksm_thread_mutex);
> @@ -2056,6 +2059,22 @@ void ksm_migrate_page(struct page *newpa
>  #endif /* CONFIG_MIGRATION */
>  
>  #ifdef CONFIG_MEMORY_HOTREMOVE
> +static int just_wait(void *word)
> +{
> + schedule();
> + return 0;
> +}
> +
> +static void wait_while_offlining(void)
> +{
> + while (ksm_run & KSM_RUN_OFFLINE) {
> + mutex_unlock(&ksm_thread_mutex);
> + wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
> + just_wait, TASK_UNINTERRUPTIBLE);
> + mutex_lock(&ksm_thread_mutex);
> + }
> +}
> +
>  static void ksm_check_stable_tree(unsigned long start_pfn,
> unsigned long end_pfn)
>  {
> @@ -2098,15 +2117,15 @@ static int ksm_memory_callback(struct no
>   switch (action) {
>   case MEM_GOING_OFFLINE:
>   /*
> -  * Keep it very simple for now: just lock out ksmd and
> -  * MADV_UNMERGEABLE while any memory is going offline.
> -  * mutex_lock_nested() is necessary because lockdep was alarmed
> -  * that here we take ksm_thread_mutex inside notifier chain
> -  * mutex, and later take notifier chain mutex inside
> -  * ksm_thread_mutex to unlock it.   But that's safe because both
> -  * are inside mem_hotplug_mutex.
> +  * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
> +  * and remove_all_stable_nodes() while memory is going offline:
> +  * it is unsafe for them to touch the stable tree at this time.
> +  * But unmerge_ksm_pages(), rmap lookups and other entry points

Why is unmerge_ksm_pages() beneath us safe for ksm memory hotremove?

> +  * which do not need the ksm_thread_mutex are all safe.
>*/
> - mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
> + mutex_lock(&ksm_thread_mutex);
> + ksm_run |= KSM_RUN_OFFLINE;
> + mutex_unlock(&ksm_thread_mutex);
>   break;
>  
>   case MEM_OFFLINE:
> @@ -2122,11 +2141,20 @@ static int ksm_memory_callback(struct no
>   /* fallthrough */
>  
>   case MEM_CANCEL_OFFLINE:
> + mutex_lock(&ksm_thread_mutex);
> + ksm_run &= ~KSM_RUN_OFFLINE;
>   mutex_unlock(&ksm_thread_mutex);
> +
> + smp_mb();   /* wake_up_bit advises 

Re: [PATCH review 3/6] userns: Recommend use of memory control groups.

2013-01-26 Thread Eric W. Biederman
"Serge E. Hallyn"  writes:

> Quoting Eric W. Biederman (ebied...@xmission.com):
>> 
>> In the help text describing user namespaces recommend use of memory
>> control groups.  In many cases memory control groups are the only
>> mechanism there is to limit how much memory a user who can create
>> user namespaces can use.
>> 
>> Signed-off-by: "Eric W. Biederman" 
>
> Acked-by: Serge Hallyn 

>
> nit:
>

I have fixed your nit and added the following text, so people
have a clue where to look to configure cgroups in userspace.

diff --git a/Documentation/namespaces/resource-control.txt 
b/Documentation/namespaces/resource-control.txt
index 3d8178a..abc13c3 100644
--- a/Documentation/namespaces/resource-control.txt
+++ b/Documentation/namespaces/resource-control.txt
@@ -7,4 +7,8 @@ users programs to play nice this problems becomes more acute.
 Therefore it is recommended that memory control groups be enabled in
 kernels that enable user namespaces, and it is further recommended
 that userspace configure memory control groups to limit how much
-memory users they don't trust to play nice can use.
+memory user's they don't trust to play nice can use.
+
+Memory control groups can be configured by installing the libcgroup
+package present on most distros editing /etc/cgrules.conf,
+/etc/cgconfig.conf and setting up libpam-cgroup.



Re: [PATCH RESEND] ARM: dts: max77686: Add DTS file for max77686 PMIC

2013-01-26 Thread Mark Brown
On Sat, Jan 26, 2013 at 03:06:53PM +0900, Dongjin Kim wrote:
> Hello Mark,
> 
> Yes, this is not ARM-specific chip at all. Just wanted to be reviewed
> by you and others if the format is ok before integrating to my board
> file. I had sent similar one before,
> https://patchwork.kernel.org/patch/1287711, and you advised that was
> too board specific. And plan to integrate like OMAP boards have with
> twl6030.dtsi and twl6040.dtsi.

> It would be nice if you specify a directory somewhere for such device files.

I think we need to create one, not quite sure where though.  drivers/of
perhaps but that's a bit non-idiomatic, or possibly something top level.
firmware might do too.




Re: [PATCH V8 00/13] MIPS: Add Loongson-3 based machines support

2013-01-26 Thread 陈华才
Hi, John,

Compiling fails because the __dev* prefixes should be removed
due to upstream changes.

You said that patch 3 needs to be reworked, but I don't know
how to improve it... Could you please tell me what is not sane?

Maybe you mean I should make cpu_has_coherent_cache a
runtime value rather than a config option as follows?

1, remove CONFIG_CPU_SUPPORTS_COHERENT_CACHE
2, in arch/mips/include/asm/mach-loongson/cpu-feature-overrides.h
#define cpu_has_coherent_cache   1
3, in arch/mips/include/asm/cpu.h
#define MIPS_CPU_COHERENT_CACHE   0x0008
4, in arch/mips/include/asm/cpu-features.h
#ifndef cpu_has_coherent_cache
#define cpu_has_coherent_cache   (cpu_data[0].options &
MIPS_CPU_INCLUSIVE_CACHES)
#endif

Besides, the SMP code has a bug to fix (IPI sending), and
patch 3 and patch 6 need to be updated. So I think a V9 is needed :(



> On 25/01/13 01:15, 陈华才 wrote:
>> ok, I'll prepare v9 of this seris in these days.
>>>
>
>
> Please dont send v9
>
> read my mail and compile / runtime test the tree please
>
> only patch 3 needs to be reworked and an update for the "MIPS: Loongson
> 3: Add HT-linked PCI support." needs to be made
>
>   John
>

 Huacai Chen(13):
MIPS: Loongson: Add basic Loongson-3 definition.
MIPS: Loongson: Add basic Loongson-3 CPU support.
MIPS: Loongson: Introduce and use cpu_has_coherent_cache feature.
MIPS: Loongson 3: Add Lemote-3A machtypes definition.
MIPS: Loongson: Add UEFI-like firmware interface support.
MIPS: Loongson 3: Add HT-linked PCI support.
MIPS: Loongson 3: Add IRQ init and dispatch support.
MIPS: Loongson 3: Add serial port support.
MIPS: Loongson: Add swiotlb to support big memory (>4GB).
MIPS: Loongson: Add Loongson-3 Kconfig options.
MIPS: Loongson 3: Add Loongson-3 SMP support.
MIPS: Loongson 3: Add CPU hotplug support.
MIPS: Loongson: Add a Loongson-3 default config file.

 Signed-off-by: Huacai Chen
 Signed-off-by: Hongliang Tao
 Signed-off-by: Hua Yan
 ---
>>>
>>> Hi,
>>>
>>> I have added all patches apart from 3/13 to my queue.
>>>
>>> I believe "MIPS: Loongson: Introduce and use cpu_has_coherent_cache
>>> feature." should be rewritten in a saner way.
>>>
>>> Please compile and runtime test the tree before I send it to Ralf
>>> -->
>>> http://git.linux-mips.org/?p=john/linux-john.git;a=shortlog;h=refs/heads/mips-next-3.9
>>>
>>> I cleaned up a few minor whitespace errors while merging.
>>>
>>> http://patchwork.linux-mips.org/patch/4547/ has a few comments. Please
>>> prepare a patch asap to address those so i can fold it into the series.
>>>
>>> John
>>>
>>
>>
>
>


-- 
Jiangsu Zhongke Menglan Electronic Technology Co., Ltd.

Software Department, Huacai Chen

E-mail: che...@lemote.com

Web: http://www.lemote.com/

Add: Menglan Industrial Park, Yushan Town, Changshu, Jiangsu Province



Re: [PATCH v2] ALSA: ASoC: McASP: Fix data rotation for playback. Enables 24bit audio playback

2013-01-26 Thread Mark Brown
On Fri, Jan 18, 2013 at 10:17:00AM +0100, Michal Bachraty wrote:
> u32 rotate = (32 - word_length) / 4;
> This implementation is wrong, but it works only for 16, or 32 bit audio data.

Applied, thanks.




[PATCH]video:uvesafb: Fix dereference NULL pointer code path

2013-01-26 Thread Wang YanQing
platform_device_alloc() could fail and return NULL;
we should check this before calling platform_device_put().

Signed-off-by: Wang YanQing 
---
 drivers/video/uvesafb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 2f8f82d..230bd45 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -1975,7 +1975,8 @@ static int __devinit uvesafb_init(void)
err = -ENOMEM;
 
if (err) {
-   platform_device_put(uvesafb_device);
+   if (uvesafb_device)
+   platform_device_put(uvesafb_device);
platform_driver_unregister(&uvesafb_driver);
cn_del_callback(&uvesafb_cn_id);
return err;
-- 
1.7.11.1.116.g8228a23


Re: [PATCH v2 4/4] lp8788-ldo: use ena_pin of regulator-core for external control

2013-01-26 Thread Mark Brown
On Tue, Jan 15, 2013 at 04:35:53AM +, Kim, Milo wrote:
>  The regulator core driver provides enable GPIO control for enabling/disabling a
>  regulator. Now, the enable GPIO is shared among regulators.
>  Use this internal mechanism, so unnecessary code is removed.
>  GPIO enable pin configurations are added in digital LDO and analog LDO 
> drivers.

Looks good.




Re: [PATCH v2 3/4] regulator-core: free requested GPIOs and manage the enable list

2013-01-26 Thread Mark Brown
On Tue, Jan 15, 2013 at 04:35:50AM +, Kim, Milo wrote:
>  The regulator_ena_gpio_request() allocates the enable GPIO and adds it to the list.
>  Related resources should be released if they are not used any more.
>  To free the requested GPIO and remove it from the enable GPIO list, a
>  new function, 'regulator_ena_gpio_free()', is added.

Ah, sorry - I hadn't realised this patch had the cleanup code.  This
needs a bit of an update - as I said for one of the earlier patches we
need to reference count to make sure we only free the GPIO when it has
no users.  With this patch we will free the GPIO when the first
regulator to use it is freed, which might mean other regulators are still
using it.
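
A sketch of the reference counting being asked for here (the struct layout and
function name are illustrative, not the eventual regulator API): the GPIO is
released only when the last regulator sharing it is freed.

#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/slab.h>

struct example_ena_gpio {
	struct list_head list;
	int gpio;
	u32 request_count;	/* regulators currently sharing this GPIO */
};

static void example_ena_gpio_free(struct example_ena_gpio *pin)
{
	if (WARN_ON(!pin->request_count))
		return;

	if (--pin->request_count == 0) {
		gpio_free(pin->gpio);	/* last user gone: release the pin */
		list_del(&pin->list);
		kfree(pin);
	}
}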




Re: [PATCH review 5/6] userns: Allow the userns root to mount ramfs.

2013-01-26 Thread Eric W. Biederman
"Serge E. Hallyn"  writes:

> Quoting Eric W. Biederman (ebied...@xmission.com):
>> 
>> There is no backing store to ramfs and file creation
>> rules are the same as for any other filesystem so
>> it is semantically safe to allow unprivileged users
>> to mount it.
>> 
>> The memory control group successfully limits how much
>> memory ramfs can consume, so on any system that cares about
>> a user namespace root using ramfs to exhaust memory,
>> the memory control group can be deployed.
>
> But that does mean that to avoid this new type of attack, when handed a
> new kernel (i.e. by one's distro) one has to explicitly (know about and)
> configure those limits.  The "your distro should do this for you"
> argument doesn't seem right.  And I'd really prefer there not be
> barriers to user namespaces being compiled in when there don't have to
> be.

The thing is this really isn't a new type of attack.  There are a lot of
existing methods to exhaust memory with the default configuration on
most distros.  All this is is a new method to implement such
an attack.

Most distros allow a large number of processes and allow those processes
to consume a large if not unlimited amount of ram.

The OOM killer still will recover your system from a ramfs or a tmpfs
mounted in a mount namespace created with user namespace permissions.
It works because the OOM killer will kill all of the processes in the
mount namespace.  At that point all of the mounts have their reference
counts go to 0 and the filesystems are unmounted.  When a ramfs or
tmpfs is unmounted all of the files in a ramfs or tmpfs are freed.

On the flip side every resource has historically come with its own new
knob.  The new knob in this case is memory control groups.  It isn't an
rlimit, and it isn't global limit tunable with a sysctl.  It is a much
more general knob than that.

> What was your thought on the suggestion to only allow FS_USERNS_MOUNT
> mounts by users confined in a non-init memory cgroup?

Over design.

But more than that there are a lot of other ways to get into trouble if
you don't enable memory control groups with user namespaces.   tmpfs is
just the first one I identified.

for (;;) unshare(CLONE_NEWUSER) is equally as bad, and if I look I can
find a bunch of others.

The practical fact is that allowing userspace to exhaust memory and get
the system into an OOM condition happens today.   There are lots and lots
of resources that it would take a lot of time to individually limit or
put a knob on, and even then we would miss some.  The memory control group
limits all of those now, and isn't particularly hard to configure.

So for the people who care I recommend using the tools that are
available now and work now the memory control group.

Personally I don't think distros care.

> Alternatively, what about a simple sysctl knob to turn on
> FS_USERNS_MOUNTs?  Then if I've got no untrusted users I can just turn
> that on without the system second-guessing me for not having extra
> configuration...

I suppose we could do something like what happens on terminals where
scheduler control groups are automatically created by the kernel.  Or
perhaps have an on/off sysctl knob for user namespaces themselves.  I
don't think anything more fine grained is worth it at this point.

Not that I will oppose more fine grained patches if someone else
writes them; I just don't see the bang for the buck.

I understand about not wanting to introduce limits on people enabling
user namespaces.  Most distros don't appear to limit users' memory today
so enabling user namespaces won't change anything.  For people who do
want to limit a user's memory consumption it looks like all you need
to do is something like:

$ apt-get install cgroup-bin libcgroup1 libpam-cgroup

$ cat >> /etc/cgconfig <> /etc/cgrules 

Re: [PATCH v2 2/4] regulator-core: manage enable GPIO list

2013-01-26 Thread Mark Brown
On Tue, Jan 15, 2013 at 04:35:46AM +, Kim, Milo wrote:

> +/**
> + * Balance enable_count of each GPIO and actual GPIO pin control.
> + * GPIO is enabled in case of initial use. (enable_count is 0)
> + * GPIO is disabled when it is not shared any more. (enable_count is 1)
> + */
> +static void _do_ena_gpio_ctrl(struct regulator_enable_gpio *pin,
> + struct regulator_dev *rdev, bool enable)
> +{
> + if (enable) {
> + /* Enable GPIO at initial use */
> + if (pin->enable_count == 0)
> + gpio_set_value_cansleep(rdev->ena_gpio,
> + !rdev->ena_gpio_invert);
> +
> + rdev->ena_gpio_state = 1;

ena_gpio_state is redundant with this patch, we can just replace
references to it with pin->enable_count.

We'll also need a request count to keep track of how many regulators are
using the GPIO for use in cleanup.

> + } else {
> + rdev->ena_gpio_state = 0;
> + if (pin->enable_count > 1) {
> + pin->enable_count--;
> + return;
> + }
> +
> + /* Disable GPIO if not used */
> + if (pin->enable_count == 1) {
> + gpio_set_value_cansleep(rdev->ena_gpio,
> + rdev->ena_gpio_invert);
> + pin->enable_count = 0;
> + }

Ideally we should also check if we're trying to take the enable count
below zero and complain about that.

> +static void regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
> +{
> + struct regulator_enable_gpio *pin;
> +
> + list_for_each_entry(pin, &regulator_ena_gpio_list, list) {
> + if (pin->gpio == rdev->ena_gpio) {
> + _do_ena_gpio_ctrl(pin, rdev, enable);
> + return;
> + }
> + }
> +}

This should return an error code as the users return errors; the GPIO
API won't give us errors, but we can generate them internally.  It'd be
better to just add a pointer to the GPIO struct to the regulator_dev (in
place of the GPIO) so we don't need to scan through the list every time
we look for the GPIO.




Re: [PATCH v2 1/4] regulator-core: support shared enable GPIO concept

2013-01-26 Thread Mark Brown
On Tue, Jan 15, 2013 at 04:35:41AM +, Kim, Milo wrote:
>  A Regulator can be enabled by external GPIO pin.
>  This is configurable in the regulator_config.

Please use subject lines matching the subsystem - not doing this makes
it more likely that patches will be missed or responses delayed.  For
example, when looking at my patch queue for regulator patches I search
for "regulator:" in my review pending queue, patches that don't have
that won't turn up.  This should be "regulator: core: ...".

Anyway, this series looks pretty close now...

> +/* Manage enable GPIO list. Same GPIO pin can be shared among regulators */
> +static int regulator_ena_gpio_request(struct regulator_dev *rdev,
> + const struct regulator_config *config)
> +{
> + struct regulator_enable_gpio *pin;
> + int ret;
> +
> + list_for_each_entry(pin, &regulator_ena_gpio_list, list) {
> + if (pin->gpio == config->ena_gpio) {
> + rdev_info(rdev, "GPIO %d is already used\n",
> + config->ena_gpio);
> + return 0;

This log is going to get noisy once the GPIOs are shared.  A _dbg()
would be OK though.

> + ret = gpio_request_one(config->ena_gpio,
> + GPIOF_DIR_OUT | config->ena_gpio_flags,
> + rdev_get_name(rdev));
> + if (ret)
> + return ret;
> +
> + pin = kzalloc(sizeof(struct regulator_enable_gpio), GFP_KERNEL);
> + if (pin == NULL)
> + return -ENOMEM;

Should free the GPIO if there's an error here.

> + pin->regulator = rdev;

Do we really want to keep track of the regulator here, again once we
start sharing pins...

We also need some matching code in the release path to free the GPIO and
struct when the regulator is removed.
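
A sketch of the two cleanup paths being requested (hypothetical names; the real
function is regulator_ena_gpio_request() in the patch above): release the GPIO
if the bookkeeping allocation fails, and mirror the request with a matching
free when the regulator is removed.

#include <linux/gpio.h>
#include <linux/slab.h>

struct example_ena_gpio {
	int gpio;
	/* list linkage, flags, request count, ... */
};

static int example_ena_gpio_request(int gpio, unsigned long flags,
				    const char *label)
{
	struct example_ena_gpio *pin;
	int ret;

	ret = gpio_request_one(gpio, GPIOF_DIR_OUT | flags, label);
	if (ret)
		return ret;

	pin = kzalloc(sizeof(*pin), GFP_KERNEL);
	if (!pin) {
		gpio_free(gpio);	/* don't leak the GPIO on -ENOMEM */
		return -ENOMEM;
	}

	pin->gpio = gpio;
	/* ... add pin to the shared enable-GPIO list here; the release path
	 * does the reverse: remove from the list, gpio_free(), kfree(). */
	return 0;
}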




Re: [PATCH 7/11] ksm: make KSM page migration possible

2013-01-26 Thread Simon Jeons
On Fri, 2013-01-25 at 18:03 -0800, Hugh Dickins wrote:
> KSM page migration is already supported in the case of memory hotremove,
> which takes the ksm_thread_mutex across all its migrations to keep life
> simple.
> 
> But the new KSM NUMA merge_across_nodes knob introduces a problem, when
> it's set to non-default 0: if a KSM page is migrated to a different NUMA
> node, how do we migrate its stable node to the right tree?  And what if
> that collides with an existing stable node?
> 
> So far there's no provision for that, and this patch does not attempt
> to deal with it either.  But how will I test a solution, when I don't
> know how to hotremove memory?  The best answer is to enable KSM page
> migration in all cases now, and test more common cases.  With THP and
> compaction added since KSM came in, page migration is now mainstream,
> and it's a shame that a KSM page can frustrate freeing a page block.
> 
> Without worrying about merge_across_nodes 0 for now, this patch gets
> KSM page migration working reliably for default merge_across_nodes 1
> (but leave the patch enabling it until near the end of the series).
> 
> It's much simpler than I'd originally imagined, and does not require
> an additional tier of locking: page migration relies on the page lock,
> KSM page reclaim relies on the page lock, the page lock is enough for
> KSM page migration too.
> 
> Almost all the care has to be in get_ksm_page(): that's the function
> which worries about when a stable node is stale and should be freed,
> now it also has to worry about the KSM page being migrated.
> 
> The only new overhead is an additional put/get/lock/unlock_page when
> stable_tree_search() arrives at a matching node: to make sure migration
> respects the raised page count, and so does not migrate the page while
> we're busy with it here.  That's probably avoidable, either by changing
> internal interfaces from using kpage to stable_node, or by moving the
> ksm_migrate_page() callsite into a page_freeze_refs() section (even if
> not swapcache); but this works well, I've no urge to pull it apart now.
> 
> (Descents of the stable tree may pass through nodes whose KSM pages are
> under migration: being unlocked, the raised page count does not prevent
> that, nor need it: it's safe to memcmp against either old or new page.)
> 
> You might worry about mremap, and whether page migration's rmap_walk
> to remove migration entries will find all the KSM locations where it
> inserted earlier: that should already be handled, by the satisfyingly
> heavy hammer of move_vma()'s call to ksm_madvise(,,,MADV_UNMERGEABLE,).
> 
> Signed-off-by: Hugh Dickins 
> ---
>  mm/ksm.c |   94 ++---
>  mm/migrate.c |5 ++
>  2 files changed, 77 insertions(+), 22 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:37:00.768206145 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:37:03.832206218 -0800
> @@ -499,6 +499,7 @@ static void remove_node_from_stable_tree
>   * In which case we can trust the content of the page, and it
>   * returns the gotten page; but if the page has now been zapped,
>   * remove the stale node from the stable tree and return NULL.
> + * But beware, the stable node's page might be being migrated.
>   *
>   * You would expect the stable_node to hold a reference to the ksm page.
>   * But if it increments the page's count, swapping out has to wait for
> @@ -509,44 +510,77 @@ static void remove_node_from_stable_tree
>   * pointing back to this stable node.  This relies on freeing a PageAnon
>   * page to reset its page->mapping to NULL, and relies on no other use of
>   * a page to put something that might look like our key in page->mapping.
> - *
> - * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
> - * but this is different - made simpler by ksm_thread_mutex being held, but
> - * interesting for assuming that no other use of the struct page could ever
> - * put our expected_mapping into page->mapping (or a field of the union which
> - * coincides with page->mapping).
> - *
> - * Note: it is possible that get_ksm_page() will return NULL one moment,
> - * then page the next, if the page is in between page_freeze_refs() and
> - * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
>   * is on its way to being freed; but it is an anomaly to bear in mind.
>   */
>  static struct page *get_ksm_page(struct stable_node *stable_node, bool 
> locked)
>  {
>   struct page *page;
>   void *expected_mapping;
> + unsigned long kpfn;
>  
> - page = pfn_to_page(stable_node->kpfn);
>   expected_mapping = (void *)stable_node +
>   (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
> - if (page->mapping != expected_mapping)
> - goto stale;
> - if (!get_page_unless_zero(page))
> +again:
> + kpfn = ACCESS_ONCE(stable_node->kpfn);
> + page = pfn_to_page(kpfn);
> +
> + /*
> +  * page is computed from 

[PATCH tip/core/rcu 08/12] rcu: Repurpose no-CBs event tracing to future-GP events

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Dyntick-idle CPUs need to be able to pre-announce their need for grace
periods.  This can be done using something similar to the mechanism used
by no-CB CPUs to announce their need for grace periods.  This commit
moves in this direction by renaming the no-CBs grace-period event tracing
to suit the new future-grace-period needs.

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 include/trace/events/rcu.h |   16 +-
 kernel/rcutree_plugin.h|   62 ++-
 2 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index cdfed6d..59ebcc8 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -72,10 +72,10 @@ TRACE_EVENT(rcu_grace_period,
 );
 
 /*
- * Tracepoint for no-callbacks grace-period events.  The caller should
- * pull the data from the rcu_node structure, other than rcuname, which
- * comes from the rcu_state structure, and event, which is one of the
- * following:
+ * Tracepoint for future grace-period events, including those for no-callbacks
+ * CPUs.  The caller should pull the data from the rcu_node structure,
+ * other than rcuname, which comes from the rcu_state structure, and event,
+ * which is one of the following:
  *
  * "Startleaf": Request a nocb grace period based on leaf-node data.
  * "Startedleaf": Leaf-node start proved sufficient.
@@ -87,7 +87,7 @@ TRACE_EVENT(rcu_grace_period,
  * "Cleanup": Clean up rcu_node structure after previous GP.
  * "CleanupMore": Clean up, and another no-CB GP is needed.
  */
-TRACE_EVENT(rcu_nocb_grace_period,
+TRACE_EVENT(rcu_future_grace_period,
 
TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed,
 unsigned long c, u8 level, int grplo, int grphi,
@@ -653,9 +653,9 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
-#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \
-   level, grplo, grphi, event) \
-   do { } while (0)
+#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
+ level, grplo, grphi, event) \
+ do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index d55475b..bcd8268 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2080,9 +2080,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, 
struct rcu_node *rnp)
wake_up_all(&rnp->nocb_gp_wq[c & 0x1]);
rnp->n_nocb_gp_requests[c & 0x1] = 0;
needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1];
-   trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed,
-   c, rnp->level, rnp->grplo, rnp->grphi,
-   needmore ? "CleanupMore" : "Cleanup");
+   trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed,
+ c, rnp->level, rnp->grplo, rnp->grphi,
+ needmore ? "CleanupMore" : "Cleanup");
return needmore;
 }
 
@@ -2229,9 +2229,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 
/* Count our request for a grace period. */
rnp->n_nocb_gp_requests[c & 0x1]++;
-   trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed,
-   c, rnp->level, rnp->grplo, rnp->grphi,
-   "Startleaf");
+   trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+ rnp->completed, c, rnp->level,
+ rnp->grplo, rnp->grphi, "Startleaf");
 
if (rnp->gpnum != rnp->completed) {
 
@@ -2240,10 +2240,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 * is in progress, so we are done.  When this grace
 * period ends, our request will be acted upon.
 */
-   trace_rcu_nocb_grace_period(rdp->rsp->name,
-   rnp->gpnum, rnp->completed, c,
-   rnp->level, rnp->grplo, rnp->grphi,
-   "Startedleaf");
+   trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+ rnp->completed, c, rnp->level,
+ rnp->grplo, rnp->grphi,
+

[PATCH tip/core/rcu 0/14] v2 RCU idle/no-CB changes for 3.9

2013-01-26 Thread Paul E. McKenney
Hello!

This series contains changes to RCU_FAST_NO_HZ idle entry/exit and also
removes restrictions on no-CBs CPUs.  This series contains some commits
that are still a bit on experimental side, so you should avoid using
these patches unless you would like to help debug them.  ;-)

1.  Remove restrictions on no-CBs CPUs.  This patch is probably the
highest-risk of the group.
2.  Allow some control of no-CBs CPUs at kernel-build time.  The option
of most interest is probably the one that makes -all- CPUs be
no-CBs CPUs.
3.  Distinguish the no-CBs kthreads for the various RCU flavors.
Without this patch, CPU 0 would have up to three kthreads all
named "rcuo0", which is less than optimal.
4.  Export RCU_FAST_NO_HZ parameters to sysfs to allow run-time
adjustment.
5.  Re-introduce callback acceleration during grace-period cleanup.
Now that the callbacks are associated with specific grace periods,
such acceleration is idempotent, and it is now safe to accelerate
more than needed.  (In contrast, in the past, too-frequent callback
acceleration resulted in infrequent RCU failures.)
6.  Use the newly numbered callbacks to greatly reduce the CPU overhead
incurred at idle entry by RCU_FAST_NO_HZ.  The fact that the
callbacks are now numbered means that instead of repeatedly
cranking the RCU state machine to try to get all callbacks
invoked, we can instead rely on the numbering so that the CPU
can take full advantage of any grace periods that elapse while
it is asleep.  CPUs with callbacks still have limited sleep times,
especially if they have at least one non-lazy callback queued.
7-12.   Allow CPUs to make known their need for future grace periods,
which is also used to reduce the need for frenetic RCU
state-machine cranking upon RCU_FAST_NO_HZ entry to idle.
7.  Move the release of the root rcu_node structure's ->lock
to the end of rcu_start_gp().
8.  Repurpose no-CB's grace-period event tracing to that of
future grace periods, which share no-CB's grace-period
mechanism.
9.  Move the release of the root rcu_node structure's ->lock
to rcu_start_gp()'s callers.
10. Rename the rcu_node ->n_nocb_gp_requests field to
->need_future_gp.
11. Abstract rcu_start_future_gp() from rcu_nocb_wait_gp()
so that RCU_FAST_NO_HZ can use the no-CB CPUs mechanism
for allowing a CPU to record its need for future grace
periods.
12. Make rcu_accelerate_cbs() note the need for future
grace periods, thus avoiding delays in starting grace
periods that currently happen due to the CPUs needing
those grace periods being out of action when the previous
grace period ends.

Changes since v1:

o   Fixed a deadlock in #1 spotted by Xie ChanglongX.
o   Updated #2 to bring the abbreviations in line with conventional
per-CPU kthread naming.
o   Moved the first two patches into their own group.

Thanx, Paul


 b/Documentation/kernel-parameters.txt |7 
 b/include/linux/rcupdate.h|1 
 b/include/trace/events/rcu.h  |   71 ++
 b/init/Kconfig|   52 +-
 b/kernel/rcutree.c|  277 --
 b/kernel/rcutree.h|   39 -
 b/kernel/rcutree_plugin.h |  865 ++
 b/kernel/rcutree_trace.c  |2 
 8 files changed, 710 insertions(+), 604 deletions(-)



[PATCH tip/core/rcu 03/12] rcu: Distinguish "rcuo" kthreads by RCU flavor

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Currently, the per-no-CBs-CPU kthreads are named "rcuo" followed by
the CPU number, for example, "rcuo".  This is problematic given that
there are either two or three RCU flavors, each of which gets a per-CPU
kthread with exactly the same name.  This commit therefore introduces
a one-letter abbreviation for each RCU flavor, namely 'b' for RCU-bh,
'p' for RCU-preempt, and 's' for RCU-sched.  This abbreviation is used
to distinguish the "rcuo" kthreads, for example, for CPU 0 we would have
"rcuo0b", "rcuo0p", and "rcuo0s".

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
Tested-by: Dietmar Eggemann 
---
 Documentation/kernel-parameters.txt |7 +--
 init/Kconfig|   13 +++--
 kernel/rcutree.c|7 ---
 kernel/rcutree.h|1 +
 kernel/rcutree_plugin.h |5 +++--
 5 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 6c72381..c2c3abf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2432,9 +2432,12 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.
Invocation of these CPUs' RCU callbacks will
-   be offloaded to "rcuoN" kthreads created for
-   that purpose.  This reduces OS jitter on the
+   be offloaded to "rcuox/N" kthreads created for
+   that purpose, where "x" is "b" for RCU-bh, "p"
+   for RCU-preempt, and "s" for RCU-sched, and "N"
+   is the CPU number.  This reduces OS jitter on the
offloaded CPUs, which can be useful for HPC and
+
real-time workloads.  It can also improve energy
efficiency for asymmetric multiprocessors.
 
diff --git a/init/Kconfig b/init/Kconfig
index 9a04156..82f4e3c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -676,12 +676,13 @@ config RCU_NOCB_CPU
 
  This option offloads callback invocation from the set of
  CPUs specified at boot time by the rcu_nocbs parameter.
- For each such CPU, a kthread ("rcuoN") will be created to
- invoke callbacks, where the "N" is the CPU being offloaded.
- Nothing prevents this kthread from running on the specified
- CPUs, but (1) the kthreads may be preempted between each
- callback, and (2) affinity or cgroups can be used to force
- the kthreads to run on whatever set of CPUs is desired.
+ For each such CPU, a kthread ("rcuox/N") will be created to
+ invoke callbacks, where the "N" is the CPU being offloaded,
+ and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
+ "s" for RCU-sched.  Nothing prevents this kthread from running
+ on the specified CPUs, but (1) the kthreads may be preempted
+ between each callback, and (2) affinity or cgroups can be used
+ to force the kthreads to run on whatever set of CPUs is desired.
 
  Say Y here if you want to help to debug reduced OS jitter.
  Say N here if you are unsure.
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 433f426..074cb2d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -64,7 +64,7 @@
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, cr) { \
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
.name = #sname, \
+   .abbr = sabbr, \
 }
 
 struct rcu_state rcu_sched_state =
-   RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+   RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', 
call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index e51373c..b6c2335 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -443,6 +443,7 @@ struct rcu_state {
unsigned long gp_max;   /* Maximum GP duration in */
/*  jiffies. */
char *name; /* Name of 

[PATCH tip/core/rcu 10/12] rcu: Rename n_nocb_gp_requests to need_future_gp

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

CPUs going idle need to be able to indicate their need for future grace
periods.  A mechanism for doing this already exists for no-callbacks
CPUs, so the idea is to re-use that mechanism.  This commit therefore
moves the ->n_nocb_gp_requests field of the rcu_node structure out from
under the CONFIG_RCU_NOCB_CPU #ifdef and renames it to ->need_future_gp.

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.h|4 ++--
 kernel/rcutree_plugin.h |   18 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 96a27f9..034b524 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -194,9 +194,9 @@ struct rcu_node {
 #ifdef CONFIG_RCU_NOCB_CPU
wait_queue_head_t nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
-   int n_nocb_gp_requests[2];
-   /* Counts of upcoming no-CB GP requests. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+   int need_future_gp[2];
+   /* Counts of upcoming no-CB GP requests. */
raw_spinlock_t fqslock cacheline_internodealigned_in_smp;
 } cacheline_internodealigned_in_smp;
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7a66312..9647a6b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2064,7 +2064,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
 {
struct rcu_node *rnp = rcu_get_root(rsp);
 
-   return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
+   return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
 }
 
 /*
@@ -2078,8 +2078,8 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, 
struct rcu_node *rnp)
int needmore;
 
wake_up_all(&rnp->nocb_gp_wq[c & 0x1]);
-   rnp->n_nocb_gp_requests[c & 0x1] = 0;
-   needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1];
+   rnp->need_future_gp[c & 0x1] = 0;
+   needmore = rnp->need_future_gp[(c + 1) & 0x1];
trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed,
  c, rnp->level, rnp->grplo, rnp->grphi,
  needmore ? "CleanupMore" : "Cleanup");
@@ -2087,7 +2087,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, 
struct rcu_node *rnp)
 }
 
 /*
- * Set the root rcu_node structure's ->n_nocb_gp_requests field
+ * Set the root rcu_node structure's ->need_future_gp field
  * based on the sum of those of all rcu_node structures.  This does
  * double-count the root rcu_node structure's requests, but this
  * is necessary to handle the possibility of a rcu_nocb_kthread()
@@ -2096,7 +2096,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, 
struct rcu_node *rnp)
  */
 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
 {
-   rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq;
+   rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
 }
 
 static void rcu_init_one_nocb(struct rcu_node *rnp)
@@ -2227,7 +2227,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
c = rnp->completed + 2;
 
/* Count our request for a grace period. */
-   rnp->n_nocb_gp_requests[c & 0x1]++;
+   rnp->need_future_gp[c & 0x1]++;
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
  rnp->completed, c, rnp->level,
  rnp->grplo, rnp->grphi, "Startleaf");
@@ -2271,10 +2271,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 * Adjust counters accordingly and start the
 * needed grace period.
 */
-   rnp->n_nocb_gp_requests[c & 0x1]--;
+   rnp->need_future_gp[c & 0x1]--;
c = rnp_root->completed + 1;
-   rnp->n_nocb_gp_requests[c & 0x1]++;
-   rnp_root->n_nocb_gp_requests[c & 0x1]++;
+   rnp->need_future_gp[c & 0x1]++;
+   rnp_root->need_future_gp[c & 0x1]++;
trace_rcu_future_grace_period(rdp->rsp->name,
  rnp->gpnum,
  rnp->completed,
-- 
1.7.8



[PATCH tip/core/rcu 09/12] rcu: Push lock release to rcu_start_gp()'s callers

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

If CPUs are to give prior notice of needed grace periods, it will be
necessary to invoke rcu_start_gp() without dropping the root rcu_node
structure's ->lock.  This commit takes a second step in this direction
by moving the release of this lock to rcu_start_gp()'s callers.

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c|   24 ++--
 kernel/rcutree_plugin.h |5 ++---
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2c6a931..0d53295 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1487,16 +1487,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock, which is released before return.  Hard irqs must
- * be disabled.
+ * the root node's ->lock and hard irqs must be disabled.
  *
  * Note that it is legal for a dying CPU (which is marked as offline) to
  * invoke this function.  This can happen when the dying CPU reports its
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
-   __releases(rcu_get_root(rsp)->lock)
+rcu_start_gp(struct rcu_state *rsp)
 {
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1510,15 +1508,13 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 */
rcu_advance_cbs(rsp, rnp, rdp);
 
-   if (!rsp->gp_kthread ||
-   !cpu_needs_another_gp(rsp, rdp)) {
+   if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
/*
 * Either we have not yet spawned the grace-period
 * task, this CPU does not need another grace period,
 * or a grace period is already in progress.
 * Either way, don't start a new grace period.
 */
-   raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
rsp->gp_flags = RCU_GP_FLAG_INIT;
@@ -1528,15 +1524,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
/* Wake up rcu_gp_kthread() to start the grace period. */
	wake_up(&rsp->gp_wq);
-   raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
  * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, as
- * required by rcu_start_gp(), which will release it.
+ * if one is needed.  Note that the caller must hold rnp->lock, which
+ * is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
@@ -2134,7 +2129,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
	raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-   rcu_start_gp(rsp, flags);  /* releases above lock */
+   rcu_start_gp(rsp);
+   raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
} else {
local_irq_restore(flags);
}
@@ -2214,11 +2210,11 @@ static void __call_rcu_core(struct rcu_state *rsp, 
struct rcu_data *rdp,
 
/* Start a new grace period if one not already started. */
if (!rcu_gp_in_progress(rsp)) {
-   unsigned long nestflag;
struct rcu_node *rnp_root = rcu_get_root(rsp);
 
-   raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-   rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+   raw_spin_lock(&rnp_root->lock);
+   rcu_start_gp(rsp);
+   raw_spin_unlock(&rnp_root->lock);
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index bcd8268..7a66312 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2220,7 +2220,6 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
unsigned long c;
bool d;
unsigned long flags;
-   unsigned long flags1;
struct rcu_node *rnp = rdp->mynode;
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
@@ -2282,8 +2281,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
  c, rnp->level,
  rnp->grplo, rnp->grphi,
  "Startedroot");
-   local_save_flags(flags1);
-

[PATCH tip/core/rcu 06/12] rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Because RCU callbacks are now associated with the number of the grace
period that they must wait for, CPUs can now take advance callbacks
corresponding to grace periods that ended while a given CPU was in
dyntick-idle mode.  This eliminates the need to try forcing the RCU
state machine while entering idle, thus reducing the CPU intensiveness
of RCU_FAST_NO_HZ, which should increase its energy efficiency.
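
As a rough sketch of how the new two-result helper below might be consulted on
the idle path (not the actual rcu_needs_cpu()/rcu_prepare_for_idle() code; "dj"
is an invented stand-in for the returned wakeup delay):

	bool all_lazy;

	if (!rcu_cpu_has_callbacks(cpu, &all_lazy))
		return 0;			/* nothing queued: idle with no RCU timer */
	if (all_lazy)
		dj = rcu_idle_lazy_gp_delay;	/* only lazy callbacks: a long sleep suffices */
	else
		dj = rcu_idle_gp_delay;		/* non-lazy callbacks: wake about once per GP */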

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 include/linux/rcupdate.h |1 +
 kernel/rcutree.c |   28 +++--
 kernel/rcutree.h |   12 +--
 kernel/rcutree_plugin.h  |  350 +++---
 kernel/rcutree_trace.c   |2 -
 5 files changed, 131 insertions(+), 262 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b758ce1..9ed2c9a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
 #define UINT_CMP_LT(a, b)  (UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+#define ulong2long(a)  (*(long *)(&(a)))
 
 /* Exported common interfaces */
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2015bce..7b1d776 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
 {
+   bool al = true;
+   bool hc = false;
+   struct rcu_data *rdp;
struct rcu_state *rsp;
 
-   /* RCU callbacks either ready or pending? */
-   for_each_rcu_flavor(rsp)
-   if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-   return 1;
-   return 0;
+   for_each_rcu_flavor(rsp) {
+   rdp = per_cpu_ptr(rsp->rda, cpu);
+   if (rdp->qlen != rdp->qlen_lazy)
+   al = false;
+   if (rdp->nxtlist)
+   hc = true;
+   }
+   if (all_lazy)
+   *all_lazy = al;
+   return hc;
 }
 
 /*
@@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int 
preemptible)
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
	atomic_set(&rdp->dynticks->dynticks,
		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-   rcu_prepare_for_idle_init(cpu);
	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
 
/* Add CPU to rcu_node bitmasks. */
@@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block 
*self,
 */
for_each_rcu_flavor(rsp)
rcu_cleanup_dying_cpu(rsp);
-   rcu_cleanup_after_idle(cpu);
break;
case CPU_DEAD:
case CPU_DEAD_FROZEN:
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index b6c2335..96a27f9 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -88,18 +88,13 @@ struct rcu_dynticks {
int dynticks_nmi_nesting;   /* Track NMI nesting level. */
atomic_t dynticks;  /* Even value for idle, else odd. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
-   int dyntick_drain;  /* Prepare-for-idle state variable. */
-   unsigned long dyntick_holdoff;
-   /* No retries for the jiffy of failure. */
-   struct timer_list idle_gp_timer;
-   /* Wake up CPU sleeping with callbacks. */
-   unsigned long idle_gp_timer_expires;
-   /* When to wake up CPU (for repost). */
-   bool idle_first_pass;   /* First pass of attempt to go idle? */
+   bool all_lazy;  /* Are all CPU's CBs lazy? */
unsigned long nonlazy_posted;
/* # times non-lazy CBs posted to CPU. */
unsigned long nonlazy_posted_snap;
/* idle-period nonlazy_posted snapshot. */
+   unsigned long last_accelerate;
+   /* Last jiffy CBs were accelerated. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
@@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct 
rcu_state *rsp,
 struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 static void __cpuinit rcu_prepare_kthreads(int cpu);
-static void 

[PATCH tip/core/rcu 1/2] rcu: Provide RCU CPU stall warnings for tiny RCU

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Tiny RCU has historically omitted RCU CPU stall warnings in order to
reduce memory requirements; however, the lack of these warnings recently
caused Thomas Gleixner some debugging pain.  Therefore, this commit
adds RCU CPU stall warnings to tiny RCU if RCU_TRACE=y.  This keeps
the memory footprint small, while still enabling CPU stall warnings
in kernels built to enable them.

Updated to include Josh Triplett's suggested use of RCU_STALL_COMMON
config variable to simplify #if expressions.

Reported-by: Thomas Gleixner 
Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 init/Kconfig|8 ++
 kernel/rcu.h|7 ++
 kernel/rcupdate.c   |   51 ++
 kernel/rcutiny.c|6 +++-
 kernel/rcutiny_plugin.h |   56 +++
 kernel/rcutree.c|   46 ++
 kernel/rcutree.h|5 
 lib/Kconfig.debug   |2 +-
 8 files changed, 130 insertions(+), 51 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 7d30240..a5e90e1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -486,6 +486,14 @@ config PREEMPT_RCU
  This option enables preemptible-RCU code that is common between
  the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_STALL_COMMON
+   def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
+   help
+ This option enables RCU CPU stall code that is common between
+ the TINY and TREE variants of RCU.  The purpose is to allow
+ the tiny variants to disable RCU CPU stall warnings, while
+ making these warnings mandatory for the tree variants.
+
 config CONTEXT_TRACKING
bool
 
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 20dfba5..7f8e759 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head 
*head)
 
 extern int rcu_expedited;
 
+#ifdef CONFIG_RCU_STALL_COMMON
+
+extern int rcu_cpu_stall_suppress;
+int rcu_jiffies_till_stall_check(void);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
 #endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf761..076730d 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -412,3 +412,54 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
 #define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 #endif
+
+#ifdef CONFIG_RCU_STALL_COMMON
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA 0
+#endif
+
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
+module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
+
+int rcu_jiffies_till_stall_check(void)
+{
+   int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+   /*
+* Limit check must be consistent with the Kconfig limits
+* for CONFIG_RCU_CPU_STALL_TIMEOUT.
+*/
+   if (till_stall_check < 3) {
+   ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+   till_stall_check = 3;
+   } else if (till_stall_check > 300) {
+   ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+   till_stall_check = 300;
+   }
+   return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+   rcu_cpu_stall_suppress = 1;
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+   .notifier_call = rcu_panic,
+};
+
+static int __init check_cpu_stall_init(void)
+{
+   atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+   return 0;
+}
+early_initcall(check_cpu_stall_init);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e7dce58..b899df3 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
   void (*func)(struct rcu_head *rcu),
   struct rcu_ctrlblk *rcp);
 
-#include "rcutiny_plugin.h"
-
 static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 
+#include "rcutiny_plugin.h"
+
 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. 
*/
 static void rcu_idle_enter_common(long long newval)
 {
@@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
+   reset_cpu_stall_ticks(rcp);
if (rcp->rcucblist != NULL &&
rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
@@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
+   check_cpu_stalls();
if (user || 

[PATCH tip/core/rcu 04/12] rcu: Export RCU_FAST_NO_HZ parameters to sysfs

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

RCU_FAST_NO_HZ operation is controlled by four compile-time C-preprocessor
macros, but some use cases benefit greatly from runtime adjustment,
particularly when tuning devices.  This commit therefore creates the
corresponding sysfs entries.

Reported-by: Robin Randhawa 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree_plugin.h |   31 ---
 1 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c016444..28185ad 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1617,6 +1617,15 @@ static void rcu_idle_count_callbacks_posted(void)
 #define RCU_IDLE_GP_DELAY 4/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)/* Roughly six seconds. */
 
+static int rcu_idle_flushes = RCU_IDLE_FLUSHES;
+module_param(rcu_idle_flushes, int, 0644);
+static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES;
+module_param(rcu_idle_opt_flushes, int, 0644);
+static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
+module_param(rcu_idle_gp_delay, int, 0644);
+static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
+module_param(rcu_idle_lazy_gp_delay, int, 0644);
+
 extern int tick_nohz_enabled;
 
 /*
@@ -1696,10 +1705,10 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
}
/* Set up for the possibility that RCU will post a timer. */
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-   *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
- RCU_IDLE_GP_DELAY) - jiffies;
+   *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies,
+ rcu_idle_gp_delay) - jiffies;
} else {
-   *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
+   *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay;
*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
}
return 0;
@@ -1805,11 +1814,11 @@ static void rcu_prepare_for_idle(int cpu)
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
trace_rcu_prep_idle("User dyntick with callbacks");
rdtp->idle_gp_timer_expires =
-   round_up(jiffies + RCU_IDLE_GP_DELAY,
-RCU_IDLE_GP_DELAY);
+   round_up(jiffies + rcu_idle_gp_delay,
+rcu_idle_gp_delay);
} else if (rcu_cpu_has_callbacks(cpu)) {
rdtp->idle_gp_timer_expires =
-   round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+   round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
trace_rcu_prep_idle("User dyntick with lazy callbacks");
} else {
return;
@@ -1861,8 +1870,8 @@ static void rcu_prepare_for_idle(int cpu)
/* Check and update the ->dyntick_drain sequencing. */
if (rdtp->dyntick_drain <= 0) {
/* First time through, initialize the counter. */
-   rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
-   } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
+   rdtp->dyntick_drain = rcu_idle_flushes;
+   } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes &&
   !rcu_pending(cpu) &&
   !local_softirq_pending()) {
/* Can we go dyntick-idle despite still having callbacks? */
@@ -1871,11 +1880,11 @@ static void rcu_prepare_for_idle(int cpu)
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
trace_rcu_prep_idle("Dyntick with callbacks");
rdtp->idle_gp_timer_expires =
-   round_up(jiffies + RCU_IDLE_GP_DELAY,
-RCU_IDLE_GP_DELAY);
+   round_up(jiffies + rcu_idle_gp_delay,
+rcu_idle_gp_delay);
} else {
rdtp->idle_gp_timer_expires =
-   round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+   round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
trace_rcu_prep_idle("Dyntick with lazy callbacks");
}
	tp = &rdtp->idle_gp_timer;
-- 
1.7.8



[PATCH tip/core/rcu 05/12] rcu: Accelerate RCU callbacks at grace-period end

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Now that callback acceleration is idempotent, it is safe to accelerate
callbacks during grace-period cleanup on whatever CPU the kthread happens
to be running on.  This commit therefore propagates the completion
of the grace period to the per-CPU data structures, and also adds an
rcu_advance_cbs() just before the cpu_needs_another_gp() check in order
to reduce false-positive grace periods.

Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c |   21 +
 1 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 074cb2d..2015bce 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1396,6 +1396,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rcu_for_each_node_breadth_first(rsp, rnp) {
	raw_spin_lock_irq(&rnp->lock);
rnp->completed = rsp->gpnum;
+   rdp = this_cpu_ptr(rsp->rda);
+   if (rnp == rdp->mynode)
+   __rcu_process_gp_end(rsp, rnp, rdp);
nocb += rcu_nocb_gp_cleanup(rsp, rnp);
	raw_spin_unlock_irq(&rnp->lock);
cond_resched();
@@ -1408,6 +1411,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
rsp->fqs_state = RCU_GP_IDLE;
rdp = this_cpu_ptr(rsp->rda);
+   rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
if (cpu_needs_another_gp(rsp, rdp))
rsp->gp_flags = 1;
	raw_spin_unlock_irq(&rnp->lock);
@@ -1497,6 +1501,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
 
+   /*
+* If there is no grace period in progress right now, any
+* callbacks we have up to this point will be satisfied by the
+* next grace period.  Also, advancing the callbacks reduces the
+* probability of false positives from cpu_needs_another_gp()
+* resulting in pointless grace periods.  So, advance callbacks!
+*/
+   rcu_advance_cbs(rsp, rnp, rdp);
+
if (!rsp->gp_kthread ||
!cpu_needs_another_gp(rsp, rdp)) {
/*
@@ -1509,14 +1522,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
return;
}
 
-   /*
-* Because there is no grace period in progress right now,
-* any callbacks we have up to this point will be satisfied
-* by the next grace period.  So this is a good place to
-* assign a grace period number to recently posted callbacks.
-*/
-   rcu_accelerate_cbs(rsp, rnp, rdp);
-
rsp->gp_flags = RCU_GP_FLAG_INIT;
	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
 
-- 
1.7.8



[PATCH tip/core/rcu 07/12] rcu: Rearrange locking in rcu_start_gp()

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

If CPUs are to give prior notice of needed grace periods, it will be
necessary to invoke rcu_start_gp() without dropping the root rcu_node
structure's ->lock.  This commit takes a first step in this direction
by moving the release of this lock to the end of rcu_start_gp().

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7b1d776..2c6a931 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1521,16 +1521,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
-
rsp->gp_flags = RCU_GP_FLAG_INIT;
-   raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
 
/* Ensure that CPU is aware of completion of last grace period. */
-   rcu_process_gp_end(rsp, rdp);
-   local_irq_restore(flags);
+   __rcu_process_gp_end(rsp, rdp->mynode, rdp);
 
/* Wake up rcu_gp_kthread() to start the grace period. */
	wake_up(&rsp->gp_wq);
+   raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
-- 
1.7.8



[PATCH tip/core/rcu 0/2] v2 Tiny RCU changes for 3.9

2013-01-26 Thread Paul E. McKenney
Hello!

This series provides a couple of tiny-RCU changes:

1.  Make Tiny RCU emit RCU CPU stall warnings when RCU_TRACE=y.
2.  Allow TREE_PREEMPT_RCU to be used on UP systems.

Changes since v1:

o   Updated #1 as suggested by Josh Triplett to simplify the
#if expressions.
o   Added #2.

Thanx, Paul


 b/init/Kconfig|   12 +
 b/kernel/rcu.h|7 +
 b/kernel/rcupdate.c   |   51 +
 b/kernel/rcutiny.c|6 +++-
 b/kernel/rcutiny_plugin.h |   56 ++
 b/kernel/rcutree.c|   46 ++---
 b/kernel/rcutree.h|5 
 b/lib/Kconfig.debug   |2 -
 8 files changed, 133 insertions(+), 52 deletions(-)



[PATCH tip/core/rcu 1/2] rcu: Tag callback lists with corresponding grace-period number

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Currently, callbacks are advanced each time the corresponding CPU
notices a change in its leaf rcu_node structure's ->completed value
(this value counts grace-period completions).  This approach has worked
quite well, but with the advent of RCU_FAST_NO_HZ, we cannot count on
a given CPU seeing all the grace-period completions.  When a CPU misses
a grace-period completion that occurs while it is in dyntick-idle mode,
this will delay invocation of its callbacks.

In addition, acceleration of callbacks (when RCU realizes that a given
callback need only wait until the end of the next grace period, rather
than having to wait for a partial grace period followed by a full
grace period) must be carried out extremely carefully.  Insufficient
acceleration will result in unnecessarily long grace-period latencies,
while excessive acceleration will result in premature callback invocation.
Changes that involve this tradeoff are therefore among the most
nerve-wracking changes to RCU.

This commit therefore explicitly tags groups of callbacks with the
number of the grace period that they are waiting for.  This means that
callback-advancement and callback-acceleration functions are idempotent,
so that excessive acceleration will merely waste a few CPU cycles.  This
also allows a CPU to take full advantage of any grace periods that have
elapsed while it has been in dyntick-idle mode.  It should also enable
simultaneous simplifications to and optimizations of RCU_FAST_NO_HZ.
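
As a rough standalone model of the idea (invented names, greatly simplified;
the kernel's real state lives in rdp->nxttail[] and rdp->nxtcompleted[]),
tagging makes acceleration idempotent and lets a CPU notice, late, that the
grace periods its callbacks were tagged for have already elapsed:

/* Standalone sketch (not kernel code): callback groups tagged with the
 * grace-period number they wait for, making "acceleration" idempotent. */
#include <stdio.h>

#define NSEG 3				/* simplified stand-in for the callback segments */

struct cbs {
	int count[NSEG];		/* callbacks queued in each segment */
	unsigned long tag[NSEG];	/* grace period each segment waits for (0 = untagged) */
};

static unsigned long completed;		/* number of the last completed grace period */

/* Tag untagged or over-conservative segments with the earliest safe grace period. */
static void accelerate(struct cbs *c)
{
	unsigned long safe = completed + 2;	/* possible partial GP plus one full GP */
	int i;

	for (i = 0; i < NSEG; i++)
		if (c->count[i] && (c->tag[i] == 0 || c->tag[i] > safe))
			c->tag[i] = safe;
}

/* Invoke every segment whose tagged grace period has already elapsed. */
static void advance(struct cbs *c)
{
	int i;

	for (i = 0; i < NSEG; i++)
		if (c->count[i] && c->tag[i] && c->tag[i] <= completed) {
			printf("invoking %d callbacks tagged for GP %lu\n",
			       c->count[i], c->tag[i]);
			c->count[i] = 0;
			c->tag[i] = 0;
		}
}

int main(void)
{
	struct cbs c = { .count = { 4, 0, 2 } };

	accelerate(&c);
	accelerate(&c);		/* idempotent: the second call changes nothing */
	completed = 2;		/* pretend two grace periods elapsed while "idle" */
	advance(&c);		/* both tagged groups are invoked; none are missed */
	return 0;
}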

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c |  195 ++
 kernel/rcutree.h |2 +
 2 files changed, 169 insertions(+), 28 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77..ac6a75d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -305,17 +305,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 }
 
 /*
- * Does the current CPU require a yet-as-unscheduled grace period?
+ * Does the current CPU require a not-yet-started grace period?
+ * The caller must have disabled interrupts to prevent races with
+ * normal callback registry.
  */
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-   struct rcu_head **ntp;
+   int i;
 
-   ntp = rdp->nxttail[RCU_DONE_TAIL +
-  (ACCESS_ONCE(rsp->completed) != rdp->completed)];
-   return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
-  !rcu_gp_in_progress(rsp);
+   if (rcu_gp_in_progress(rsp))
+   return 0;  /* No, a grace period is already in progress. */
+   if (!rdp->nxttail[RCU_NEXT_TAIL])
+   return 0;  /* No, this is a no-CBs (or offline) CPU. */
+   if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
+   return 1;  /* Yes, this CPU has newly registered callbacks. */
+   for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
+   if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
+   ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
+rdp->nxtcompleted[i]))
+   return 1;  /* Yes, CBs for future grace period. */
+   return 0; /* No grace period needed. */
 }
 
 /*
@@ -1071,6 +1081,139 @@ static void init_callback_list(struct rcu_data *rdp)
 }
 
 /*
+ * Determine the value that ->completed will have at the end of the
+ * next subsequent grace period.  This is used to tag callbacks so that
+ * a CPU can invoke callbacks in a timely fashion even if that CPU has
+ * been dyntick-idle for an extended period with callbacks under the
+ * influence of RCU_FAST_NO_HZ.
+ *
+ * The caller must hold rnp->lock with interrupts disabled.
+ */
+static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
+  struct rcu_node *rnp)
+{
+   /*
+* If RCU is idle, we just wait for the next grace period.
+* But we can only be sure that RCU is idle if we are looking
+* at the root rcu_node structure -- otherwise, a new grace
+* period might have started, but just not yet gotten around
+* to initializing the current non-root rcu_node structure.
+*/
+   if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
+   return rnp->completed + 1;
+
+   /*
+* Otherwise, wait for a possible partial grace period and
+* then the subsequent full grace period.
+*/
+   return rnp->completed + 2;
+}
+
+/*
+ * If there is room, assign a ->completed number to any callbacks on
+ * this CPU that have not already been assigned.  Also accelerate any
+ * callbacks that were previously assigned a ->completed number that has
+ * since proven to be too conservative, which can happen if callbacks get
+ * assigned a ->completed number while RCU is idle, but with reference to
+ * a non-root rcu_node structure.  This function is idempotent, so it does
+ * not hurt to call it 

[PATCH tip/core/rcu 2/2] rcu: Trace callback acceleration

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

This commit adds event tracing for callback acceleration to allow better
tracking of callbacks through the system.

Signed-off-by: Paul E. McKenney 
---
 include/trace/events/rcu.h |6 --
 kernel/rcutree.c   |6 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b..5678114 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization,
  * of a new grace period or the end of an old grace period ("cpustart"
  * and "cpuend", respectively), a CPU passing through a quiescent
  * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
- * and "cpuofl", respectively), and a CPU being kicked for being too
- * long in dyntick-idle mode ("kick").
+ * and "cpuofl", respectively), a CPU being kicked for being too
+ * long in dyntick-idle mode ("kick"), a CPU accelerating its new
+ * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU
+ * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB").
  */
 TRACE_EVENT(rcu_grace_period,
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ac6a75d..e9dce4f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1168,6 +1168,12 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, 
struct rcu_node *rnp,
rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtcompleted[i] = c;
}
+
+   /* Trace depending on how much we were able to accelerate. */
+   if (!*rdp->nxttail[RCU_WAIT_TAIL])
+   trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB");
+   else
+   trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB");
 }
 
 /*
-- 
1.7.8



[PATCH tip/core/rcu 11/12] rcu: Abstract rcu_start_future_gp() from rcu_nocb_wait_gp()

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

CPUs going idle will need to record the need for a future grace
period, but won't actually need to block waiting on it.  This commit
therefore splits rcu_start_future_gp(), which does the recording, from
rcu_nocb_wait_gp(), which now invokes rcu_start_future_gp() to do the
recording, after which rcu_nocb_wait_gp() does the waiting.
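
Schematically, rcu_nocb_wait_gp() then reduces to "record, then wait"; a sketch
using only names from this series, with locking, tracing, and signal handling
omitted:

	c = rcu_start_future_gp(rnp, rdp);	/* record the need; start a GP if required */
	wait_event_interruptible(rnp->nocb_gp_wq[c & 0x1],
				 ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c));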

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c|  123 +--
 kernel/rcutree.h|2 +-
 kernel/rcutree_plugin.h |  104 
 3 files changed, 130 insertions(+), 99 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0d53295..f4b23f1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -224,6 +224,7 @@ static ulong jiffies_till_next_fqs = 
RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+static void rcu_start_gp(struct rcu_state *rsp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1075,6 +1076,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state 
*rsp,
 }
 
 /*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+   unsigned long c, char *s)
+{
+   trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+ rnp->completed, c, rnp->level,
+ rnp->grplo, rnp->grphi, s);
+}
+
+/*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks.  The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+static unsigned long __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+   unsigned long c;
+   int i;
+   struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+   /*
+* Pick up grace-period number for new callbacks.  If this
+* grace period is already marked as needed, return to the caller.
+*/
+   c = rcu_cbs_completed(rdp->rsp, rnp);
+   trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+   if (rnp->need_future_gp[c & 0x1]) {
+   trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+   return c;
+   }
+
+   /*
+* If either this rcu_node structure or the root rcu_node structure
+* believe that a grace period is in progress, then we must wait
+* for the one following, which is in "c".  Because our request
+* will be noticed at the end of the current grace period, we don't
+* need to explicitly start one.
+*/
+   if (rnp->gpnum != rnp->completed ||
+   ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+   rnp->need_future_gp[c & 0x1]++;
+   trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+   return c;
+   }
+
+   /*
+* There might be no grace period in progress.  If we don't already
+* hold it, acquire the root rcu_node structure's lock in order to
+* start one (if needed).
+*/
+   if (rnp != rnp_root)
+   raw_spin_lock(&rnp_root->lock);
+
+   /*
+* Get a new grace-period number.  If there really is no grace
+* period in progress, it will be smaller than the one we obtained
+* earlier.  Adjust callbacks as needed.  Note that even no-CBs
+* CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+*/
+   c = rcu_cbs_completed(rdp->rsp, rnp_root);
+   for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+   if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+   rdp->nxtcompleted[i] = c;
+
+   /*
+* If the need for the required grace period is already
+* recorded, trace and leave.
+*/
+   if (rnp_root->need_future_gp[c & 0x1]) {
+   trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+   goto unlock_out;
+   }
+
+   /* Record the need for the future grace period. */
+   rnp_root->need_future_gp[c & 0x1]++;
+
+   /* If a grace period is not already in progress, start one. */
+   if (rnp_root->gpnum != rnp_root->completed) {
+   trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+   } else {
+   trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+   rcu_start_gp(rdp->rsp);
+   }
+unlock_out:
+   if (rnp != rnp_root)
+   raw_spin_unlock(&rnp_root->lock);
+   return c;
+}
+
+/*
+ * Clean up any old requests for the just-ended grace period.  Also return
+ * 

[PATCH tip/core/rcu 2/2] rcu: Allow TREE_PREEMPT_RCU on UP systems

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

TINY_PREEMPT_RCU is complex and does not provide that much memory
savings, so TREE_PREEMPT_RCU should be used instead.  On the systems
where the difference between TINY_PREEMPT_RCU and TREE_PREEMPT_RCU
matters, that difference is quite small compared to the memory footprint
of CONFIG_PREEMPT itself.

This commit therefore takes a first step towards eliminating
TINY_PREEMPT_RCU by allowing TREE_PREEMPT_RCU to be configured on !SMP
systems.

Signed-off-by: Paul E. McKenney 
---
 init/Kconfig |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index a5e90e1..fb19b46 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -453,7 +453,7 @@ config TREE_RCU
 
 config TREE_PREEMPT_RCU
bool "Preemptible tree-based hierarchical RCU"
-   depends on PREEMPT && SMP
+   depends on PREEMPT
help
  This option selects the RCU implementation that is
  designed for very large SMP systems with hundreds or
@@ -461,6 +461,8 @@ config TREE_PREEMPT_RCU
  is also required.  It also scales down nicely to
  smaller systems.
 
+ Select this option if you are unsure.
+
 config TINY_RCU
bool "UP-only small-memory-footprint RCU"
depends on !PREEMPT && !SMP
-- 
1.7.8



[PATCH tip/core/rcu 12/12] rcu: Make rcu_accelerate_cbs() note need for future grace periods

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Now that rcu_start_future_gp() has been abstracted from
rcu_nocb_wait_gp(), rcu_accelerate_cbs() can invoke rcu_start_future_gp()
so as to register the need for any future grace periods needed by a
CPU about to enter dyntick-idle mode.  This commit makes this change.
Note that some refactoring of rcu_start_gp() is carried out to avoid
recursion and subsequent self-deadlocks.
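
The call chain being broken, summarized from the hunks below (arrows mean
"may call"; this is a sketch of the refactoring, not additional kernel code):

	rcu_start_gp()
	  -> rcu_advance_cbs()
	       -> rcu_accelerate_cbs()
	            -> rcu_start_future_gp()
	                 -> rcu_start_gp_advanced()	/* not rcu_start_gp(): no recursion */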

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutree.c |   50 --
 1 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f4b23f1..9cb91e4 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -224,7 +224,8 @@ static ulong jiffies_till_next_fqs = 
RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
-static void rcu_start_gp(struct rcu_state *rsp);
+static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+ struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1162,7 +1163,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data 
*rdp)
trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
} else {
trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
-   rcu_start_gp(rdp->rsp);
+   rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
}
 unlock_out:
if (rnp != rnp_root)
@@ -1248,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, 
struct rcu_node *rnp,
rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
rdp->nxtcompleted[i] = c;
}
+   /* Record any needed additional grace periods. */
+   rcu_start_future_gp(rnp, rdp);
 
/* Trace depending on how much we were able to accelerate. */
if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@ -1609,20 +1612,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp)
+rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+ struct rcu_data *rdp)
 {
-   struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-   struct rcu_node *rnp = rcu_get_root(rsp);
-
-   /*
-* If there is no grace period in progress right now, any
-* callbacks we have up to this point will be satisfied by the
-* next grace period.  Also, advancing the callbacks reduces the
-* probability of false positives from cpu_needs_another_gp()
-* resulting in pointless grace periods.  So, advance callbacks!
-*/
-   rcu_advance_cbs(rsp, rnp, rdp);
-
if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
/*
 * Either we have not yet spawned the grace-period
@@ -1634,14 +1626,36 @@ rcu_start_gp(struct rcu_state *rsp)
}
rsp->gp_flags = RCU_GP_FLAG_INIT;
 
-   /* Ensure that CPU is aware of completion of last grace period. */
-   __rcu_process_gp_end(rsp, rdp->mynode, rdp);
-
/* Wake up rcu_gp_kthread() to start the grace period. */
	wake_up(&rsp->gp_wq);
 }
 
 /*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp)
+{
+   struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+   struct rcu_node *rnp = rcu_get_root(rsp);
+
+   /*
+* If there is no grace period in progress right now, any
+* callbacks we have up to this point will be satisfied by the
+* next grace period.  Also, advancing the callbacks reduces the
+* probability of false positives from cpu_needs_another_gp()
+* resulting in pointless grace periods.  So, advance callbacks
+* then start the grace period!
+*/
+   rcu_advance_cbs(rsp, rnp, rdp);
+   rcu_start_gp_advanced(rsp, rnp, rdp);
+}
+
+/*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
  * period and letting rcu_start_gp() start up the next grace period
-- 
1.7.8



Re: [PATCH 6/11] ksm: remove old stable nodes more thoroughly

2013-01-26 Thread Simon Jeons
Hi Hugh,
On Fri, 2013-01-25 at 18:01 -0800, Hugh Dickins wrote:
> Switching merge_across_nodes after running KSM is liable to oops on stale
> nodes still left over from the previous stable tree.  It's not something
> that people will often want to do, but it would be lame to demand a reboot
> when they're trying to determine which merge_across_nodes setting is best.
> 
> How can this happen?  We only permit switching merge_across_nodes when
> pages_shared is 0, and usually set run 2 to force that beforehand, which
> ought to unmerge everything: yet oopses still occur when you then run 1.
> 
> Three causes:
> 
> 1. The old stable tree (built according to the inverse merge_across_nodes)
> has not been fully torn down.  A stable node lingers until get_ksm_page()
> notices that the page it references no longer references it: but the page
> is not necessarily freed as soon as expected, particularly when swapcache.
> 

When can this happen?  

> Fix this with a pass through the old stable tree, applying get_ksm_page()
> to each of the remaining nodes (most found stale and removed immediately),
> with forced removal of any left over.  Unless the page is still mapped:
> I've not seen that case, it shouldn't occur, but better to WARN_ON_ONCE
> and EBUSY than BUG.
> 
> 2. __ksm_enter() has a nice little optimization, to insert the new mm
> just behind ksmd's cursor, so there's a full pass for it to stabilize
> (or be removed) before ksmd addresses it.  Nice when ksmd is running,
> but not so nice when we're trying to unmerge all mms: we were missing
> those mms forked and inserted behind the unmerge cursor.  Easily fixed
> by inserting at the end when KSM_RUN_UNMERGE.

Since forked mms are inserted just behind ksmd's cursor, won't they be
unmerged shortly afterwards?  Why would they be missed?

> 
> 3. It is possible for a KSM page to be faulted back from swapcache into
> an mm, just after unmerge_and_remove_all_rmap_items() scanned past it.
> Fix this by copying on fault when KSM_RUN_UNMERGE: but that is private
> to ksm.c, so dissolve the distinction between ksm_might_need_to_copy()
> and ksm_does_need_to_copy(), doing it all in the one call into ksm.c.
> 

Make sense. :)

> A long outstanding, unrelated bugfix sneaks in with that third fix:
> ksm_does_need_to_copy() would copy from a !PageUptodate page (implying
> I/O error when read in from swap) to a page which it then marks Uptodate.
> Fix this case by not copying, letting do_swap_page() discover the error.
> 
> Signed-off-by: Hugh Dickins 
> ---
>  include/linux/ksm.h |   18 ++---
>  mm/ksm.c|   83 +++---
>  mm/memory.c |   19 -
>  3 files changed, 92 insertions(+), 28 deletions(-)
> 
> --- mmotm.orig/include/linux/ksm.h2013-01-25 14:27:58.220193250 -0800
> +++ mmotm/include/linux/ksm.h 2013-01-25 14:37:00.764206145 -0800
> @@ -16,9 +16,6 @@
>  struct stable_node;
>  struct mem_cgroup;
>  
> -struct page *ksm_does_need_to_copy(struct page *page,
> - struct vm_area_struct *vma, unsigned long address);
> -
>  #ifdef CONFIG_KSM
>  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
>   unsigned long end, int advice, unsigned long *vm_flags);
> @@ -73,15 +70,8 @@ static inline void set_page_stable_node(
>   * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
>   * but what if the vma was unmerged while the page was swapped out?
>   */
> -static inline int ksm_might_need_to_copy(struct page *page,
> - struct vm_area_struct *vma, unsigned long address)
> -{
> - struct anon_vma *anon_vma = page_anon_vma(page);
> -
> - return anon_vma &&
> - (anon_vma->root != vma->anon_vma->root ||
> -  page->index != linear_page_index(vma, address));
> -}
> +struct page *ksm_might_need_to_copy(struct page *page,
> + struct vm_area_struct *vma, unsigned long address);
>  
>  int page_referenced_ksm(struct page *page,
>   struct mem_cgroup *memcg, unsigned long *vm_flags);
> @@ -113,10 +103,10 @@ static inline int ksm_madvise(struct vm_
>   return 0;
>  }
>  
> -static inline int ksm_might_need_to_copy(struct page *page,
> +static inline struct page *ksm_might_need_to_copy(struct page *page,
>   struct vm_area_struct *vma, unsigned long address)
>  {
> - return 0;
> + return page;
>  }
>  
>  static inline int page_referenced_ksm(struct page *page,
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:36:58.856206099 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:37:00.768206145 -0800
> @@ -644,6 +644,57 @@ static int unmerge_ksm_pages(struct vm_a
>  /*
>   * Only called through the sysfs control interface:
>   */
> +static int remove_stable_node(struct stable_node *stable_node)
> +{
> + struct page *page;
> + int err;
> +
> + page = get_ksm_page(stable_node, true);
> + if (!page) {
> + /*
> +  * get_ksm_page did 

Re: [patch v4 0/18] sched: simplified fork, release load avg and power awareness scheduling

2013-01-26 Thread Mike Galbraith
On Sun, 2013-01-27 at 10:41 +0800, Alex Shi wrote: 
> On 01/24/2013 11:07 PM, Alex Shi wrote:
> > On 01/24/2013 05:44 PM, Borislav Petkov wrote:
> >> On Thu, Jan 24, 2013 at 11:06:42AM +0800, Alex Shi wrote:
> >>> Since the runnable info needs 345ms to accumulate, balancing
> >>> doesn't do well for many tasks burst waking. After talking with Mike
> >>> Galbraith, we are agree to just use runnable avg in power friendly 
> >>> scheduling and keep current instant load in performance scheduling for 
> >>> low latency.
> >>>
> >>> So the biggest change in this version is removing runnable load avg in
> >>> balance and just using runnable data in power balance.
> >>>
> >>> The patchset bases on Linus' tree, includes 3 parts,
> >>> ** 1, bug fix and fork/wake balancing clean up. patch 1~5,
> >>> --
> >>> the first patch remove one domain level. patch 2~5 simplified fork/wake
> >>> balancing, it can increase 10+% hackbench performance on our 4 sockets
> >>> SNB EP machine.
> >>
> >> Ok, I see some benchmarking results here and there in the commit
> >> messages but since this is touching the scheduler, you probably would
> >> need to make sure it doesn't introduce performance regressions vs
> >> mainline with a comprehensive set of benchmarks.
> >>
> > 
> > Thanks a lot for your comments, Borislav! :)
> > 
> > For this patchset, the code will just check current policy, if it is
> > performance, the code patch will back to original performance code at
> > once. So there should no performance change on performance policy.
> > 
> > I once tested the balance policy performance with benchmark
> > kbuild/hackbench/aim9/dbench/tbench on version 2, only hackbench has a
> > bit drop ~3%. others have no clear change.
> > 
> >> And, AFAICR, mainline does by default the 'performance' scheme by
> >> spreading out tasks to idle cores, so have you tried comparing vanilla
> >> mainline to your patchset in the 'performance' setting so that you can
> >> make sure there are no problems there? And not only hackbench or a
> >> microbenchmark but aim9 (I saw that in a commit message somewhere) and
> >> whatever else multithreaded benchmark you can get your hands on.
> >>
> >> Also, you might want to run it on other machines too, not only SNB :-)
> > 
> > Anyway I will redo the performance testing on this version again on all
> > machine. but doesn't expect something change. :)
> 
> Just rerun some benchmarks: kbuild, specjbb2005, oltp, tbench, aim9,
> hackbench, fileio-cfq of sysbench, dbench, aiostress, multhreads
> loopback netperf. on my core2, nhm, wsm, snb, platforms. no clear
> performance change found.

With aim7 compute on a 4-node, 40-core box, I see stable throughput
improvement at tasks = nr_cores and below with balance and powersaving.

                    3.8.0-performance                       3.8.0-balance                        3.8.0-powersaving
Tasks   jobs/min  jti  jobs/min/task   real    cpu   jobs/min  jti  jobs/min/task   real    cpu   jobs/min  jti  jobs/min/task   real    cpu
    1     432.86  100       432.8571  14.00   3.99     433.48  100       433.4764  13.98   3.97     433.17  100       433.1665  13.99   3.98
    1     437.23  100       437.2294  13.86   3.85     436.60  100       436.5994  13.88   3.86     435.66  100       435.6578  13.91   3.90
    1     434.10  100       434.0974  13.96   3.95     436.29  100       436.2851  13.89   3.89     436.29  100       436.2851  13.89   3.87
    5    2400.95   99       480.1902  12.62  12.49    2554.81   98       510.9612  11.86   7.55    2487.68   98       497.5369  12.18   8.22
    5    2341.58   99       468.3153  12.94  13.95    2578.72   99       515.7447  11.75   7.25    2527.11   99       505.4212  11.99   7.90
    5    2350.66   99       470.1319  12.89  13.66    2600.86   99       520.1717  11.65   7.09    2508.28   98       501.6556  12.08   8.24
   10    4291.78   99       429.1785  14.12  40.14    5334.51   99       533.4507  11.36  11.13    5183.92   98       518.3918  11.69  12.15
   10    4334.76   99       433.4764  13.98  38.70    5311.13   99       531.1131  11.41  11.23    5215.15   99       521.5146  11.62  12.53
   10    4273.62   99       427.3625  14.18  40.29    5287.96   99       528.7958  11.46  11.46    5144.31   98       514.4312  11.78  12.32
   20    8487.39   94       424.3697  14.28  63.14   10594.41   99       529.7203  11.44  23.72   10575.92   99       528.7958  11.46  22.08
   20    8387.54   97       419.3772  14.45  77.01   10575.92   98       528.7958  11.46  23.41   10520.83   99       526.0417  11.52  21.88
   20    8713.16   95       435.6578  13.91  55.10   10659.63   99       532.9815  11.37  24.17   10539.13   99       526.9565  11.50

[PATCH 08/10] mfd: wm5102: Refresh register defaults

2013-01-26 Thread Mark Brown
The WM5102 register defaults are not up to date with the current register
map; synchronise them with those for current devices.

Signed-off-by: Mark Brown 
---
 drivers/mfd/wm5102-tables.c |   95 +++
 1 file changed, 33 insertions(+), 62 deletions(-)

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 167e6c4..edee1da 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -224,11 +224,9 @@ const struct regmap_irq_chip wm5102_irq = {
 static const struct reg_default wm5102_reg_default[] = {
{ 0x0008, 0x0019 },   /* R8 - Ctrl IF SPI CFG 1 */ 
{ 0x0009, 0x0001 },   /* R9 - Ctrl IF I2C1 CFG 1 */ 
-   { 0x000D, 0x },   /* R13- Ctrl IF Status 1 */ 
{ 0x0016, 0x },   /* R22- Write Sequencer Ctrl 0 */ 
{ 0x0017, 0x },   /* R23- Write Sequencer Ctrl 1 */ 
{ 0x0018, 0x },   /* R24- Write Sequencer Ctrl 2 */ 
-   { 0x001A, 0x },   /* R26- Write Sequencer PROM */ 
{ 0x0020, 0x },   /* R32- Tone Generator 1 */ 
{ 0x0021, 0x1000 },   /* R33- Tone Generator 2 */ 
{ 0x0022, 0x },   /* R34- Tone Generator 3 */ 
@@ -243,12 +241,14 @@ static const struct reg_default wm5102_reg_default[] = {
{ 0x0062, 0x01FF },   /* R98- Sample Rate Sequence Select 2 */ 
{ 0x0063, 0x01FF },   /* R99- Sample Rate Sequence Select 3 */ 
{ 0x0064, 0x01FF },   /* R100   - Sample Rate Sequence Select 4 */ 
-   { 0x0068, 0x01FF },   /* R104   - Always On Triggers Sequence 
Select 1 */ 
-   { 0x0069, 0x01FF },   /* R105   - Always On Triggers Sequence 
Select 2 */ 
-   { 0x006A, 0x01FF },   /* R106   - Always On Triggers Sequence 
Select 3 */ 
-   { 0x006B, 0x01FF },   /* R107   - Always On Triggers Sequence 
Select 4 */ 
-   { 0x006C, 0x01FF },   /* R108   - Always On Triggers Sequence 
Select 5 */ 
-   { 0x006D, 0x01FF },   /* R109   - Always On Triggers Sequence 
Select 6 */ 
+   { 0x0066, 0x01FF },   /* R102   - Always On Triggers Sequence 
Select 1 */
+   { 0x0067, 0x01FF },   /* R103   - Always On Triggers Sequence 
Select 2 */
+   { 0x0068, 0x01FF },   /* R104   - Always On Triggers Sequence 
Select 3 */
+   { 0x0069, 0x01FF },   /* R105   - Always On Triggers Sequence 
Select 4 */
+   { 0x006A, 0x01FF },   /* R106   - Always On Triggers Sequence 
Select 5 */
+   { 0x006B, 0x01FF },   /* R107   - Always On Triggers Sequence 
Select 6 */
+   { 0x006E, 0x01FF },   /* R110   - Trigger Sequence Select 32 */
+   { 0x006F, 0x01FF },   /* R111   - Trigger Sequence Select 33 */
{ 0x0070, 0x },   /* R112   - Comfort Noise Generator */ 
{ 0x0090, 0x },   /* R144   - Haptics Control 1 */ 
{ 0x0091, 0x7FFF },   /* R145   - Haptics Control 2 */ 
@@ -258,13 +258,14 @@ static const struct reg_default wm5102_reg_default[] = {
{ 0x0095, 0x },   /* R149   - Haptics phase 2 duration */ 
{ 0x0096, 0x },   /* R150   - Haptics phase 3 intensity */ 
{ 0x0097, 0x },   /* R151   - Haptics phase 3 duration */ 
-   { 0x0100, 0x0001 },   /* R256   - Clock 32k 1 */ 
+   { 0x0100, 0x0002 },   /* R256   - Clock 32k 1 */
{ 0x0101, 0x0304 },   /* R257   - System Clock 1 */ 
{ 0x0102, 0x0011 },   /* R258   - Sample rate 1 */ 
{ 0x0103, 0x0011 },   /* R259   - Sample rate 2 */ 
{ 0x0104, 0x0011 },   /* R260   - Sample rate 3 */ 
{ 0x0112, 0x0305 },   /* R274   - Async clock 1 */ 
{ 0x0113, 0x0011 },   /* R275   - Async sample rate 1 */ 
+   { 0x0114, 0x0011 },   /* R276   - Async sample rate 2 */
{ 0x0149, 0x },   /* R329   - Output system clock */ 
{ 0x014A, 0x },   /* R330   - Output async clock */ 
{ 0x0152, 0x },   /* R338   - Rate Estimator 1 */ 
@@ -273,13 +274,14 @@ static const struct reg_default wm5102_reg_default[] = {
{ 0x0155, 0x },   /* R341   - Rate Estimator 4 */ 
{ 0x0156, 0x },   /* R342   - Rate Estimator 5 */ 
{ 0x0161, 0x },   /* R353   - Dynamic Frequency Scaling 1 */ 
-   { 0x0171, 0x },   /* R369   - FLL1 Control 1 */ 
+   { 0x0171, 0x0002 },   /* R369   - FLL1 Control 1 */
{ 0x0172, 0x0008 },   /* R370   - FLL1 Control 2 */ 
{ 0x0173, 0x0018 },   /* R371   - FLL1 Control 3 */ 
{ 0x0174, 0x007D },   /* R372   - FLL1 Control 4 */ 
{ 0x0175, 0x0004 },   /* R373   - FLL1 Control 5 */ 
{ 0x0176, 0x },   /* R374   - FLL1 Control 6 */ 
{ 0x0177, 0x0181 },   /* R375   - FLL1 Loop Filter Test 1 */ 
+   { 0x0178, 0x },   /* R376   - FLL1 NCO Test 0 */
{ 

[PATCH 01/10] mfd: wm5102: Mark DSP memory regions as volatile and readable

2013-01-26 Thread Mark Brown
We can cache some of them but this is simpler for now.

Signed-off-by: Mark Brown 
---
 drivers/mfd/wm5102-tables.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 088872a..4a01192 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1825,7 +1825,13 @@ static bool wm5102_readable_register(struct device *dev, 
unsigned int reg)
case ARIZONA_DSP1_STATUS_3:
return true;
default:
-   return false;
+   if ((reg >= 0x100000 && reg < 0x106000) ||
+       (reg >= 0x180000 && reg < 0x180800) ||
+       (reg >= 0x190000 && reg < 0x194800) ||
+   (reg >= 0x1a8000 && reg < 0x1a9800))
+   return true;
+   else
+   return false;
}
 }
 
-- 
1.7.10.4



[PATCH 05/10] mfd: arizona: Check errors from regcache_sync()

2013-01-26 Thread Mark Brown
If the control bus is unreliable we may hit errors during regcache_sync(),
especially given that it tends to be one of the densest bursts of I/O in
many systems.

Signed-off-by: Mark Brown 
---
 drivers/mfd/arizona-core.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index a8b8a7b..1ab02a7 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -239,7 +239,12 @@ static int arizona_runtime_resume(struct device *dev)
return ret;
}
 
-   regcache_sync(arizona->regmap);
+   ret = regcache_sync(arizona->regmap);
+   if (ret != 0) {
+   dev_err(arizona->dev, "Failed to restore register cache\n");
+   regulator_disable(arizona->dcvdd);
+   return ret;
+   }
 
return 0;
 }
-- 
1.7.10.4



[PATCH 07/10] mfd: wm5102: Add registers for microphone detection level configuration

2013-01-26 Thread Mark Brown
Signed-off-by: Mark Brown 
---
 drivers/mfd/wm5102-tables.c |8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 0317d11..167e6c4 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -315,6 +315,10 @@ static const struct reg_default wm5102_reg_default[] = {
{ 0x02A3, 0x1102 },   /* R675   - Mic Detect 1 */ 
{ 0x02A4, 0x009F },   /* R676   - Mic Detect 2 */ 
{ 0x02A5, 0x },   /* R677   - Mic Detect 3 */ 
+   { 0x02A6, 0x3737 },   /* R678   - Mic Detect Level 1 */
+   { 0x02A7, 0x372C },   /* R679   - Mic Detect Level 2 */
+   { 0x02A8, 0x1422 },   /* R680   - Mic Detect Level 3 */
+   { 0x02A9, 0x300A },   /* R681   - Mic Detect Level 4 */
{ 0x02C3, 0x },   /* R707   - Mic noise mix control 1 */ 
{ 0x02CB, 0x },   /* R715   - Isolation control */ 
{ 0x02D3, 0x },   /* R723   - Jack detect analogue */ 
@@ -1109,6 +1113,10 @@ static bool wm5102_readable_register(struct device *dev, 
unsigned int reg)
case ARIZONA_MIC_DETECT_1:
case ARIZONA_MIC_DETECT_2:
case ARIZONA_MIC_DETECT_3:
+   case ARIZONA_MIC_DETECT_LEVEL_1:
+   case ARIZONA_MIC_DETECT_LEVEL_2:
+   case ARIZONA_MIC_DETECT_LEVEL_3:
+   case ARIZONA_MIC_DETECT_LEVEL_4:
case ARIZONA_MIC_NOISE_MIX_CONTROL_1:
case ARIZONA_ISOLATION_CONTROL:
case ARIZONA_JACK_DETECT_ANALOGUE:
-- 
1.7.10.4



[PATCH 04/10] mfd: arizona: Disable control interface reporting for WM5102 and WM5110

2013-01-26 Thread Mark Brown
Rather than disabling the error reporting only for earlier revisions,
unconditionally disable it.

Signed-off-by: Mark Brown 
---
 drivers/mfd/arizona-irq.c |   18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c
index 74713bf..2bec5f0 100644
--- a/drivers/mfd/arizona-irq.c
+++ b/drivers/mfd/arizona-irq.c
@@ -176,14 +176,7 @@ int arizona_irq_init(struct arizona *arizona)
aod = &wm5102_aod;
irq = &wm5102_irq;
 
-   switch (arizona->rev) {
-   case 0:
-   case 1:
-   ctrlif_error = false;
-   break;
-   default:
-   break;
-   }
+   ctrlif_error = false;
break;
 #endif
 #ifdef CONFIG_MFD_WM5110
@@ -191,14 +184,7 @@ int arizona_irq_init(struct arizona *arizona)
aod = &wm5110_aod;
irq = &wm5110_irq;
 
-   switch (arizona->rev) {
-   case 0:
-   case 1:
-   ctrlif_error = false;
-   break;
-   default:
-   break;
-   }
+   ctrlif_error = false;
break;
 #endif
default:
-- 
1.7.10.4



[PATCH 06/10] mfd: arizona: Allow customisation of microphone detection levels

2013-01-26 Thread Mark Brown
The microphone detection levels for Arizona parts can be customised.
Allow this to be done via platform data; the values chosen will depend
on the system design and will be determined in discussion with Wolfson.
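
As a hypothetical illustration (not part of this patch), a board file
might pass the levels like this; the values shown are only the chip
defaults from the wm5102 register table, and any entry left at zero is
skipped so the hardware default is kept:

#include <linux/mfd/arizona/pdata.h>

/* Hypothetical board support sketch; the values are the chip defaults
 * from wm5102-tables.c, not tuning recommendations. */
static struct arizona_pdata board_arizona_pdata = {
	.micd_level = {
		0x3737,		/* Mic Detect Level 1 */
		0x372C,		/* Mic Detect Level 2 */
		0x1422,		/* Mic Detect Level 3 */
		0x300A,		/* Mic Detect Level 4 */
	},
};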

Signed-off-by: Mark Brown 
---
 drivers/mfd/arizona-core.c|8 
 include/linux/mfd/arizona/pdata.h |5 +
 include/linux/mfd/arizona/registers.h |4 
 3 files changed, 17 insertions(+)

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 1ab02a7..5f0de90 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -458,6 +458,14 @@ int arizona_dev_init(struct arizona *arizona)
 arizona->pdata.gpio_defaults[i]);
}
 
+   for (i = 0; i < ARRAY_SIZE(arizona->pdata.micd_level); i++) {
+   if (!arizona->pdata.micd_level[i])
+   continue;
+
+   regmap_write(arizona->regmap, ARIZONA_MIC_DETECT_LEVEL_1 + i,
+arizona->pdata.micd_level[i]);
+   }
+
pm_runtime_set_autosuspend_delay(arizona->dev, 100);
pm_runtime_use_autosuspend(arizona->dev);
pm_runtime_enable(arizona->dev);
diff --git a/include/linux/mfd/arizona/pdata.h 
b/include/linux/mfd/arizona/pdata.h
index 8b1d1da..73822bd 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -67,6 +67,8 @@
 
 #define ARIZONA_MAX_PDM_SPK 2
 
+#define ARIZONA_NUM_MICD_LEVEL 4
+
 struct regulator_init_data;
 
 struct arizona_micd_config {
@@ -99,6 +101,9 @@ struct arizona_pdata {
/** GPIO for mic detection polarity */
int micd_pol_gpio;
 
+   /** Mic detect level parameters */
+   int micd_level[ARIZONA_NUM_MICD_LEVEL];
+
/** Headset polarity configurations */
struct arizona_micd_config *micd_configs;
int num_micd_configs;
diff --git a/include/linux/mfd/arizona/registers.h 
b/include/linux/mfd/arizona/registers.h
index 1f6fe31..fb3a1b8 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -122,6 +122,10 @@
 #define ARIZONA_MIC_DETECT_1 0x2A3
 #define ARIZONA_MIC_DETECT_2 0x2A4
 #define ARIZONA_MIC_DETECT_3 0x2A5
+#define ARIZONA_MIC_DETECT_LEVEL_1  0x2A6
+#define ARIZONA_MIC_DETECT_LEVEL_2  0x2A7
+#define ARIZONA_MIC_DETECT_LEVEL_3  0x2A8
+#define ARIZONA_MIC_DETECT_LEVEL_4  0x2A9
 #define ARIZONA_MIC_NOISE_MIX_CONTROL_1  0x2C3
 #define ARIZONA_ISOLATION_CONTROL    0x2CB
 #define ARIZONA_JACK_DETECT_ANALOGUE 0x2D3
-- 
1.7.10.4



[PATCH 02/10] mfd: wm5102: Mark only extant DSP registers volatile

2013-01-26 Thread Mark Brown
Since regmap sometimes uses volatile as a proxy for readable, simply
having a blanket condition can mark too many registers as readable.

Signed-off-by: Mark Brown 
---
 drivers/mfd/wm5102-tables.c |   11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 4a01192..0317d11 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1837,9 +1837,6 @@ static bool wm5102_readable_register(struct device *dev, 
unsigned int reg)
 
 static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 {
-   if (reg > 0xffff)
-   return true;
-
switch (reg) {
case ARIZONA_SOFTWARE_RESET:
case ARIZONA_DEVICE_REVISION:
@@ -1884,7 +1881,13 @@ static bool wm5102_volatile_register(struct device *dev, 
unsigned int reg)
case ARIZONA_MIC_DETECT_3:
return true;
default:
-   return false;
+   if ((reg >= 0x100000 && reg < 0x106000) ||
+   (reg >= 0x180000 && reg < 0x180800) ||
+   (reg >= 0x190000 && reg < 0x194800) ||
+   (reg >= 0x1a8000 && reg < 0x1a9800))
+   return true;
+   else
+   return false;
}
 }
 
-- 
1.7.10.4



[PATCH 10/10] mfd: arizona: Disable interrupts during resume

2013-01-26 Thread Mark Brown
Runtime power management does not function during system suspend, but the
Arizona devices need to use runtime power management to power up the device
in order to handle interrupts. Try to avoid interrupts firing during
resume by disabling the primary IRQ before interrupts are re-enabled on
resume, and only re-enabling it again during main resume.

The goal is to avoid issues in the situation where an interrupt is asserted
during resume (e.g., due to it being the wake source) and the interrupt
handling gets scheduled prior to the device being able to handle runtime
PM.

Signed-off-by: Mark Brown 
---
 drivers/mfd/arizona-core.c |   26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 5f0de90..0bb79c8 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -263,10 +263,36 @@ static int arizona_runtime_suspend(struct device *dev)
 }
 #endif
 
+#ifdef CONFIG_PM_SLEEP
+static int arizona_resume_noirq(struct device *dev)
+{
+   struct arizona *arizona = dev_get_drvdata(dev);
+
+   dev_dbg(arizona->dev, "Early resume, disabling IRQ\n");
+   disable_irq(arizona->irq);
+
+   return 0;
+}
+
+static int arizona_resume(struct device *dev)
+{
+   struct arizona *arizona = dev_get_drvdata(dev);
+
+   dev_dbg(arizona->dev, "Late resume, reenabling IRQ\n");
+   enable_irq(arizona->irq);
+
+   return 0;
+}
+#endif
+
 const struct dev_pm_ops arizona_pm_ops = {
SET_RUNTIME_PM_OPS(arizona_runtime_suspend,
   arizona_runtime_resume,
   NULL)
+   SET_SYSTEM_SLEEP_PM_OPS(NULL, arizona_resume)
+#ifdef CONFIG_PM_SLEEP
+   .resume_noirq = arizona_resume_noirq,
+#endif
 };
 EXPORT_SYMBOL_GPL(arizona_pm_ops);
 
-- 
1.7.10.4



[PATCH 09/10] mfd: wm5102: Update rev B patch for latest evaluation

2013-01-26 Thread Mark Brown
The latest evaluation of the revision B silicon suggests some changes to
the tuning applied for optimal performance.

Signed-off-by: Mark Brown 
---
 drivers/mfd/wm5102-tables.c |   12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index edee1da..a687901 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -59,12 +59,14 @@ static const struct reg_default wm5102_reva_patch[] = {
 static const struct reg_default wm5102_revb_patch[] = {
{ 0x80, 0x0003 },
{ 0x081, 0xE022 },
-   { 0x410, 0x6080 },
-   { 0x418, 0x6080 },
-   { 0x420, 0x6080 },
+   { 0x410, 0x4080 },
+   { 0x418, 0x4080 },
+   { 0x420, 0x4080 },
{ 0x428, 0xC000 },
-   { 0x441, 0x8014 },
+   { 0x4B0, 0x0066 },
{ 0x458, 0x000b },
+   { 0x212, 0x0000 },
+   { 0x171, 0x0000 },
{ 0x80, 0x0000 },
 };
 
@@ -274,7 +276,7 @@ static const struct reg_default wm5102_reg_default[] = {
{ 0x0155, 0x0000 },   /* R341   - Rate Estimator 4 */
{ 0x0156, 0x0000 },   /* R342   - Rate Estimator 5 */
{ 0x0161, 0x0000 },   /* R353   - Dynamic Frequency Scaling 1 */
-   { 0x0171, 0x0002 },   /* R369   - FLL1 Control 1 */
+   { 0x0171, 0x0000 },   /* R369   - FLL1 Control 1 */
{ 0x0172, 0x0008 },   /* R370   - FLL1 Control 2 */ 
{ 0x0173, 0x0018 },   /* R371   - FLL1 Control 3 */ 
{ 0x0174, 0x007D },   /* R372   - FLL1 Control 4 */ 
-- 
1.7.10.4



[PATCH 03/10] mfd: arizona: Register MICVDD supply first to ensure no retries

2013-01-26 Thread Mark Brown
Not strictly required, as probe deferral will take care of everything, but
it makes boot a little smoother.

Reported-by: Ryo Tsutsui 
Signed-off-by: Mark Brown 
---
 drivers/mfd/arizona-core.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index bc8a3ed..a8b8a7b 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -270,19 +270,19 @@ static struct mfd_cell early_devs[] = {
 };
 
 static struct mfd_cell wm5102_devs[] = {
+   { .name = "arizona-micsupp" },
{ .name = "arizona-extcon" },
{ .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
-   { .name = "arizona-micsupp" },
{ .name = "arizona-pwm" },
{ .name = "wm5102-codec" },
 };
 
 static struct mfd_cell wm5110_devs[] = {
+   { .name = "arizona-micsupp" },
{ .name = "arizona-extcon" },
{ .name = "arizona-gpio" },
{ .name = "arizona-haptics" },
-   { .name = "arizona-micsupp" },
{ .name = "arizona-pwm" },
{ .name = "wm5110-codec" },
 };
-- 
1.7.10.4



Re: [PATCH 0/3] Include kernel config by default

2013-01-26 Thread Andreas Mohr
Hi,

[CC'd extract-ikconfig creator]

> I've seen too many systems where the config used to build the running
> kernel got lost and people were unable to diagnose problems or to rebuild
> a modified or updated kernel. It's a subject that has worried me for
> several years.

I'm strongly in favour of such a change. The actual config is simply very
important for traceability.
In a recent case of mine I built the kernel on an external HDD
and did not copy the config file on deployment (e.g. to /boot).
With that kernel build tree unavailable,
I then realized that this kernel fortunately did have IKCONFIG_PROC enabled,
so configs.ko was available for (re-)use.

However, an argument could be made that the default setting
should be the bare minimum needed to provide this information source
in emergency cases (i.e., a manual run of scripts/extract-ikconfig would
be in order and easily acceptable).

$ ls -l kernel/configs.ko 
-rw-r--r-- 1 root root 35788 Jan 25 18:43 kernel/configs.ko

IMHO more than 32kB for the /proc/config.gz module is arguably
quite a bit of code to painlessly enable by default,
given the availability of other methods of retrieval.

However, now that I think of it, ISTR that using extract-ikconfig
on my bzImage did NOT work, yet loading configs.ko of the same install
successfully provided a /proc/config.gz. Huh??
If this is the case, then one or both of these things ought to be done:
- make extract-ikconfig not fail in such a case
- [failing the prior one] enable /proc/config.gz by default, too

Aww wait: I was mistaken in thinking that the config data is statically
included in the kernel and that configs.ko is then only about providing
/proc access. This not being the case (the config data itself is provided
by the *module*, too) explains both my extract-ikconfig failure on the
static kernel image and the size of configs.ko, so additionally enabling
/proc/config.gz in this module is most likely a non-issue.

However, this means that extract-ikconfig is missing a user error message
indicating that the reason for a lookup failure may be that
the setting is module-based and thus cannot be found in the image
(extract-ikconfig's header comments don't fully document this fact either).
I will be creating a commit for this eventually (or do you want to add
such a thing to your patchset now? ;).

Thanks for your effort,

Andreas Mohr


Re: [PATCH 1/3] spi: Add helper functions for setting up transfers

2013-01-26 Thread Mark Brown
On Wed, Jan 09, 2013 at 06:31:09PM +0100, Lars-Peter Clausen wrote:

> The second function spi_sync_transfer() takes a SPI device and an array of
> spi_transfers. It will allocate a new spi_message (on the stack) and add all
> transfers in the array to the message. Finally it will call spi_sync() on the
> message.
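
A minimal sketch of such a helper, based only on the description above
(the argument names and the use of spi_message_init() plus
spi_message_add_tail() are assumptions, not necessarily the submitted
implementation):

#include <linux/spi/spi.h>

/* Sketch only: build an on-stack message from an array of transfers
 * and run it synchronously, as described above. */
static inline int spi_sync_transfer(struct spi_device *spi,
				    struct spi_transfer *xfers,
				    unsigned int num_xfers)
{
	struct spi_message msg;
	unsigned int i;

	spi_message_init(&msg);
	for (i = 0; i < num_xfers; i++)
		spi_message_add_tail(&xfers[i], &msg);

	return spi_sync(spi, &msg);
}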

Reviewed-by: Mark Brown 

For the helpers, we should try to get them merged separately from the
coccinelle rules if there's an issue with those.


Re: [PATCH] mc13892: sanity check num_regulators parsed vs. registered

2013-01-26 Thread Mark Brown
On Mon, Jan 21, 2013 at 12:25:45PM -0600, Matt Sealey wrote:
> Imagine a situation where a device tree has a few regulators in an
> appropriate node:

Applied, thanks.  Always use subject lines appropriate for the
subsystem.




Re: [PATCH] mc13892-regulator: correct/refine handling of the SWxHI bit

2013-01-26 Thread Mark Brown
On Mon, Jan 21, 2013 at 11:38:40AM -0600, Matt Sealey wrote:
> MC13892 PMIC supports a "HI" bit for 3 of its 4 buck switcher outputs,
> which enables a higher set of voltage ranges.

Applied, thanks.




Re: [PATCH 4/6] davinci: regulator: tps6507x: add device tree support.

2013-01-26 Thread Mark Brown
On Thu, Jan 24, 2013 at 04:25:18PM +0530, Vishwanathrao Badarkhe, Manish wrote:
> Add device tree based initialization support for
> TI's tps6507x regulators.

Applied, thanks.  Please always use subject lines appropriate for the
subsystem you are submitting against - this is not a DaVinci change.




Re: [PATCH 1/11] ksm: allow trees per NUMA node

2013-01-26 Thread Simon Jeons
On Sat, 2013-01-26 at 18:54 -0800, Hugh Dickins wrote:
> On Sat, 26 Jan 2013, Simon Jeons wrote:
> > On Fri, 2013-01-25 at 17:54 -0800, Hugh Dickins wrote:
> > > From: Petr Holasek 
> > > @@ -1122,6 +1166,18 @@ struct rmap_item *unstable_tree_search_i
> > >   return NULL;
> > >   }
> > >  
> > > + /*
> > > +  * If tree_page has been migrated to another NUMA node, it
> > > +  * will be flushed out and put into the right unstable tree
> > 
> > Then why not insert the new page to unstable tree during page migration
> > against current upstream? Because default behavior is merge across
> > nodes.
> 
> I don't understand the words "against current upstream" in your question.

I mean the current upstream code without NUMA awareness. :)

> 
> We cannot move a page (strictly, a node) from one tree to another during
> page migration itself, because the necessary ksm_thread_mutex is not held.
> Nor would we even want to while "merge across nodes".
> 
> Ah, perhaps you are pointing out that in current upstream, the only user
> of ksm page migration is memory hotremove, which in current upstream does
> hold ksm_thread_mutex.
> 
> So you'd like us to add code for moving a node from one tree to another
> in ksm_migrate_page() (and what would it do when it collides with an

Without NUMA awareness, I still can't understand your explanation of why
we can't insert the node into the tree just after page migration instead
of inserting it at the next scan.

> existing node?), code which will then be removed a few patches later
> when ksm page migration is fully enabled?
> 
> No, I'm not going to put any more thought into that.  When Andrea pointed
> out the problem with Petr's original change to ksm_migrate_page(), I did
> indeed think that we could do something cleverer at that point; but once
> I got down to trying it, found that a dead end.  I wasn't going to be
> able to test the hotremove case properly anyway, so no good pursuing
> solutions that couldn't be generalized.
> 
> Hugh




Re: [PATCH] regulator: lp8755: Use LP8755_BUCK_MAX instead of magic number

2013-01-26 Thread Mark Brown
On Sat, Jan 26, 2013 at 01:19:47PM +0800, Axel Lin wrote:
> Signed-off-by: Axel Lin 

Applied, thanks.




Re: [PATCH 1/3] regulator: max8907: Fix using wrong dev argument for calling of_regulator_match

2013-01-26 Thread Mark Brown
On Fri, Jan 25, 2013 at 10:20:29AM +0800, Axel Lin wrote:
> The dev parameter is the device requesting the data.
> In this case it should be >dev rather than pdev->dev.parent.

Applied all, thanks.




Re: [PATCH 1/11] ksm: allow trees per NUMA node

2013-01-26 Thread Hugh Dickins
On Sat, 26 Jan 2013, Simon Jeons wrote:
> On Fri, 2013-01-25 at 17:54 -0800, Hugh Dickins wrote:
> > From: Petr Holasek 
> > @@ -1122,6 +1166,18 @@ struct rmap_item *unstable_tree_search_i
> > return NULL;
> > }
> >  
> > +   /*
> > +* If tree_page has been migrated to another NUMA node, it
> > +* will be flushed out and put into the right unstable tree
> 
> Then why not insert the new page to unstable tree during page migration
> against current upstream? Because default behavior is merge across
> nodes.

I don't understand the words "against current upstream" in your question.

We cannot move a page (strictly, a node) from one tree to another during
page migration itself, because the necessary ksm_thread_mutex is not held.
Nor would we even want to while "merge across nodes".

Ah, perhaps you are pointing out that in current upstream, the only user
of ksm page migration is memory hotremove, which in current upstream does
hold ksm_thread_mutex.

So you'd like us to add code for moving a node from one tree to another
in ksm_migrate_page() (and what would it do when it collides with an
existing node?), code which will then be removed a few patches later
when ksm page migration is fully enabled?

No, I'm not going to put any more thought into that.  When Andrea pointed
out the problem with Petr's original change to ksm_migrate_page(), I did
indeed think that we could do something cleverer at that point; but once
I got down to trying it, found that a dead end.  I wasn't going to be
able to test the hotremove case properly anyway, so no good pursuing
solutions that couldn't be generalized.

Hugh


Re: [PATCH 11/19] regmap: regmap: avoid spurious warning in regmap_read_debugfs

2013-01-26 Thread Mark Brown
On Sat, Jan 26, 2013 at 11:45:35AM +, Arnd Bergmann wrote:

> Gcc warns about the case where regmap_read_debugfs tries to walk an
> empty map->debugfs_off_cache list, which would result in an uninitialized
> variable getting returned, if we hadn't checked the same condition
> just before that.

Applied, thanks.




Re: [PATCH 5/11] ksm: get_ksm_page locked

2013-01-26 Thread Simon Jeons
On Fri, 2013-01-25 at 18:00 -0800, Hugh Dickins wrote:
> In some places where get_ksm_page() is used, we need the page to be locked.
> 
> When KSM migration is fully enabled, we shall want that to make sure that
> the page just acquired cannot be migrated beneath us (raised page count is
> only effective when there is serialization to make sure migration notices).
> Whereas when navigating through the stable tree, we certainly do not want
> to lock each node (raised page count is enough to guarantee the memcmps,
> even if page is migrated to another node).
> 
> Since we're about to add another use case, add the locked argument to
> get_ksm_page() now.
> 
> Hmm, what's that rcu_read_lock() about?  Complete misunderstanding, I
> really got the wrong end of the stick on that!  There's a configuration
> in which page_cache_get_speculative() can do something cheaper than
> get_page_unless_zero(), relying on its caller's rcu_read_lock() to have
> disabled preemption for it.  There's no need for rcu_read_lock() around
> get_page_unless_zero() (and mapping checks) here.  Cut out that
> silliness before making this any harder to understand.

BTW, what's the meaning of a ksm page being forked?

> 
> Signed-off-by: Hugh Dickins 
> ---
>  mm/ksm.c |   23 +--
>  1 file changed, 13 insertions(+), 10 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:36:53.244205966 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:36:58.856206099 -0800
> @@ -514,15 +514,14 @@ static void remove_node_from_stable_tree
>   * but this is different - made simpler by ksm_thread_mutex being held, but
>   * interesting for assuming that no other use of the struct page could ever
>   * put our expected_mapping into page->mapping (or a field of the union which
> - * coincides with page->mapping).  The RCU calls are not for KSM at all, but
> - * to keep the page_count protocol described with page_cache_get_speculative.
> + * coincides with page->mapping).
>   *
>   * Note: it is possible that get_ksm_page() will return NULL one moment,
>   * then page the next, if the page is in between page_freeze_refs() and
>   * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
>   * is on its way to being freed; but it is an anomaly to bear in mind.
>   */
> -static struct page *get_ksm_page(struct stable_node *stable_node)
> +static struct page *get_ksm_page(struct stable_node *stable_node, bool 
> locked)
>  {
>   struct page *page;
>   void *expected_mapping;
> @@ -530,7 +529,6 @@ static struct page *get_ksm_page(struct
>   page = pfn_to_page(stable_node->kpfn);
>   expected_mapping = (void *)stable_node +
>   (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
> - rcu_read_lock();
>   if (page->mapping != expected_mapping)
>   goto stale;
>   if (!get_page_unless_zero(page))
> @@ -539,10 +537,16 @@ static struct page *get_ksm_page(struct
>   put_page(page);
>   goto stale;
>   }
> - rcu_read_unlock();
> + if (locked) {
> + lock_page(page);
> + if (page->mapping != expected_mapping) {
> + unlock_page(page);
> + put_page(page);
> + goto stale;
> + }
> + }
>   return page;
>  stale:
> - rcu_read_unlock();
>   remove_node_from_stable_tree(stable_node);
>   return NULL;
>  }
> @@ -558,11 +562,10 @@ static void remove_rmap_item_from_tree(s
>   struct page *page;
>  
>   stable_node = rmap_item->head;
> - page = get_ksm_page(stable_node);
> + page = get_ksm_page(stable_node, true);
>   if (!page)
>   goto out;
>  
> - lock_page(page);
>   hlist_del(&rmap_item->hlist);
>   unlock_page(page);
>   put_page(page);
> @@ -1042,7 +1045,7 @@ static struct page *stable_tree_search(s
>  
>   cond_resched();
>   stable_node = rb_entry(node, struct stable_node, node);
> - tree_page = get_ksm_page(stable_node);
> + tree_page = get_ksm_page(stable_node, false);
>   if (!tree_page)
>   return NULL;
>  
> @@ -1086,7 +1089,7 @@ static struct stable_node *stable_tree_i
>  
>   cond_resched();
>   stable_node = rb_entry(*new, struct stable_node, node);
> - tree_page = get_ksm_page(stable_node);
> + tree_page = get_ksm_page(stable_node, false);
>   if (!tree_page)
>   return NULL;
>  
> 



Re: 3.8.0-rc4+ - Oops on removing WinTV-HVR-1400 expresscard TV Tuner

2013-01-26 Thread Yijing Wang
On 2013-01-27 4:54, Chris Clayton wrote:
> Hi Martin,
> 
> On 01/24/13 19:21, Martin Mokrejs wrote:
>> Hi Chris,
>>try to include in kernel only acpiphp and omit pciehp. Don't use modules 
>> but include
>> them statically. And try, in addition, check whether "pcie_aspm=off" in 
>> grub.conf helped.
>>
> 
> Thanks for the tip. I had the pciehp driver installed, but it was a module 
> and not loaded. I didn't have acpiphp enabled at all. Building them both in 
> statically appears to have papered over the cracks of the oops :-)

The pciehp driver was not loaded? Did you remove the device from the slot
without powering off?

> 
>> The best would be if you subscribe to linux-pci and read my recent threads
>> about similar issues I had with express cards on a Dell Vostro 3550.
>> Further, there are a lot of changes to PCI hotplug done by Yinghai Lu and
>> Rafael Wysocki; just browse the archives of linux-pci and see the patches
>> and the discussion.
> 
> Those discussions are way above my level of knowledge. I guess all this work 
> will be merged into mainline in due course, so I'll watch for them in 3.9 or 
> later. Unless, of course, there is a tree I could clone and help test the 
> changes with my laptop and expresscard.
> 
> Hotplug isn't working at all on my Fujitsu laptop, so I can only get the card 
> recognised by rebooting with the card inserted (or by writing 1 
> to /sys/bus/pci/rescan). There seem to be a few reports on this in the kernel 
> bugzilla, so I'll look through them and see what's being done.

Hi Chris,
What about using "# modprobe pciehp pciehp_debug=1 pciehp_poll_mode=1
pciehp_poll_time=1"?

Can you resend the dmesg log and "lspci -vvv" info after hotplugging the
device on your Fujitsu laptop with the above module parameters?

Thanks!
Yijing.

> Thanks again.
> 
> Chris
> 
>> Martin
>>
>> Chris Clayton wrote:
>>> Hi,
>>>
>>> I've today taken delivery of a WinTV-HVR-1400 expresscard TV Tuner and got 
>>> an Oops when I removed it from the expresscard slot in my laptop. I will quite 
>>> understand if the response to this report is "don't do that!", but in that 
>>> case, how should one remove one of these cards?
>>>
>>> I have attached three files:
>>>
>>> 1. the dmesg output from when I rebooted the machine after the oops. I have 
>>> turned debugging on in the dib700p and cx23885 modules via modules options 
>>> in /etc/modprobe.d/hvr1400.conf;
>>>
>>> 2. the .config file for the kernel that oopsed.
>>>
>>> 3. the text of the oops message. I've typed this up from a photograph of 
>>> the screen because the laptop was locked up and there was nothing in the 
>>> log files. Apologies for any typos, but I have tried to be careful.
>>>
>>> Assuming the answer isn't don't do that, let me know if I can provide any 
>>> additional diagnostics, test any patches, etc. Please, however, cc me as 
>>> I'm not subscribed.
>>>
>>> Chris



Re: [patch v4 0/18] sched: simplified fork, release load avg and power awareness scheduling

2013-01-26 Thread Alex Shi
On 01/24/2013 11:07 PM, Alex Shi wrote:
> On 01/24/2013 05:44 PM, Borislav Petkov wrote:
>> On Thu, Jan 24, 2013 at 11:06:42AM +0800, Alex Shi wrote:
>>> Since the runnable info needs 345ms to accumulate, balancing
>>> doesn't do well for many tasks burst waking. After talking with Mike
>>> Galbraith, we agreed to just use runnable avg in power friendly 
>>> scheduling and keep current instant load in performance scheduling for 
>>> low latency.
>>>
>>> So the biggest change in this version is removing runnable load avg in
>>> balance and just using runnable data in power balance.
>>>
>>> The patchset bases on Linus' tree, includes 3 parts,
>>> ** 1, bug fix and fork/wake balancing clean up. patch 1~5,
>>> --
>>> the first patch remove one domain level. patch 2~5 simplified fork/wake
>>> balancing, it can increase 10+% hackbench performance on our 4 sockets
>>> SNB EP machine.
>>
>> Ok, I see some benchmarking results here and there in the commit
>> messages but since this is touching the scheduler, you probably would
>> need to make sure it doesn't introduce performance regressions vs
>> mainline with a comprehensive set of benchmarks.
>>
> 
> Thanks a lot for your comments, Borislav! :)
> 
> For this patchset, the code will just check the current policy; if it is
> performance, the code path will fall back to the original performance code
> at once. So there should be no performance change with the performance policy.
> 
> I once tested the balance policy performance with the benchmarks
> kbuild/hackbench/aim9/dbench/tbench on version 2; only hackbench has a
> small drop, ~3%. Others have no clear change.
> 
>> And, AFAICR, mainline does by default the 'performance' scheme by
>> spreading out tasks to idle cores, so have you tried comparing vanilla
>> mainline to your patchset in the 'performance' setting so that you can
>> make sure there are no problems there? And not only hackbench or a
>> microbenchmark but aim9 (I saw that in a commit message somewhere) and
>> whatever else multithreaded benchmark you can get your hands on.
>>
>> Also, you might want to run it on other machines too, not only SNB :-)
> 
> Anyway I will redo the performance testing on this version again on all
> machine. but doesn't expect something change. :)

I just reran some benchmarks: kbuild, specjbb2005, oltp, tbench, aim9,
hackbench, fileio-cfq of sysbench, dbench, aiostress and multi-threaded
loopback netperf, on my Core2, NHM, WSM and SNB platforms. No clear
performance change was found.

I also tested the balance and powersaving policies with the above
benchmarks and found that specjbb2005 drops a lot, 30~50%, with both
policies, whether with OpenJDK or JRockit, and hackbench drops a lot
with the powersaving policy on the SNB 4-socket platform. Others show
no clear change.

> 
>> And what about ARM, maybe someone there can run your patchset too?
>>
>> So, it would be cool to see comprehensive results from all those runs
>> and see what the numbers say.
>>
>> Thanks.
>>
> 
> 


-- 
Thanks
Alex


Re: [PATCH 5/11] ksm: get_ksm_page locked

2013-01-26 Thread Simon Jeons
Hi Hugh, 
On Fri, 2013-01-25 at 18:00 -0800, Hugh Dickins wrote:
> In some places where get_ksm_page() is used, we need the page to be locked.
> 

In get_ksm_page(), why check page->mapping => get_page_unless_zero() =>
check page->mapping again, instead of just get_page_unless_zero() =>
check page->mapping? Is it because get_page_unless_zero() is expensive?

> When KSM migration is fully enabled, we shall want that to make sure that
> the page just acquired cannot be migrated beneath us (raised page count is
> only effective when there is serialization to make sure migration notices).
> Whereas when navigating through the stable tree, we certainly do not want

What's the meaning of "navigating through the stable tree"?

> to lock each node (raised page count is enough to guarantee the memcmps,
> even if page is migrated to another node).
> 
> Since we're about to add another use case, add the locked argument to
> get_ksm_page() now.

Why is the locked parameter passed from stable_tree_search/insert true,
but false for remove_rmap_item_from_tree?

> 
> Hmm, what's that rcu_read_lock() about?  Complete misunderstanding, I
> really got the wrong end of the stick on that!  There's a configuration
> in which page_cache_get_speculative() can do something cheaper than
> get_page_unless_zero(), relying on its caller's rcu_read_lock() to have
> disabled preemption for it.  There's no need for rcu_read_lock() around
> get_page_unless_zero() (and mapping checks) here.  Cut out that
> silliness before making this any harder to understand.
> 
> Signed-off-by: Hugh Dickins 
> ---
>  mm/ksm.c |   23 +--
>  1 file changed, 13 insertions(+), 10 deletions(-)
> 
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:36:53.244205966 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:36:58.856206099 -0800
> @@ -514,15 +514,14 @@ static void remove_node_from_stable_tree
>   * but this is different - made simpler by ksm_thread_mutex being held, but
>   * interesting for assuming that no other use of the struct page could ever
>   * put our expected_mapping into page->mapping (or a field of the union which
> - * coincides with page->mapping).  The RCU calls are not for KSM at all, but
> - * to keep the page_count protocol described with page_cache_get_speculative.
> + * coincides with page->mapping).
>   *
>   * Note: it is possible that get_ksm_page() will return NULL one moment,
>   * then page the next, if the page is in between page_freeze_refs() and
>   * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
>   * is on its way to being freed; but it is an anomaly to bear in mind.
>   */
> -static struct page *get_ksm_page(struct stable_node *stable_node)
> +static struct page *get_ksm_page(struct stable_node *stable_node, bool 
> locked)
>  {
>   struct page *page;
>   void *expected_mapping;
> @@ -530,7 +529,6 @@ static struct page *get_ksm_page(struct
>   page = pfn_to_page(stable_node->kpfn);
>   expected_mapping = (void *)stable_node +
>   (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
> - rcu_read_lock();
>   if (page->mapping != expected_mapping)
>   goto stale;
>   if (!get_page_unless_zero(page))
> @@ -539,10 +537,16 @@ static struct page *get_ksm_page(struct
>   put_page(page);
>   goto stale;
>   }
> - rcu_read_unlock();
> + if (locked) {
> + lock_page(page);
> + if (page->mapping != expected_mapping) {
> + unlock_page(page);
> + put_page(page);
> + goto stale;
> + }
> + }
>   return page;
>  stale:
> - rcu_read_unlock();
>   remove_node_from_stable_tree(stable_node);
>   return NULL;
>  }
> @@ -558,11 +562,10 @@ static void remove_rmap_item_from_tree(s
>   struct page *page;
>  
>   stable_node = rmap_item->head;
> - page = get_ksm_page(stable_node);
> + page = get_ksm_page(stable_node, true);
>   if (!page)
>   goto out;
>  
> - lock_page(page);
>   hlist_del(&rmap_item->hlist);
>   unlock_page(page);
>   put_page(page);
> @@ -1042,7 +1045,7 @@ static struct page *stable_tree_search(s
>  
>   cond_resched();
>   stable_node = rb_entry(node, struct stable_node, node);
> - tree_page = get_ksm_page(stable_node);
> + tree_page = get_ksm_page(stable_node, false);
>   if (!tree_page)
>   return NULL;
>  
> @@ -1086,7 +1089,7 @@ static struct stable_node *stable_tree_i
>  
>   cond_resched();
>   stable_node = rb_entry(*new, struct stable_node, node);
> - tree_page = get_ksm_page(stable_node);
> + tree_page = get_ksm_page(stable_node, false);
>   if (!tree_page)
>   return NULL;
>  
> 

Re: boot warnings due to swap: make each swap partition have one address_space

2013-01-26 Thread Hugh Dickins
On Fri, 25 Jan 2013, Shaohua Li wrote:
> On Thu, Jan 24, 2013 at 10:45:57PM -0500, Sasha Levin wrote:
> > Hi folks,
> > 
> > Commit "swap: make each swap partition have one address_space" is triggering
> > a series of warnings on boot:
> > 
> > [3.446071] [ cut here ]
> > [3.446664] WARNING: at lib/debugobjects.c:261 
> > debug_print_object+0x8e/0xb0()
> > [3.447715] ODEBUG: init active (active state 0) object type: 
> > percpu_counter hint:   (null)
> > [3.450360] Modules linked in:
> > [3.451593] Pid: 1, comm: swapper/0 Tainted: GW
> > 3.8.0-rc4-next-20130124-sasha-4-g838a1b4 #266
> > [3.454508] Call Trace:
> > [3.455248]  [] warn_slowpath_common+0x8c/0xc0
> > [3.455248]  [] warn_slowpath_fmt+0x41/0x50
> > [3.455248]  [] debug_print_object+0x8e/0xb0
> > [3.455248]  [] __debug_object_init+0x20b/0x290
> > [3.455248]  [] debug_object_init+0x15/0x20
> > [3.455248]  [] __percpu_counter_init+0x6d/0xe0
> > [3.455248]  [] bdi_init+0x1ac/0x270
> > [3.455248]  [] swap_setup+0x3b/0x87
> > [3.455248]  [] ? swap_setup+0x87/0x87
> > [3.455248]  [] kswapd_init+0x11/0x7c
> > [3.455248]  [] do_one_initcall+0x8a/0x180
> > [3.455248]  [] do_basic_setup+0x96/0xb4
> > [3.455248]  [] ? loglevel+0x31/0x31
> > [3.455248]  [] ? sched_init_smp+0x150/0x157
> > [3.455248]  [] kernel_init_freeable+0xd2/0x14c
> > [3.455248]  [] ? rest_init+0x140/0x140
> > [3.455248]  [] kernel_init+0x9/0xf0
> > [3.455248]  [] ret_from_fork+0x7c/0xb0
> > [3.455248]  [] ? rest_init+0x140/0x140
> > [3.455248] ---[ end trace 0b176d5c0f21bffb ]---
> > 
> > I haven't looked deeper into it yet, and will do so tomorrow, unless this
> > spew is obvious to anyone.
> 
> Does this one help?
> 
> Subject: give-each-swapper-space-separate-backing_dev_info
> 
> The backing_dev_info can't be shared by all swapper address spaces.

Whyever not?  It's perfectly normal for different inodes/address_spaces
to share a single backing_dev!  Sasha's trace says that it's wrong to
initialize it MAX_SWAPFILES times: fair enough.  But why should I now
want to spend 32kB (not even counting their __percpu counters) on all
these pseudo-backing_devs?

Hugh

p.s. a grand little change would be to move page_cluster and swap_setup()
from mm/swap.c to mm/swap_state.c: they have nothing to do with the other
contents of swap.c, and everything to do with the contents of swap_state.c.
Why swap.c is called swap.c is rather a mystery.
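
For illustration, the sharing described above is simply many
address_spaces pointing at one backing_dev_info; a minimal sketch,
mirroring the pre-patch arrangement visible in the removed lines of the
diff below:

static struct backing_dev_info swap_backing_dev_info = {
	.name		= "swap",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
};

struct address_space swapper_spaces[MAX_SWAPFILES] = {
	[0 ... MAX_SWAPFILES - 1] = {
		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
		.a_ops		= &swap_aops,
		.backing_dev_info = &swap_backing_dev_info,
	}
};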

> 
> Reported-by: Sasha Levin 
> Signed-off-by: Shaohua Li 
> ---
>  mm/swap.c   |1 +
>  mm/swap_state.c |   11 +++
>  2 files changed, 8 insertions(+), 4 deletions(-)
> 
> Index: linux/mm/swap.c
> ===
> --- linux.orig/mm/swap.c  2013-01-22 10:11:58.310933234 +0800
> +++ linux/mm/swap.c   2013-01-25 12:14:49.524863610 +0800
> @@ -859,6 +859,7 @@ void __init swap_setup(void)
>   int i;
>  
>   for (i = 0; i < MAX_SWAPFILES; i++) {
> + swapper_spaces[i].backing_dev_info += i;
>   bdi_init(swapper_spaces[i].backing_dev_info);
>   spin_lock_init(&swapper_spaces[i].tree_lock);
>   INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
> Index: linux/mm/swap_state.c
> ===
> --- linux.orig/mm/swap_state.c2013-01-24 18:08:05.149390977 +0800
> +++ linux/mm/swap_state.c 2013-01-25 12:14:12.849323671 +0800
> @@ -31,16 +31,19 @@ static const struct address_space_operat
>   .migratepage= migrate_page,
>  };
>  
> -static struct backing_dev_info swap_backing_dev_info = {
> - .name   = "swap",
> - .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
> +static struct backing_dev_info swap_backing_dev_info[MAX_SWAPFILES] = {
> + [0 ... MAX_SWAPFILES - 1] = {
> + .name   = "swap",
> + .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK |
> + BDI_CAP_SWAP_BACKED,
> + }
>  };
>  
>  struct address_space swapper_spaces[MAX_SWAPFILES] = {
>   [0 ... MAX_SWAPFILES - 1] = {
>   .page_tree  = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
>   .a_ops  = _aops,
> - .backing_dev_info = &swap_backing_dev_info,
> + .backing_dev_info = &swap_backing_dev_info[0],
>   }
>  };
>  
> 


Re: [ 00/46] 3.7.5-stable review

2013-01-26 Thread Satoru Takeuchi
At Thu, 24 Jan 2013 13:12:38 -0800,
Greg Kroah-Hartman wrote:
> 
> This is the start of the stable review cycle for the 3.7.5 release.
> There are 46 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jan 26 21:09:21 UTC 2013.
> Anything received after that time might be too late.

It's too late... anyway, this kernel can be built and booted without
any problem. Building a kernel with this kernel also works fine.

 - Build Machine: debian wheezy x86_64
   CPU: Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz x 4
   memory: 8GB

 - Test machine: debian wheezy x86_64(KVM guest on the Build Machine)
   vCPU: x2
   memory: 2GB

I reviewed the following patches and it looks good to me.

> -
> Pseudo-Shortlog of commits:
> 
> Greg Kroah-Hartman 
> Linux 3.7.5-rc1
...
> Konrad Rzeszutek Wilk 
> intel_idle: Don't register CPU notifier if we are not running.
...
> Konrad Rzeszutek Wilk 
> ACPI / cpuidle: Fix NULL pointer issues when cpuidle is disabled
...
> Linus Torvalds 
> module: fix missing module_mutex unlock
...
> Oleg Nesterov 
> wake_up_process() should be never used to wakeup a TASK_STOPPED/TRACED 
> task
...
> Steven Rostedt 
> ftrace: Be first to run code modification on modules

Thanks,
Satoru


Re: [ 00/22] 3.4.28-stable review

2013-01-26 Thread Satoru Takeuchi
At Thu, 24 Jan 2013 13:15:21 -0800,
Greg Kroah-Hartman wrote:
> 
> This is the start of the stable review cycle for the 3.4.28 release.
> There are 22 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jan 26 21:13:38 UTC 2013.
> Anything received after that time might be too late.

It's too late... anyway, this kernel can be built and booted without
any problem. Building a kernel with this kernel also works fine.

 - Build Machine: debian wheezy x86_64
   CPU: Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz x 4
   memory: 8GB

 - Test machine: debian wheezy x86_64(KVM guest on the Build Machine)
   vCPU: x2
   memory: 2GB

I reviewed the following patches and it looks good to me.

> -
> Pseudo-Shortlog of commits:
> 
> Greg Kroah-Hartman 
> Linux 3.4.28-rc1
...
> Konrad Rzeszutek Wilk 
> ACPI / cpuidle: Fix NULL pointer issues when cpuidle is disabled
...
> Oleg Nesterov 
> wake_up_process() should be never used to wakeup a TASK_STOPPED/TRACED 
> task
...
> Steven Rostedt 
> ftrace: Be first to run code modification on modules

Thanks,
Satoru


Re: [PATCH v2] mm: clean up soft_offline_page()

2013-01-26 Thread Andi Kleen
On Sat, Jan 26, 2013 at 12:02:11AM -0500, Naoya Horiguchi wrote:
> Currently soft_offline_page() is hard to maintain because it has many
> return points and goto statements. All of this mess comes from get_any_page().
> This function should only get the page refcount as the name implies, but it
> does some page isolating actions like SetPageHWPoison() and dequeuing the
> hugepage. This patch corrects it and introduces some internal subroutines
> to make the soft offlining code more readable and maintainable.
> 
> ChangeLog v2:
>   - receive returned value from __soft_offline_page and soft_offline_huge_page
>   - place __soft_offline_page after soft_offline_page to reduce the diff
>   - rebased onto mmotm-2013-01-23-17-04
>   - add comment on double checks of PageHWpoison
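
For readers without the patch at hand, a rough skeleton of the flow
described above might look like the following; the helper names come
from the changelog, and everything else (including the signatures) is
an assumption rather than the actual patch:

/* Sketch only: get_any_page() just takes a reference, and the
 * isolation work happens in the type-specific helpers. */
int soft_offline_page(struct page *page, int flags)
{
	unsigned long pfn = page_to_pfn(page);
	int ret;

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;

	if (PageHuge(page))
		ret = soft_offline_huge_page(page, flags);
	else
		ret = __soft_offline_page(page, flags);

	return ret;
}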

Ok for me if it passes mce-test

Reviewed-by: Andi Kleen 

-Andi


Re: [PATCH -v2 2/2] x86: Make Linux guest support optional

2013-01-26 Thread H. Peter Anvin

On 01/25/2013 10:43 AM, Borislav Petkov wrote:
> On Fri, Jan 25, 2013 at 07:35:07PM +0100, Borislav Petkov wrote:
>> Ok, if you prefer. We can definitely make all .o's which depend on
>> x86_hyper also depend on CONFIG_HYPERVISOR_GUEST - this solves the
>> whole deal trivially.
>>
>> I'll respin the patches.
>
> ... provided, of course, nothing out-of-tree is using x86_hyper. Can we
> assume that?

Out-of-tree code is responsible for keeping up, not the other way around.

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.



Re: [PATCH 1/11] ksm: allow trees per NUMA node

2013-01-26 Thread Simon Jeons
Hi Hugh,
On Fri, 2013-01-25 at 17:54 -0800, Hugh Dickins wrote:
> From: Petr Holasek 
> 
> Introduces new sysfs boolean knob /sys/kernel/mm/ksm/merge_across_nodes
> which controls merging pages across different numa nodes.
> When it is set to zero only pages from the same node are merged,
> otherwise pages from all nodes can be merged together (default behavior).
> 
> Typical use-case could be a lot of KVM guests on NUMA machine
> and cpus from more distant nodes would have significant increase
> of access latency to the merged ksm page. Sysfs knob was chosen
> for higher variability when some users still prefer higher amount
> of saved physical memory regardless of access latency.
> 
> Every numa node has its own stable & unstable trees because of faster
> searching and inserting. Changing of merge_across_nodes value is possible
> only when there are not any ksm shared pages in system.
> 
> I've tested this patch on numa machines with 2, 4 and 8 nodes and
> measured speed of memory access inside of KVM guests with memory pinned
> to one of nodes with this benchmark:
> 
> http://pholasek.fedorapeople.org/alloc_pg.c
> 
> Population standard deviations of access times in percentage of average
> were following:
> 
> merge_across_nodes=1
> 2 nodes 1.4%
> 4 nodes 1.6%
> 8 nodes   1.7%
> 
> merge_across_nodes=0
> 2 nodes   1%
> 4 nodes   0.32%
> 8 nodes   0.018%
> 
> RFC: https://lkml.org/lkml/2011/11/30/91
> v1: https://lkml.org/lkml/2012/1/23/46
> v2: https://lkml.org/lkml/2012/6/29/105
> v3: https://lkml.org/lkml/2012/9/14/550
> v4: https://lkml.org/lkml/2012/9/23/137
> v5: https://lkml.org/lkml/2012/12/10/540
> v6: https://lkml.org/lkml/2012/12/23/154
> v7: https://lkml.org/lkml/2012/12/27/225
> 
> Hugh notes that this patch brings two problems, whose solution needs
> further support in mm/ksm.c, which follows in subsequent patches:
> 1) switching merge_across_nodes after running KSM is liable to oops
>on stale nodes still left over from the previous stable tree;
> 2) memory hotremove may migrate KSM pages, but there is no provision
>here for !merge_across_nodes to migrate nodes to the proper tree.
> 
> Signed-off-by: Petr Holasek 
> Signed-off-by: Hugh Dickins 
> Acked-by: Rik van Riel 
> ---
>  Documentation/vm/ksm.txt |7 +
>  mm/ksm.c |  151 -
>  2 files changed, 139 insertions(+), 19 deletions(-)
> 
> --- mmotm.orig/Documentation/vm/ksm.txt   2013-01-25 14:36:31.724205455 
> -0800
> +++ mmotm/Documentation/vm/ksm.txt2013-01-25 14:36:38.608205618 -0800
> @@ -58,6 +58,13 @@ sleep_millisecs  - how many milliseconds
> e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
> Default: 20 (chosen for demonstration purposes)
>  
> +merge_across_nodes - specifies if pages from different numa nodes can be 
> merged.
> +   When set to 0, ksm merges only pages which physically
> +   reside in the memory area of same NUMA node. It brings
> +   lower latency to access to shared page. Value can be
> +   changed only when there is no ksm shared pages in system.
> +   Default: 1
> +
>  run  - set 0 to stop ksmd from running but keep merged pages,
> set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
> set 2 to stop ksmd and unmerge all pages currently merged,
> --- mmotm.orig/mm/ksm.c   2013-01-25 14:36:31.724205455 -0800
> +++ mmotm/mm/ksm.c2013-01-25 14:36:38.608205618 -0800
> @@ -36,6 +36,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include "internal.h"
> @@ -139,6 +140,9 @@ struct rmap_item {
>   struct mm_struct *mm;
>   unsigned long address;  /* + low bits used for flags below */
>   unsigned int oldchecksum;   /* when unstable */
> +#ifdef CONFIG_NUMA
> + unsigned int nid;
> +#endif
>   union {
>   struct rb_node node;/* when node of unstable tree */
>   struct {/* when listed from stable tree */
> @@ -153,8 +157,8 @@ struct rmap_item {
>  #define STABLE_FLAG  0x200   /* is listed from the stable tree */
>  
>  /* The stable and unstable tree heads */
> -static struct rb_root root_stable_tree = RB_ROOT;
> -static struct rb_root root_unstable_tree = RB_ROOT;
> +static struct rb_root root_unstable_tree[MAX_NUMNODES];
> +static struct rb_root root_stable_tree[MAX_NUMNODES];
>  
>  #define MM_SLOTS_HASH_BITS 10
>  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
> @@ -188,6 +192,9 @@ static unsigned int ksm_thread_pages_to_
>  /* Milliseconds ksmd should sleep between batches */
>  static unsigned int ksm_thread_sleep_millisecs = 20;
>  
> +/* Zeroed when merging across nodes is not allowed */
> +static unsigned int ksm_merge_across_nodes = 1;
> +
>  #define KSM_RUN_STOP 0
>  #define KSM_RUN_MERGE 1
>  #define 

[PATCH tip/core/rcu 01/12] rcu: Remove restrictions on no-CBs CPUs

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore
at least one no-CBs CPU must remain online at any given time.  These
restrictions are problematic in some situations, such as cases where
all CPUs must run a real-time workload that needs to be insulated from
OS jitter and latencies due to RCU callback invocation.  This commit
therefore provides no-CBs CPUs a way to start and to wait for grace
periods independently of the normal RCU callback mechanisms.  This
approach allows any or all of the CPUs to be designated as no-CBs CPUs,
and allows any proper subset of the CPUs (whether no-CBs CPUs or not)
to be offlined.

This commit also provides event tracing, as well as a fix for a locking
bug spotted by Xie ChanglongX .

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 include/trace/events/rcu.h |   55 +
 init/Kconfig   |4 +-
 kernel/rcutree.c   |   18 ++--
 kernel/rcutree.h   |   20 ++--
 kernel/rcutree_plugin.h|  276 +++-
 5 files changed, 250 insertions(+), 123 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 1918e83..cdfed6d 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -72,6 +72,58 @@ TRACE_EVENT(rcu_grace_period,
 );
 
 /*
+ * Tracepoint for no-callbacks grace-period events.  The caller should
+ * pull the data from the rcu_node structure, other than rcuname, which
+ * comes from the rcu_state structure, and event, which is one of the
+ * following:
+ *
+ * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startedleaf": Leaf-node start proved sufficient.
+ * "Startedleafroot": Leaf-node start proved sufficient after checking root.
+ * "Startedroot": Requested a nocb grace period based on root-node data.
+ * "StartWait": Start waiting for the requested grace period.
+ * "ResumeWait": Resume waiting after signal.
+ * "EndWait": Complete wait.
+ * "Cleanup": Clean up rcu_node structure after previous GP.
+ * "CleanupMore": Clean up, and another no-CB GP is needed.
+ */
+TRACE_EVENT(rcu_nocb_grace_period,
+
+   TP_PROTO(char *rcuname, unsigned long gpnum, unsigned long completed,
+unsigned long c, u8 level, int grplo, int grphi,
+char *gpevent),
+
+   TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent),
+
+   TP_STRUCT__entry(
+   __field(char *, rcuname)
+   __field(unsigned long, gpnum)
+   __field(unsigned long, completed)
+   __field(unsigned long, c)
+   __field(u8, level)
+   __field(int, grplo)
+   __field(int, grphi)
+   __field(char *, gpevent)
+   ),
+
+   TP_fast_assign(
+   __entry->rcuname = rcuname;
+   __entry->gpnum = gpnum;
+   __entry->completed = completed;
+   __entry->c = c;
+   __entry->level = level;
+   __entry->grplo = grplo;
+   __entry->grphi = grphi;
+   __entry->gpevent = gpevent;
+   ),
+
+   TP_printk("%s %lu %lu %lu %u %d %d %s",
+ __entry->rcuname, __entry->gpnum, __entry->completed,
+ __entry->c, __entry->level, __entry->grplo, __entry->grphi,
+ __entry->gpevent)
+);
+
+/*
  * Tracepoint for grace-period-initialization events.  These are
  * distinguished by the type of RCU, the new grace-period number, the
  * rcu_node structure level, the starting and ending CPU covered by the
@@ -601,6 +653,9 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
qsmask) do { } while (0)
+#define trace_rcu_nocb_grace_period(rcuname, gpnum, completed, c, \
+   level, grplo, grphi, event) \
+   do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
diff --git a/init/Kconfig b/init/Kconfig
index fb19b46..97fc178 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -665,7 +665,7 @@ config RCU_BOOST_DELAY
  Accept the default if unsure.
 
 config RCU_NOCB_CPU
-   bool "Offload RCU callback processing from boot-selected CPUs"
+   bool "Offload RCU callback processing from boot-selected CPUs 
(EXPERIMENTAL"
depends on TREE_RCU || TREE_PREEMPT_RCU
default n
help
@@ -683,7 +683,7 @@ config RCU_NOCB_CPU
  callback, and (2) affinity or cgroups can be used to force
  the kthreads to run on whatever set of CPUs is desired.
 
- Say Y here if you want reduced OS jitter on selected CPUs.

Re: [PATCH 09/19] mfd/twl4030: don't warn about uninitialized return code

2013-01-26 Thread Samuel Ortiz
Hi Arnd,

On Fri, Jan 25, 2013 at 10:44:08PM +, Arnd Bergmann wrote:
> If the twl4030_write_script function gets called with
> a zero length argument, its return value does not
> get set. We know that all scripts have a nonzero
> length, but returning an error in case they ever
> do is probably appropriate.
> 
> Without this patch, building omap2plus_defconfig results in:
> 
> drivers/mfd/twl4030-power.c: In function 'load_twl4030_script':
> drivers/mfd/twl4030-power.c:414:5: error: 'err' may be used uninitialized in 
> this function
> 
> Signed-off-by: Arnd Bergmann 
> Reviewed-by: Peter Ujfalusi 
> Reviewed-by: Amit Kucheria 
> Cc: Samuel Ortiz 
> Cc: Kevin Hilman 
> Cc: "Kristo, Tero" 
> ---
>  drivers/mfd/twl4030-power.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Applied to my for-linus branch, thanks.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/


[PATCH tip/core/rcu 02/12] rcu: Provide compile-time control for no-CBs CPUs

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Currently, the only way to specify no-CBs CPUs is via the rcu_nocbs
kernel command-line parameter.  This is inconvenient in some cases,
particularly for randconfig testing, so this commit adds a new
RCU_NOCB_CPU_DEFAULT kernel configuration parameter.  Setting this
new parameter to zero (the default) retains the old behavior, setting
it to one offloads callback processing from CPU 0 (along with any
other CPUs specified by the rcu_nocbs boot-time parameter), and setting
it to two offloads callback processing from all CPUs.

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
---
 init/Kconfig|   35 +++
 kernel/rcutree_plugin.h |   14 ++
 2 files changed, 49 insertions(+), 0 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 97fc178..9a04156 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -686,6 +686,41 @@ config RCU_NOCB_CPU
  Say Y here if you want to help to debug reduced OS jitter.
  Say N here if you are unsure.
 
+choice
+   prompt "Build-forced no-CBs CPUs"
+   default RCU_NOCB_CPU_NONE
+
+config RCU_NOCB_CPU_NONE
+   bool "No build_forced no-CBs CPUs"
+   depends on RCU_NOCB_CPU
+   help
+ This option does not force any of the CPUs to be no-CBs CPUs.
+ Only CPUs designated by the rcu_nocbs= boot parameter will be
+ no-CBs CPUs.
+
+config RCU_NOCB_CPU_ZERO
+   bool "CPU 0 is a build_forced no-CBs CPU"
+   depends on RCU_NOCB_CPU
+   help
+ This option forces CPU 0 to be a no-CBs CPU.  Additional CPUs
+ may be designated as no-CBs CPUs using the rcu_nocbs= boot
+ parameter.
+
+ Select this if CPU 0 needs to be a no-CBs CPU for real-time
+ or energy-efficiency reasons.
+
+config RCU_NOCB_CPU_ALL
+   bool "All CPUs are build_forced no-CBs CPUs"
+   depends on RCU_NOCB_CPU
+   help
+ This option forces all CPUs to be no-CBs CPUs.  The rcu_nocbs=
+ boot parameter will be ignored.
+
+ Select this if all CPUs need to be no-CBs CPUs for real-time
+ or energy-efficiency reasons.
+
+endchoice
+
 endmenu # "RCU Subsystem"
 
 config IKCONFIG
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 2db3160..e32236e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -85,6 +85,20 @@ static void __init rcu_bootup_announce_oddness(void)
if (nr_cpu_ids != NR_CPUS)
printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to 
nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
 #ifdef CONFIG_RCU_NOCB_CPU
+#ifndef CONFIG_RCU_NOCB_CPU_NONE
+   if (!have_rcu_nocb_mask) {
+   alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+   have_rcu_nocb_mask = true;
+   }
+#ifdef CONFIG_RCU_NOCB_CPU_ZERO
+   pr_info("\tExperimental no-CBs CPU 0\n");
+   cpumask_set_cpu(0, rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
+#ifdef CONFIG_RCU_NOCB_CPU_ALL
+   pr_info("\tExperimental no-CBs for all CPUs\n");
+   cpumask_setall(rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
+#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
if (have_rcu_nocb_mask) {
cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v1/1] mfd: i2c issue fix for da9052/53

2013-01-26 Thread Samuel Ortiz
Hi Ashish,

On Fri, Jan 25, 2013 at 02:03:49PM +0530, Ashish Jangam wrote:
> An issue has been reported where the PMIC either locks up or fails to
> respond following a system Reset. This could result in a second write
> in which the bus writes the current content of the write buffer to address
> of the last I2C access.
> 
> The failure case is where this unwanted write transfers incorrect data to
> a critical register.
> 
> This patch fixes this issue by following any read or write with a dummy read
> to a safe register address. A safe register address is one where the contents
> will not affect the operation of the system.
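
A minimal sketch of the dummy-read pattern described above, assuming plain
SMBus accessors; the register value and helper names are illustrative only,
and the real driver's register I/O path differs:

#include <linux/i2c.h>

/* Placeholder register; the real patch picks a specific safe address. */
#define EXAMPLE_SAFE_REG	0x00

static int example_dummy_read(struct i2c_client *client)
{
	s32 ret = i2c_smbus_read_byte_data(client, EXAMPLE_SAFE_REG);

	return ret < 0 ? ret : 0;
}

static int example_write_then_settle(struct i2c_client *client, u8 reg, u8 val)
{
	int ret = i2c_smbus_write_byte_data(client, reg, val);

	if (ret)
		return ret;
	/* Leave the bus pointing at a harmless address before any reset. */
	return example_dummy_read(client);
}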
> 
> Signed-off-by: Ashish Jangam 
> ---
>  drivers/mfd/da9052-i2c.c  |   61 ++
>  include/linux/mfd/da9052/da9052.h |   66 ++--
>  include/linux/mfd/da9052/reg.h|3 ++
>  3 files changed, 126 insertions(+), 4 deletions(-)
Applied to my for-linus branch.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mfd: pcf50633: Init pcf->dev before using it

2013-01-26 Thread Samuel Ortiz
Hi Axel,

On Fri, Jan 25, 2013 at 11:08:00AM +0800, Axel Lin wrote:
> 2013/1/22 Samuel Ortiz 
> 
> > Hi Axel,
> >
> > On Tue, Dec 25, 2012 at 10:52:49AM +0800, Axel Lin wrote:
> > > Current code uses pcf->dev in the dev_err call before setting it to
> > > &client->dev. Fix it.
> > >
> > > Signed-off-by: Axel Lin 
> > > ---
> > >  drivers/mfd/pcf50633-core.c |5 ++---
> > >  1 file changed, 2 insertions(+), 3 deletions(-)
> > Applied to my for-linus branch, thanks.
> >
> 
> Hi Samuel,
> Seems the patches queued in your for-linus branch does not (yet) appear in
> linux-next?
for-linus goes to Linus and then they show up in linux-next.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 09/10] rcu: Remove obsolete Kconfig option from comment

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Signed-off-by: Paul E. McKenney 
---
 include/linux/rcupdate.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f..7e12dba 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -749,7 +749,7 @@ static inline void rcu_preempt_sleep_check(void)
  * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
  * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
  * be preempted, but explicit blocking is illegal.  Finally, in preemptible
- * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
+ * RCU implementations in real-time (with -rt patchset) kernel builds,
  * RCU read-side critical sections may be preempted and they may also
  * block, but only when acquiring spinlocks that are subject to priority
  * inheritance.
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 0/10] v2 RCU fixes for 3.9

2013-01-26 Thread Paul E. McKenney
Hello!

The following fixes are intended for 3.9:

1.  Fix int/long type confusion in trace_rcu_batch_start().
2.  Declare rcu_is_cpu_rrupt_from_idle() static, courtesy of
Josh Triplett.
3.  Make rcu_eqs_enter_common() trace the new nesting value instead
of zero, courtesy of Li Zhong.
4.  Silence a gcc array-out-of-bounds false positive in rcu_init_one().
5.  Code style fix in rcu_torture_barrier_init(), courtesy of Sasha Levin.
6.  Grammar fix to rcu_scheduler_active comment, courtesy of Cody
Schafer.
7.  Consolidate RCU's debugging Kconfig options, courtesy of Dave Hansen.
8.  Remove unused context-tracker functions, courtesy of Li Zhong.
9.  Remove obsolete Kconfig option from header comment.
10. Add comments to new context-tracking functions and internals,
courtesy of Frederic Weisbecker.

Changes since v1:

o   Added #7-#10.

Thanx, Paul


 b/include/linux/rcupdate.h   |2 
 b/include/trace/events/rcu.h |6 +-
 b/kernel/context_tracking.c  |   75 
 b/kernel/rcutiny.c   |2 
 b/kernel/rcutorture.c|2 
 b/kernel/rcutree.c   |   13 ++--
 b/kernel/rcutree.h   |4 -
 b/lib/Kconfig.debug  |  114 ++-
 8 files changed, 137 insertions(+), 81 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/4] mfd: palma: add RTC and GPIO support

2013-01-26 Thread Samuel Ortiz
Hi Laxman,

On Thu, Jan 03, 2013 at 04:16:56PM +0530, Laxman Dewangan wrote:
> This series adds the RTC and GPIO drivers for the TI Palmas series PMIC.
> The changes are split so that they are easy to apply in the different subsystems.
> 
> Laxman Dewangan (4):
>   mfd: palmas: add rtc irq number as irq resource for palmas-rtc
>   mfd: palmas: add apis to access the Palmas' registers
>   gpio: palmas: Add support for Palmas GPIO
>   rtc: palmas: add RTC driver Palmas series PMIC
All 4 patches applied to my for-next branch, thanks.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 02/10] rcu: Make rcu_is_cpu_rrupt_from_idle helper functions static

2013-01-26 Thread Paul E. McKenney
From: Josh Triplett 

Both rcutiny and rcutree define a helper function named
rcu_is_cpu_rrupt_from_idle(), each used exactly once, later in the
same file.  This commit therefore declares these helper functions static.

Signed-off-by: Josh Triplett 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutiny.c |2 +-
 kernel/rcutree.c |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e7dce58..9f72a0f 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
  * interrupts don't count, we must be running at the first interrupt
  * level.
  */
-int rcu_is_cpu_rrupt_from_idle(void)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
return rcu_dynticks_nesting <= 1;
 }
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e441b77..cceda76 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -727,7 +727,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
  * interrupt from idle, return true.  The caller must have at least
  * disabled preemption.
  */
-int rcu_is_cpu_rrupt_from_idle(void)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/1] mfd: wm8994: Use devm_regulator_bulk_get API

2013-01-26 Thread Samuel Ortiz
Hi Sachin,

On Thu, Jan 24, 2013 at 09:13:20AM +0530, Sachin Kamat wrote:
> Hi Samuel,
> 
> On 8 January 2013 16:06, Mark Brown  
> wrote:
> > On Tue, Jan 08, 2013 at 02:01:22PM +0530, Sachin Kamat wrote:
> >> devm_regulator_bulk_get is device managed and saves some cleanup
> >> and exit code.
> >
> > Acked-by: Mark Brown 
> 
> Would you be picking this patch up?
I will, yes.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 2/4] rcu: Reduce rcutorture tracing

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

Currently, rcutorture traces every read-side access.  This can be
problematic because even a two-minute rcutorture run on a two-CPU system
can generate 28,853,363 reads.  Normally, only a failing read is of
interest, so this commit adjusts rcutorture's tracing to trace only
failing reads.  The resulting event tracing records the time
and the ->completed value captured at the beginning of the RCU read-side
critical section, allowing correlation with other event-tracing messages.
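
A rough sketch of that pattern (not the actual rcutorture code;
read_failed() and the cur_completed argument are assumed placeholders):

#include <linux/rcupdate.h>
#include <linux/trace_clock.h>

/* Assumed stand-in for "did this read-side access see garbage?". */
static bool read_failed(void)
{
	return false;
}

static void example_torture_read(struct rcu_head *rhp, unsigned long cur_completed)
{
	static char name[] = "example";
	unsigned long ts = (unsigned long)trace_clock_local();
	unsigned long started = cur_completed;	/* ->completed at the start */
	bool failed;

	rcu_read_lock();
	failed = read_failed();
	rcu_read_unlock();

	if (failed)	/* trace only the interesting (failing) reads */
		do_trace_rcu_torture_read(name, rhp, ts, started, cur_completed);
}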

Signed-off-by: Paul E. McKenney 
Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
[ paulmck: Add fix to build problem located by Randy Dunlap based on
  diagnosis by Steven Rostedt. ]
---
 include/linux/rcupdate.h   |   13 ++---
 include/trace/events/rcu.h |   19 ++-
 kernel/rcupdate.c  |9 ++---
 kernel/rcutorture.c|   31 ---
 lib/Kconfig.debug  |1 +
 5 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f..7f89cea 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
 extern void rcutorture_record_test_transition(void);
 extern void rcutorture_record_progress(unsigned long vernum);
 extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
+ struct rcu_head *rhp,
+ unsigned long secs,
+ unsigned long c_old,
+ unsigned long c);
 #else
 static inline void rcutorture_record_test_transition(void)
 {
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long 
vernum)
 }
 #ifdef CONFIG_RCU_TRACE
 extern void do_trace_rcu_torture_read(char *rcutorturename,
- struct rcu_head *rhp);
+ struct rcu_head *rhp,
+ unsigned long secs,
+ unsigned long c_old,
+ unsigned long c);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+   do { } while (0)
 #endif
 #endif
 
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b..09af021 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -523,22 +523,30 @@ TRACE_EVENT(rcu_batch_end,
  */
 TRACE_EVENT(rcu_torture_read,
 
-   TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
+   TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
+unsigned long secs, unsigned long c_old, unsigned long c),
 
-   TP_ARGS(rcutorturename, rhp),
+   TP_ARGS(rcutorturename, rhp, secs, c_old, c),
 
TP_STRUCT__entry(
__field(char *, rcutorturename)
__field(struct rcu_head *, rhp)
+   __field(unsigned long, secs)
+   __field(unsigned long, c_old)
+   __field(unsigned long, c)
),
 
TP_fast_assign(
__entry->rcutorturename = rcutorturename;
__entry->rhp = rhp;
+   __entry->secs = secs;
+   __entry->c_old = c_old;
+   __entry->c = c;
),
 
-   TP_printk("%s torture read %p",
- __entry->rcutorturename, __entry->rhp)
+   TP_printk("%s torture read %p %luus c: %lu %lu",
+ __entry->rcutorturename, __entry->rhp,
+ __entry->secs, __entry->c_old, __entry->c)
 );
 
 /*
@@ -608,7 +616,8 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+   do { } while (0)
 #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf761..303359d 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,14 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || 
defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
+  unsigned long secs,
+  unsigned long c_old, unsigned long c)
 {
-   trace_rcu_torture_read(rcutorturename, rhp);
+   

Re: [PATCH 0/3] Fix two bugs in rtl8411

2013-01-26 Thread Samuel Ortiz
Hi Wei,

On Wed, Jan 23, 2013 at 09:51:03AM +0800, wei_w...@realsil.com.cn wrote:
> From: Wei WANG 
> 
> 1. The method to switch output voltage is different in rtl8411
> 2. Fix the formula to calculate N from SD clock if clock divider is more than 1
> 
> Wei WANG (3):
>   MFD:rtsx: Add callback function switch_output_voltage
>   MMC:rtsx: Using callback function to switch output voltage
>   MFD:rtsx: Add callback function conv_clk_and_div_n
All 3 patches applied to my for-linus branch. I rephrased the changelog
slightly, to make it clearer that this is a bug fix.

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 0/2] Callback tagging for 3.9

2013-01-26 Thread Paul E. McKenney
Hello!

This series contains callback-tagging patches.  These were sent earlier
in the idle/no-CBs series, but have been isolated as the more trustworthy
part of that series.

1.  Tag callback lists with the grace-period number that they are
waiting for.  This change enables a number of optimizations
for RCU_FAST_NO_HZ, and though it adds a bit of code, it greatly
simplifies RCU's callback handling.
2.  Trace callback acceleration (which is when RCU notices that a
group of callbacks doesn't actually need to wait as long as it
previously thought).

Thanx, Paul


 b/include/trace/events/rcu.h |6 -
 b/kernel/rcutree.c   |  201 +--
 b/kernel/rcutree.h   |2 
 3 files changed, 179 insertions(+), 30 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.8-rc4 arm] SCSI_SYM53C8XX_2 module cannot register IRQ

2013-01-26 Thread Tetsuo Handa
Linus Walleij wrote:
> I'm trying to reproduce this, but how do you reconfigure the kernel to
> get PCI, SCSI and such stuff enabled?
> 
> The stock versatile_defconfig does not even have SCSI enabled...

I'm using a customized config for qemu. I've just updated the config to
http://I-love.SAKURA.ne.jp/tmp/config-3.8-rc1-arm .

Regards.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 08/10] rcu: Remove unused code originally used for context tracking

2013-01-26 Thread Paul E. McKenney
From: Li Zhong 

As context tracking subsystem evolved, it stopped using ignore_user_qs
and in_user defined in the rcu_dynticks structure.  This commit therefore
removes them.

Signed-off-by: Li Zhong 
Signed-off-by: Paul E. McKenney 
Acked-by: Frederic Weisbecker 
---
 kernel/rcutree.c |3 ---
 kernel/rcutree.h |4 
 2 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d78ba60..8a13c8e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2719,9 +2719,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
-#ifdef CONFIG_RCU_USER_QS
-   WARN_ON_ONCE(rdp->dynticks->in_user);
-#endif
rdp->cpu = cpu;
rdp->rsp = rsp;
rcu_boot_init_nocb_percpu_data(rdp);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4b69291..6f21f2e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -102,10 +102,6 @@ struct rcu_dynticks {
/* idle-period nonlazy_posted snapshot. */
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-#ifdef CONFIG_RCU_USER_QS
-   bool ignore_user_qs;/* Treat userspace as extended QS or not */
-   bool in_user;   /* Is the CPU in userland from RCU POV? */
-#endif
 };
 
 /* RCU's kthread states for tracing. */
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 04/10] rcu: Silence compiler array out-of-bounds false positive

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

It turns out that gcc 4.8 warns on array indexes being out of bounds
unless it can prove otherwise.  It gives this warning on some RCU
initialization code.  Because this is far from any fastpath, add
an explicit check for the array bounds and panic if they are exceeded.  This gives the
compiler enough information to figure out that the array index is never
out of bounds.

However, if a similar false positive occurs on a fastpath, it will
probably be necessary to tell the compiler to keep its array-index
anxieties to itself.  ;-)
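
A generic illustration of the technique, using a made-up init-time array
(nothing here is from rcu_init_one() itself):

#include <linux/kernel.h>
#include <linux/init.h>

#define EXAMPLE_LVLS 4
static int example_size[EXAMPLE_LVLS];

static void __init example_init(int nlvls)
{
	int i;

	/* Explicit bound check: cheap on a one-time init path, and it
	 * convinces gcc that the index below never runs past the array. */
	if (nlvls > EXAMPLE_LVLS)
		panic("example_init: nlvls overflow");
	for (i = 0; i < nlvls; i++)
		example_size[i] = 1 << i;
}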

Reported-by: Markus Trippelsdorf 
Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 kernel/rcutree.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d145796..e0d9815 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2938,6 +2938,10 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */
 
+   /* Silence gcc 4.8 warning about array index out of range. */
+   if (rcu_num_lvls > RCU_NUM_LVLS)
+   panic("rcu_init_one: rcu_num_lvls overflow");
+
/* Initialize the level-tracking arrays. */
 
for (i = 0; i < rcu_num_lvls; i++)
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 06/10] rcu: Correct 'optimized' to 'optimize' in header comment

2013-01-26 Thread Paul E. McKenney
From: Cody P Schafer 

Small grammar fix in rcutree comment regarding 'rcu_scheduler_active'
var.

Signed-off-by: Cody P Schafer 
Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 kernel/rcutree.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e0d9815..d78ba60 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # 
rcu_nodes in use. */
  * The rcu_scheduler_active variable transitions from zero to one just
  * before the first task is spawned.  So when this variable is zero, RCU
  * can assume that there is but one task, allowing RCU to (for example)
- * optimized synchronize_sched() to a simple barrier().  When this variable
+ * optimize synchronize_sched() to a simple barrier().  When this variable
  * is one, RCU must actually do all the hard work required to detect real
  * grace periods.  This variable is also used to suppress boot-time false
  * positives from lockdep-RCU error checking.
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 05/10] rcutorture: Don't compare ptr with 0

2013-01-26 Thread Paul E. McKenney
From: Sasha Levin 

Signed-off-by: Sasha Levin 
Reviewed-by: Josh Triplett 
Signed-off-by: Paul E. McKenney 
---
 kernel/rcutorture.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01..0249800 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -1749,7 +1749,7 @@ static int rcu_torture_barrier_init(void)
barrier_cbs_wq =
kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]),
GFP_KERNEL);
-   if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0)
+   if (barrier_cbs_tasks == NULL || !barrier_cbs_wq)
return -ENOMEM;
for (i = 0; i < n_barrier_cbs; i++) {
init_waitqueue_head(&barrier_cbs_wq[i]);
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 07/10] rcu: Consolidate debugging Kconfig options

2013-01-26 Thread Paul E. McKenney
From: Dave Hansen 

The RCU-related debugging Kconfig options are in two different places,
and consume too much screen real estate.  This commit therefore
consolidates them into their own menu.

Signed-off-by: Dave Hansen 
Signed-off-by: Paul E. McKenney 
---
 lib/Kconfig.debug |  114 +++-
 1 files changed, 59 insertions(+), 55 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3a35309..122db3d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -605,61 +605,6 @@ config PROVE_LOCKING
 
 For more details, see Documentation/lockdep-design.txt.
 
-config PROVE_RCU
-   bool "RCU debugging: prove RCU correctness"
-   depends on PROVE_LOCKING
-   default n
-   help
-This feature enables lockdep extensions that check for correct
-use of RCU APIs.  This is currently under development.  Say Y
-if you want to debug RCU usage or help work on the PROVE_RCU
-feature.
-
-Say N if you are unsure.
-
-config PROVE_RCU_REPEATEDLY
-   bool "RCU debugging: don't disable PROVE_RCU on first splat"
-   depends on PROVE_RCU
-   default n
-   help
-By itself, PROVE_RCU will disable checking upon issuing the
-first warning (or "splat").  This feature prevents such
-disabling, allowing multiple RCU-lockdep warnings to be printed
-on a single reboot.
-
-Say Y to allow multiple RCU-lockdep warnings per boot.
-
-Say N if you are unsure.
-
-config PROVE_RCU_DELAY
-   bool "RCU debugging: preemptible RCU race provocation"
-   depends on DEBUG_KERNEL && PREEMPT_RCU
-   default n
-   help
-There is a class of races that involve an unlikely preemption
-of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
-been set to INT_MIN.  This feature inserts a delay at that
-point to increase the probability of these races.
-
-Say Y to increase probability of preemption of __rcu_read_unlock().
-
-Say N if you are unsure.
-
-config SPARSE_RCU_POINTER
-   bool "RCU debugging: sparse-based checks for pointer usage"
-   default n
-   help
-This feature enables the __rcu sparse annotation for
-RCU-protected pointers.  This annotation will cause sparse
-to flag any non-RCU used of annotated pointers.  This can be
-helpful when debugging RCU usage.  Please note that this feature
-is not intended to enforce code cleanliness; it is instead merely
-a debugging aid.
-
-Say Y to make sparse flag questionable use of RCU-protected pointers
-
-Say N if you are unsure.
-
 config LOCKDEP
bool
depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT 
&& LOCKDEP_SUPPORT
@@ -937,6 +882,63 @@ config BOOT_PRINTK_DELAY
  BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect
  what it believes to be lockup conditions.
 
+menu "RCU Debugging"
+
+config PROVE_RCU
+   bool "RCU debugging: prove RCU correctness"
+   depends on PROVE_LOCKING
+   default n
+   help
+This feature enables lockdep extensions that check for correct
+use of RCU APIs.  This is currently under development.  Say Y
+if you want to debug RCU usage or help work on the PROVE_RCU
+feature.
+
+Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+   bool "RCU debugging: don't disable PROVE_RCU on first splat"
+   depends on PROVE_RCU
+   default n
+   help
+By itself, PROVE_RCU will disable checking upon issuing the
+first warning (or "splat").  This feature prevents such
+disabling, allowing multiple RCU-lockdep warnings to be printed
+on a single reboot.
+
+Say Y to allow multiple RCU-lockdep warnings per boot.
+
+Say N if you are unsure.
+
+config PROVE_RCU_DELAY
+   bool "RCU debugging: preemptible RCU race provocation"
+   depends on DEBUG_KERNEL && PREEMPT_RCU
+   default n
+   help
+There is a class of races that involve an unlikely preemption
+of __rcu_read_unlock() just after ->rcu_read_lock_nesting has
+been set to INT_MIN.  This feature inserts a delay at that
+point to increase the probability of these races.
+
+Say Y to increase probability of preemption of __rcu_read_unlock().
+
+Say N if you are unsure.
+
+config SPARSE_RCU_POINTER
+   bool "RCU debugging: sparse-based checks for pointer usage"
+   default n
+   help
+This feature enables the __rcu sparse annotation for
+RCU-protected pointers.  This annotation will cause sparse
+to flag any non-RCU used of annotated pointers.  This can be
+helpful when debugging RCU usage.  Please note that this feature
+is not intended to enforce code cleanliness; it is instead merely
+a debugging aid.
+
+Say Y to 

[PATCH tip/core/rcu 03/10] rcu: Use new nesting value for rcu_dyntick trace in rcu_eqs_enter_common

2013-01-26 Thread Paul E. McKenney
From: Li Zhong 

This patch uses the real new value of dynticks_nesting instead of 0 in
rcu_eqs_enter_common().

Signed-off-by: Li Zhong 
Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 kernel/rcutree.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index cceda76..d145796 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -336,7 +336,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
bool user)
 {
-   trace_rcu_dyntick("Start", oldval, 0);
+   trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
if (!user && !is_idle_task(current)) {
struct task_struct *idle = idle_task(smp_processor_id());
 
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 10/10] context_tracking: Add comments on interface and internals

2013-01-26 Thread Paul E. McKenney
From: Frederic Weisbecker 

This subsystem lacks explanations of its purpose and
design. Add these missing comments.

v4: Document function parameter to be more kernel-doc
friendly, as per Namhyung suggestion.
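
A hedged sketch of the calling convention the new comments describe:
user_exit() marks the return to kernel code, user_enter() the return to
userspace. The function names below are placeholders for arch entry code,
not real kernel symbols:

#include <linux/context_tracking.h>

static void example_syscall_entry(void)
{
	user_exit();	/* back in the kernel: RCU may be used again */
	/* ... syscall work, possibly with RCU read-side sections ... */
}

static void example_return_to_user(void)
{
	/* any kernel code that needs RCU must run before this point */
	user_enter();	/* RCU treats the CPU as in an extended quiescent state */
	/* ... low-level arch code then switches to userspace ... */
}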

Reported-by: Andrew Morton 
Signed-off-by: Frederic Weisbecker 
Cc: Alessio Igor Bogani 
Cc: Andrew Morton 
Cc: Chris Metcalf 
Cc: Christoph Lameter 
Cc: Geoff Levand 
Cc: Gilad Ben Yossef 
Cc: Hakan Akkan 
Cc: Ingo Molnar 
Cc: Li Zhong 
Cc: Namhyung Kim 
Cc: Paul E. McKenney 
Cc: Paul Gortmaker 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Cc: Thomas Gleixner 
Signed-off-by: Paul E. McKenney 
---
 kernel/context_tracking.c |   75 +++--
 1 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd..d566aba 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,3 +1,19 @@
+/*
+ * Context tracking: Probe on high level context boundaries such as kernel
+ * and userspace. This includes syscalls and exceptions entry/exit.
+ *
+ * This is used by RCU to remove its dependency on the timer tick while a CPU
+ * runs in userspace.
+ *
+ *  Started by Frederic Weisbecker:
+ *
+ * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker 
+ *
+ * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
+ * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
+ *
+ */
+
 #include 
 #include 
 #include 
@@ -6,8 +22,8 @@
 
 struct context_tracking {
/*
-* When active is false, hooks are not set to
-* minimize overhead: TIF flags are cleared
+* When active is false, probes are unset in order
+* to minimize overhead: TIF flags are cleared
 * and calls to user_enter/exit are ignored. This
 * may be further optimized using static keys.
 */
@@ -24,6 +40,15 @@ static DEFINE_PER_CPU(struct context_tracking, 
context_tracking) = {
 #endif
 };
 
+/**
+ * user_enter - Inform the context tracking that the CPU is going to
+ *  enter userspace mode.
+ *
+ * This function must be called right before we switch from the kernel
+ * to userspace, when it's guaranteed the remaining kernel instructions
+ * to execute won't use any RCU read side critical section because this
+ * function sets RCU in extended quiescent state.
+ */
 void user_enter(void)
 {
unsigned long flags;
@@ -39,40 +64,70 @@ void user_enter(void)
if (in_interrupt())
return;
 
+   /* Kernel threads aren't supposed to go to userspace */
WARN_ON_ONCE(!current->mm);
 
local_irq_save(flags);
if (__this_cpu_read(context_tracking.active) &&
__this_cpu_read(context_tracking.state) != IN_USER) {
__this_cpu_write(context_tracking.state, IN_USER);
+   /*
+* At this stage, only low level arch entry code remains and
+* then we'll run in userspace. We can assume there won't be
+* any RCU read-side critical section until the next call to
+* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+* on the tick.
+*/
rcu_user_enter();
}
local_irq_restore(flags);
 }
 
+
+/**
+ * user_exit - Inform the context tracking that the CPU is
+ * exiting userspace mode and entering the kernel.
+ *
+ * This function must be called after we entered the kernel from userspace
+ * before any use of RCU read side critical section. This potentially include
+ * any high level kernel code like syscalls, exceptions, signal handling, 
etc...
+ *
+ * This call supports re-entrancy. This way it can be called from any exception
+ * handler without needing to know if we came from userspace or not.
+ */
 void user_exit(void)
 {
unsigned long flags;
 
-   /*
-* Some contexts may involve an exception occuring in an irq,
-* leading to that nesting:
-* rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-* This would mess up the dyntick_nesting count though. And rcu_irq_*()
-* helpers are enough to protect RCU uses inside the exception. So
-* just return immediately if we detect we are in an IRQ.
-*/
if (in_interrupt())
return;
 
local_irq_save(flags);
if (__this_cpu_read(context_tracking.state) == IN_USER) {
__this_cpu_write(context_tracking.state, IN_KERNEL);
+   /*
+* We are going to run code that may use RCU. Inform
+* RCU core about that (ie: we may need the tick again).
+*/
rcu_user_exit();
}
local_irq_restore(flags);
 }
 
+
+/**
+ * context_tracking_task_switch - context switch the syscall callbacks
+ * @prev: the task that is being switched out
+ * @next: the task that is being switched in
+ *
+ * The 

[PATCH] x86, numa: Use __pa_nodebug instead

2013-01-26 Thread Borislav Petkov
From: Borislav Petkov 

... and fix the following warning:

arch/x86/mm/numa.c: In function ‘setup_node_data’:
arch/x86/mm/numa.c:222:3: warning: passing argument 1 of ‘__phys_addr_nodebug’ 
makes integer from pointer without a cast [enabled by default]
In file included from 
/home/boris/w/kernel/linux-2.6/arch/x86/include/asm/page.h:11:0,
 from 
/home/boris/w/kernel/linux-2.6/arch/x86/include/asm/thread_info.h:11,
 from include/linux/thread_info.h:54,
 from include/linux/preempt.h:9,
 from include/linux/spinlock.h:50,
 from include/linux/mmzone.h:7,
 from include/linux/gfp.h:4,
 from include/linux/mm.h:8,
 from arch/x86/mm/numa.c:3:
/home/boris/w/kernel/linux-2.6/arch/x86/include/asm/page_64.h:12:29: note: 
expected ‘long unsigned int’ but argument is of type ‘void *’
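
A brief illustration of the warning class and the fix (not part of the
patch): __phys_addr_nodebug() expects an unsigned long, while
__pa_nodebug() performs the cast inside the macro, assuming the usual
x86 definition; the function name here is made up:

#include <asm/page.h>

static unsigned long example_nd_to_phys(void *nd)
{
	/* __pa_nodebug() casts to unsigned long internally, so no warning. */
	return __pa_nodebug(nd);
}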

Signed-off-by: Borislav Petkov 
Cc: Dave Hansen 
---
 arch/x86/mm/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 76604eb9e4b0..b2313c6739f5 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -219,7 +219,7 @@ static void __init setup_node_data(int nid, u64 start, u64 
end)
 */
nd = alloc_remap(nid, nd_size);
if (nd) {
-   nd_pa = __phys_addr_nodebug(nd);
+   nd_pa = __pa_nodebug(nd);
remapped = true;
} else {
nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
-- 
1.8.1.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 01/10] rcu: Fix blimit type for trace_rcu_batch_start()

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

When the type of global variable blimit changed from int to long, the
type of the blimit argument of trace_rcu_batch_start() needed to have
changed.  This commit fixes this issue.

Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 include/trace/events/rcu.h |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b..f919498 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -393,7 +393,7 @@ TRACE_EVENT(rcu_kfree_callback,
  */
 TRACE_EVENT(rcu_batch_start,
 
-   TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
+   TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit),
 
TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
 
@@ -401,7 +401,7 @@ TRACE_EVENT(rcu_batch_start,
__field(char *, rcuname)
__field(long, qlen_lazy)
__field(long, qlen)
-   __field(int, blimit)
+   __field(long, blimit)
),
 
TP_fast_assign(
@@ -411,7 +411,7 @@ TRACE_EVENT(rcu_batch_start,
__entry->blimit = blimit;
),
 
-   TP_printk("%s CBs=%ld/%ld bl=%d",
+   TP_printk("%s CBs=%ld/%ld bl=%ld",
  __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
  __entry->blimit)
 );
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mfd: prcmu: delete pin control helpers

2013-01-26 Thread Samuel Ortiz
Hi Linus,

On Tue, Jan 22, 2013 at 02:25:35PM +0100, Linus Walleij wrote:
> On Tue, Jan 22, 2013 at 4:25 AM, Samuel Ortiz  wrote:
> > On Mon, Jan 07, 2013 at 09:13:18AM +0100, Linus Walleij wrote:
> >> From: Linus Walleij 
> >>
> >> These static inlines are duplicating the task now done by the
> >> Nomadik pinctrl drivers, so delete them from the prcmu static
> >> inlines, also delete the register definitions as these should
> >> only be known by the pinctrl driver.
> >>
> >> Cc: Loic Pallardy 
> >> Cc: Patrice Chotard 
> >> Cc: Michel Jaouen 
> >> Signed-off-by: Linus Walleij 
> >> ---
> >> Sam, this is a pure clean-up patch following the v3.8 merge
> >> window. These inlines are completely unused in the kernel and
> >> I'd simply apply it to the -rc series but it's up to you.
> >
> > Unless it really makes your life easier, I like to keep my -rc pull requests
> > for fixes only.
> > I'll apply it to my for-next branch for now.
> 
> OK in that case, can I take it through the ARM SoC tree
> with your ACK, because I have other cleanups that depend on
> this one?
Fair enough:
Acked-by: Samuel Ortiz 

Cheers,
Samuel.

-- 
Intel Open Source Technology Centre
http://oss.intel.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 3/4] Documentation: Memory barrier semantics of atomic_xchg()

2013-01-26 Thread Paul E. McKenney
From: Richard Braun 

Add atomic_xchg() to documentation for atomic operations and
memory barriers.
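
A small illustrative use of the documented semantics, with made-up
variable names: atomic_xchg() implies a full memory barrier on both sides
of the operation, so the hand-off below is ordered:

#include <linux/atomic.h>

static atomic_t ready = ATOMIC_INIT(0);
static int payload;

static void publish(int val)
{
	payload = val;			/* ordered before the xchg below */
	atomic_xchg(&ready, 1);		/* full barrier on both sides */
}

static int try_consume(void)
{
	if (atomic_xchg(&ready, 0) == 1)	/* full barrier on both sides */
		return payload;			/* guaranteed to see publish()'s store */
	return -1;				/* nothing published yet (sketch only) */
}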

Signed-off-by: Richard Braun 
Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 Documentation/atomic_ops.txt  |2 ++
 Documentation/memory-barriers.txt |1 +
 2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 27f2b21..d9ca5be 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -253,6 +253,8 @@ This performs an atomic exchange operation on the atomic 
variable v, setting
 the given new value.  It returns the old value that the atomic variable v had
 just before the operation.
 
+atomic_xchg requires explicit memory barriers around the operation.
+
int atomic_cmpxchg(atomic_t *v, int old, int new);
 
 This performs an atomic compare exchange operation on the atomic value v,
diff --git a/Documentation/memory-barriers.txt 
b/Documentation/memory-barriers.txt
index 3c4e1b3..fa5d8a9 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1685,6 +1685,7 @@ explicit lock operations, described later).  These 
include:
 
xchg();
cmpxchg();
+   atomic_xchg();
atomic_cmpxchg();
atomic_inc_return();
atomic_dec_return();
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 1/4] tracing: Export trace_clock_local()

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

The rcutorture tests need to be able to trace the time of the
beginning of an RCU read-side critical section, and thus need access
to trace_clock_local().  This commit therefore adds the needed
EXPORT_SYMBOL_GPL().

Signed-off-by: Paul E. McKenney 
Reviewed-by: Josh Triplett 
---
 kernel/trace/trace_clock.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 3947835..1bbb1b2 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -44,6 +44,7 @@ u64 notrace trace_clock_local(void)
 
return clock;
 }
+EXPORT_SYMBOL_GPL(trace_clock_local);
 
 /*
  * trace_clock(): 'between' trace clock. Not completely serialized,
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 4/4] rcu: Make rcutorture's shuffler task shuffle recently added tasks

2013-01-26 Thread Paul E. McKenney
From: "Paul E. McKenney" 

A number of kthreads have been added to rcutorture, but the shuffler
task was not informed of them, and thus did not shuffle them.  This
commit therefore adds the requisite shuffling.
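
A sketch of the per-task shuffle operation being extended here
(illustrative names, not the rcutorture internals):

#include <linux/cpumask.h>
#include <linux/sched.h>

static void example_shuffle_one(struct task_struct *t,
				struct cpumask *tmp_mask, int idle_cpu)
{
	if (!t)
		return;
	cpumask_copy(tmp_mask, cpu_online_mask);
	cpumask_clear_cpu(idle_cpu, tmp_mask);	/* keep idle_cpu free of this task */
	set_cpus_allowed_ptr(t, tmp_mask);
}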

Signed-off-by: Paul E. McKenney 
---
 kernel/rcutorture.c |   24 
 1 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a583f1c..3ebc8bf 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -846,7 +846,7 @@ static int rcu_torture_boost(void *arg)
/* Wait for the next test interval. */
oldstarttime = boost_starttime;
while (ULONG_CMP_LT(jiffies, oldstarttime)) {
-   schedule_timeout_uninterruptible(1);
+   schedule_timeout_interruptible(oldstarttime - jiffies);
rcu_stutter_wait("rcu_torture_boost");
if (kthread_should_stop() ||
fullstop != FULLSTOP_DONTSTOP)
@@ -1318,19 +1318,35 @@ static void rcu_torture_shuffle_tasks(void)
set_cpus_allowed_ptr(reader_tasks[i],
 shuffle_tmp_mask);
}
-
if (fakewriter_tasks) {
for (i = 0; i < nfakewriters; i++)
if (fakewriter_tasks[i])
set_cpus_allowed_ptr(fakewriter_tasks[i],
 shuffle_tmp_mask);
}
-
if (writer_task)
set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask);
-
if (stats_task)
set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask);
+   if (stutter_task)
+   set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask);
+   if (fqs_task)
+   set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask);
+   if (shutdown_task)
+   set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask);
+#ifdef CONFIG_HOTPLUG_CPU
+   if (onoff_task)
+   set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+   if (stall_task)
+   set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask);
+   if (barrier_cbs_tasks)
+   for (i = 0; i < n_barrier_cbs; i++)
+   if (barrier_cbs_tasks[i])
+   set_cpus_allowed_ptr(barrier_cbs_tasks[i],
+shuffle_tmp_mask);
+   if (barrier_task)
+   set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask);
 
if (rcu_idle_cpu == -1)
rcu_idle_cpu = num_online_cpus() - 1;
-- 
1.7.8

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH tip/core/rcu 0/4] v2 Documentation and rcutorture changes for 3.9

2013-01-26 Thread Paul E. McKenney
Hello!

The following are changes to documentation and rcutorture:

1.  Export trace_clock_local() in order to allow rcutorture event
tracing to emit the time of the beginning of the RCU read-side
critical section at the point where a failure is detected.
2.  Reduce rcutorture's read-side tracing to include only failures,
thus cutting the number of events down to something reasonable
(normally zero, in fact!).
3.  Add atomic_xchg() to the list of atomic operations and memory
barriers, courtesy of Richard Braun.
4.  Make rcutorture's shuffler task also shuffle recently added
rcutorture kthreads.

Changes since v1:

o   Fixed build problem in #2 located by Randy Dunlap based on
diagnosis from Steven Rostedt.

o   Added patch #4.

Thanx, Paul


 b/Documentation/atomic_ops.txt  |2 +
 b/Documentation/memory-barriers.txt |1 
 b/include/linux/rcupdate.h  |   13 ++--
 b/include/trace/events/rcu.h|   19 +---
 b/kernel/rcupdate.c |9 +++--
 b/kernel/rcutorture.c   |   55 
 b/kernel/trace/trace_clock.c|1 
 b/lib/Kconfig.debug |1 
 8 files changed, 79 insertions(+), 22 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [3.8-rc4 arm] SCSI_SYM53C8XX_2 module cannot register IRQ

2013-01-26 Thread Linus Walleij
On Sat, Jan 26, 2013 at 9:17 AM, Tetsuo Handa
 wrote:

> I did a blind git bisection (i.e. starting
>
>   $ qemu-system-arm -M versatilepb -hda hda.img -kernel arch/arm/boot/zImage 
> -append "root=/dev/sda1 init=/bin/sh" -nographic
>
> and watching "top" for %CPU usage of qemu-system-arm , assuming that it goes 
> to
> 100% only if detection of block device for / partition failed and kernel 
> called
> panic(), goes to 0% otherwise) in two patterns.
>
>   $ git bisect start HEAD b1112249 v3.7 v3.6 v3.5 v3.4 v3.3 v3.2 v3.1 v3.0 -- 
> arch/arm drivers/scsi/sym53c8xx_2/ drivers/scsi/*.[ch]
>
>   $ git bisect start v3.8-rc1 95e629b7 b8db6b8 810883f0 b10bca0b 14318efb 
> 414a6750e b1112249 v3.7 v3.6 v3.5 v3.4 v3.3 v3.2 v3.1 v3.0
>
> Both patterns resulted in that commit 07c9249f
> "ARM: 7554/1: VIC: use irq_domain_add_simple()" is the cause of
>
>   PCI: enabling device :00:0c.0 (0100 -> 0103)
>   sym0: <895a> rev 0x0 at pci :00:0c.0 irq 27
>   sym0: No NVRAM, ID 7, Fast-40, LVD, parity checking
>   sym0: request irq 27 failure
>   sym0: giving up ...
>
> message.
>
> Would you have a look?

I'm trying to reproduce this, but how do you reconfigure the kernel to
get PCI, SCSI and such stuff enabled?

The stock versatile_defconfig does not even have SCSI enabled...

Yours,
Linus Walleij
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v10 00/11] PCI, ACPI: pci root bus hotplug support / pci match_driver

2013-01-26 Thread Yinghai Lu
On Fri, Jan 25, 2013 at 4:04 PM, Bjorn Helgaas  wrote:
> On Tue, Jan 22, 2013 at 3:19 PM, Yinghai Lu  wrote:

> I first pulled in
> "git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
> acpi-scan" again (to pci/acpi-scan2), added your acks, Rafael, and put
> this series on a pci/yinghai-root-bus branch based on pci/acpi-scan2.
>
> I reworked some of the changelogs a bit, but I don't think I made any
> code changes except that in [10/11] I just inlined the
> pci_bus_attach_device() code rather than making a new function, since
> it's small, there's only one caller, and I didn't think we needed any
> more pci_* and pci_bus_* functions than we already have.
>
> Let me know if I messed anything up.

Great, thanks a lot.

After that hits pci/next, I will send out

for-pci-for-each-host-bridge

and

for-pci-for-each-add-res.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

