[PATCH 1/2] ARM: add cpufreq transition notifier to adjust loops_per_jiffy for smp

2012-02-28 Thread Richard Zhao
If CONFIG_SMP is set, cpufreq skips the loops_per_jiffy update, because
different architectures have different per-cpu loops_per_jiffy definitions.

Signed-off-by: Richard Zhao 
Acked-by: Russell King 
---
 arch/arm/kernel/smp.c |   54 +
 1 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cdeb727..4381bef 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -599,3 +600,56 @@ int setup_profiling_timer(unsigned int multiplier)
 {
return -EINVAL;
 }
+
+#ifdef CONFIG_CPU_FREQ
+
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref);
+static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq);
+static unsigned long global_l_p_j_ref;
+static unsigned long global_l_p_j_ref_freq;
+
+static int cpufreq_callback(struct notifier_block *nb,
+   unsigned long val, void *data)
+{
+   struct cpufreq_freqs *freq = data;
+   int cpu = freq->cpu;
+
+   if (freq->flags & CPUFREQ_CONST_LOOPS)
+   return NOTIFY_OK;
+
+   if (!per_cpu(l_p_j_ref, cpu)) {
+   per_cpu(l_p_j_ref, cpu) =
+   per_cpu(cpu_data, cpu).loops_per_jiffy;
+   per_cpu(l_p_j_ref_freq, cpu) = freq->old;
+   if (!global_l_p_j_ref) {
+   global_l_p_j_ref = loops_per_jiffy;
+   global_l_p_j_ref_freq = freq->old;
+   }
+   }
+
+   if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+   (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+   (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
+   loops_per_jiffy = cpufreq_scale(global_l_p_j_ref,
+   global_l_p_j_ref_freq,
+   freq->new);
+   per_cpu(cpu_data, cpu).loops_per_jiffy =
+   cpufreq_scale(per_cpu(l_p_j_ref, cpu),
+   per_cpu(l_p_j_ref_freq, cpu),
+   freq->new);
+   }
+   return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_notifier = {
+   .notifier_call  = cpufreq_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+   return cpufreq_register_notifier(&cpufreq_notifier,
+   CPUFREQ_TRANSITION_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+#endif
-- 
1.7.5.4



___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 0/2] change lpj in arm smp common code

2012-02-28 Thread Richard Zhao
The two patches were originally in [PATCH V6 0/7] add a generic cpufreq driver.
I separated them and hope they can go upstream earlier.

Richard Zhao (2):
  ARM: add cpufreq transition notifier to adjust loops_per_jiffy for smp
  cpufreq: OMAP: remove loops_per_jiffy recalculate for smp

 arch/arm/kernel/smp.c  |   54 
 drivers/cpufreq/omap-cpufreq.c |   36 --
 2 files changed, 54 insertions(+), 36 deletions(-)

-- 
1.7.5.4



___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 2/2] cpufreq: OMAP: remove loops_per_jiffy recalculate for smp

2012-02-28 Thread Richard Zhao
The ARM SMP code now registers a cpufreq transition notifier to recalculate it.

Signed-off-by: Richard Zhao 
---
 drivers/cpufreq/omap-cpufreq.c |   36 
 1 files changed, 0 insertions(+), 36 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 5d04c57..17da4c4 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -37,16 +37,6 @@
 
 #include 
 
-#ifdef CONFIG_SMP
-struct lpj_info {
-   unsigned long   ref;
-   unsigned intfreq;
-};
-
-static DEFINE_PER_CPU(struct lpj_info, lpj_ref);
-static struct lpj_info global_lpj_ref;
-#endif
-
 static struct cpufreq_frequency_table *freq_table;
 static atomic_t freq_table_users = ATOMIC_INIT(0);
 static struct clk *mpu_clk;
@@ -118,32 +108,6 @@ static int omap_target(struct cpufreq_policy *policy,
ret = clk_set_rate(mpu_clk, freqs.new * 1000);
freqs.new = omap_getspeed(policy->cpu);
 
-#ifdef CONFIG_SMP
-   /*
-* Note that loops_per_jiffy is not updated on SMP systems in
-* cpufreq driver. So, update the per-CPU loops_per_jiffy value
-* on frequency transition. We need to update all dependent CPUs.
-*/
-   for_each_cpu(i, policy->cpus) {
-   struct lpj_info *lpj = &per_cpu(lpj_ref, i);
-   if (!lpj->freq) {
-   lpj->ref = per_cpu(cpu_data, i).loops_per_jiffy;
-   lpj->freq = freqs.old;
-   }
-
-   per_cpu(cpu_data, i).loops_per_jiffy =
-   cpufreq_scale(lpj->ref, lpj->freq, freqs.new);
-   }
-
-   /* And don't forget to adjust the global one */
-   if (!global_lpj_ref.freq) {
-   global_lpj_ref.ref = loops_per_jiffy;
-   global_lpj_ref.freq = freqs.old;
-   }
-   loops_per_jiffy = cpufreq_scale(global_lpj_ref.ref, global_lpj_ref.freq,
-   freqs.new);
-#endif
-
/* notifiers */
for_each_cpu(i, policy->cpus) {
freqs.cpu = i;
-- 
1.7.5.4



___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: 3, 2, 1....BLASTOFF

2012-02-28 Thread Guruswamy, Senthilvadivu
Zach,

On Wed, Feb 29, 2012 at 7:54 AM, Zach Pfeffer  wrote:
> The preliminary 12.03 Android plan is up.
>
> https://launchpad.net/linaro-android/+milestone/12.03
>
> ARM A15, A7 and A15/A7 Fast Models running Android, big.LITTLE
> testing, dual SD cards, unit tests, more enablement, improved Android
> SMP, and more!!!
It's good news that the SMP to HMP work has been taken up with Android here.
I am curious to know the project details since I was trying the same
in fastmodel.
Which RTSM_EB are you using for this bring up?
FastModel comes with only RTSM_VE of A15,A7 pack which has only cores
simulated, not even uart in it.

Regards,
Senthil

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR

2012-02-28 Thread Dmitry Antipov

On 02/28/2012 05:30 PM, Dan Carpenter wrote:


Could you include that in the changelog when the final version is
ready?


Which changelog are you talking about?

Dmitry

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: ARM A9 oprofile

2012-02-28 Thread Dmitry Antipov

On 02/28/2012 05:27 PM, Ming Lei wrote:


OK, could you try the MLO and u-boot.bin under the link of
http://kernel.ubuntu.com/~ming/up to see if 'perf' may work well?


Is it really possible that the bootloader stuff affects perf/oprofile?


If still not, could you tell me what is the revision of your pandaboard?


The kernel and u-boot says that CPU is OMAP4430 ES2.2, the board box has the
label with PANDABOARD UEVM4430G-01-00-00, and /proc/cpuinfo is shown below.

Processor   : ARMv7 Processor rev 2 (v7l)
processor   : 0
BogoMIPS: 597.81

processor   : 1
BogoMIPS: 597.81

Features: swp half thumb fastmult vfp edsp thumbee neon vfpv3 tls
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x1
CPU part: 0xc09
CPU revision: 2

Hardware: OMAP4 Panda board
Revision: 0020
Serial  : 


or do you have any changes on the hardware?


No.


I am sure that several guys have tried the current omap4 pmu patch
and make perf work well on pandaboard.


Perf (in particular, "perf top") works for me too. Also I tried
"perf record -a -F 1000 sleep 200" while running the kernel module workload,
and have never seen "nobody cared" IRQ issues. You have said that oprofile
uses perf subsystem as a backend, so this looks even more strange.

Dmitry

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH 2/3] ARM: EXYNOS: Add clkdev lookup entry for lcd clock

2012-02-28 Thread Tushar Behera
Hi Kukjin,

On 12/01/2011 11:20 AM, Tushar Behera wrote:
> The framebuffer driver needs the clock named 'lcd' as its bus
> clock but the equivalent clock on Exynos4 is named as 'fimd'.
> Hence, create a clkdev lookup entry with the name 'lcd' that
> references the 'fimd' clock.
> 
> Signed-off-by: Tushar Behera 
> ---
>  arch/arm/mach-exynos/clock.c |   14 +-
>  1 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm/mach-exynos/clock.c b/arch/arm/mach-exynos/clock.c
> index 5d8d483..607ec28 100644
> --- a/arch/arm/mach-exynos/clock.c
> +++ b/arch/arm/mach-exynos/clock.c
> @@ -489,11 +489,6 @@ static struct clk init_clocks_off[] = {
>   .enable = exynos4_clk_ip_cam_ctrl,
>   .ctrlbit= (1 << 3),
>   }, {
> - .name   = "fimd",
> - .devname= "exynos4-fb.0",
> - .enable = exynos4_clk_ip_lcd0_ctrl,
> - .ctrlbit= (1 << 0),
> - }, {
>   .name   = "hsmmc",
>   .devname= "s3c-sdhci.0",
>   .parent = &clk_aclk_133.clk,
> @@ -782,6 +777,13 @@ static struct clk clk_pdma1 = {
>   .ctrlbit= (1 << 1),
>  };
>  
> +static struct clk clk_fimd0 = {
> + .name   = "fimd",
> + .devname= "exynos4-fb.0",
> + .enable = exynos4_clk_ip_lcd0_ctrl,
> + .ctrlbit= (1 << 0),
> +};
> +
>  struct clk *clkset_group_list[] = {
>   [0] = &clk_ext_xtal_mux,
>   [1] = &clk_xusbxti,
> @@ -1294,6 +1296,7 @@ static struct clksrc_clk *sysclks[] = {
>  static struct clk *clk_cdev[] = {
>   &clk_pdma0,
>   &clk_pdma1,
> + &clk_fimd0,
>  };
>  
>  static struct clksrc_clk *clksrc_cdev[] = {
> @@ -1318,6 +1321,7 @@ static struct clk_lookup exynos4_clk_lookup[] = {
>   CLKDEV_INIT("s3c-sdhci.3", "mmc_busclk.2", &clk_sclk_mmc3.clk),
>   CLKDEV_INIT("dma-pl330.0", "apb_pclk", &clk_pdma0),
>   CLKDEV_INIT("dma-pl330.1", "apb_pclk", &clk_pdma1),
> + CLKDEV_INIT("exynos4-fb.0", "lcd", &clk_fimd0),
>  };
>  
>  static int xtal_rate;

Would you please review this patch and let me know your opinion? Without
this patch, frame-buffer support on EXYNOS4 is broken.

-- 
Tushar Behera

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Benchmark summary for Linaro GCC

2012-02-28 Thread Michael Hope
The topic of benchmarking keeps coming up.  We're working on making
the next FSF release better, but it's a good idea to track how the
current Linaro GCC stacks up against other releases.  The summary is
at:
  https://wiki.linaro.org/Internal/ToolChain/Now

Included is how our current 4.6 release does against FSF 4.6, the
change over six months, and how the upcoming 4.7 release fares.
There's also a comparison against other compilers including the Google
4.6 and Android 4.4 branches.

A PDF version is attached to the page.  The SPEC 2000 results are
still coming in so I'll update the page once they arrive.  Everything
is generated so we'll update this with each monthly release.

-- Michael

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


3, 2, 1....BLASTOFF

2012-02-28 Thread Zach Pfeffer
The preliminary 12.03 Android plan is up.

https://launchpad.net/linaro-android/+milestone/12.03

ARM A15, A7 and A15/A7 Fast Models running Android, big.LITTLE
testing, dual SD cards, unit tests, more enablement, improved Android
SMP, and more!!!

-- 
Zach Pfeffer
Android Platform Team Lead, Linaro Platform Teams
Linaro.org | Open source software for ARM SoCs
Follow Linaro: http://www.facebook.com/pages/Linaro
http://twitter.com/#!/linaroorg - http://www.linaro.org/linaro-blog

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Lee
>> Sounds reasonable.  In some cases it may be helpful to track state
>> demotion as well.  Since I'm still a noob and wearing my submission
>> training wheels, I'm trying to minimize things that fall outside of
>> this basic consolidation effort for this patch series.  But I added
>> Jon's suggestion to this cpuidle page which contains future cpuidle
>> items to consider adding:
>> https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts
>
> Yeah, I don't want to feature-bloat your submission more than
> necessary.  I'm happy for the usage counter stuff to get tackled at a
> later date, but you're still on board for setting last_residency to
> zero in this series, right?

Yes.

>
> Regards,
> Mike
>

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Turquette, Mike
On Tue, Feb 28, 2012 at 3:33 PM, Rob Lee  wrote:
>>
>> I brought this topic up internally and Jon suggested that the 'usage'
>> statistics that are reported in sysfs should also reflect failed
>> versus successful C-state transitions, which is a great idea.  This
>> could simply be achieved by renaming the current 'usage' count to
>> something like 'transitions_attempted' and then conditionally
>> increment a new counter within the 'if (entered_state >= 0)' block,
>> perhaps named, 'transition_succeeded'.
>>
>> This way the old 'usage' count paradigm is as accurate as the new
>> time-keeping code.  Being able to easily observe which C-state tend to
>> fail the most would be invaluable in tuning idle policy for maximum
>> effectiveness.
>>
>> Thoughts?
>
> Sounds reasonable.  In some cases it may be helpful to track state
> demotion as well.  Since I'm still a noob and wearing my submission
> training wheels, I'm trying to minimize things that fall outside of
> this basic consolidation effort for this patch series.  But I added
> Jon's suggestion to this cpuidle page which contains future cpuidle
> items to consider adding:
> https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts

Yeah, I don't want to feature-bloat your submission more than
necessary.  I'm happy for the usage counter stuff to get tackled at a
later date, but you're still on board for setting last_residency to
zero in this series, right?

Regards,
Mike

>
>>
>> Regards,
>> Mike
>>
>>>

 Regards,
 Mike

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Lee
>
> I brought this topic up internally and Jon suggested that the 'usage'
> statistics that are reported in sysfs should also reflect failed
> versus successful C-state transitions, which is a great idea.  This
> could simply be achieved by renaming the current 'usage' count to
> something like 'transitions_attempted' and then conditionally
> increment a new counter within the 'if (entered_state >= 0)' block,
> perhaps named, 'transition_succeeded'.
>
> This way the old 'usage' count paradigm is as accurate as the new
> time-keeping code.  Being able to easily observe which C-state tend to
> fail the most would be invaluable in tuning idle policy for maximum
> effectiveness.
>
> Thoughts?

Sounds reasonable.  In some cases it may be helpful to track state
demotion as well.  Since I'm still a noob and wearing my submission
training wheels, I'm trying to minimize things that fall outside of
this basic consolidation effort for this patch series.  But I added
Jon's suggestion to this cpuidle page which contains future cpuidle
items to consider adding:
https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts

>
> Regards,
> Mike
>
>>
>>>
>>> Regards,
>>> Mike

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Turquette, Mike
On Tue, Feb 28, 2012 at 7:50 AM, Rob Lee  wrote:
> On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike  wrote:
>> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee  wrote:
>>> +/**
>>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter 
>>> function
>>> + * @dev: pointer to a valid cpuidle_device object
>>> + * @drv: pointer to a valid cpuidle_driver object
>>> + * @index: index of the target cpuidle state.
>>> + */
>>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>>> +                               struct cpuidle_driver *drv, int index,
>>> +                               int (*enter)(struct cpuidle_device *dev,
>>> +                                       struct cpuidle_driver *drv, int 
>>> index))
>>> +{
>>> +       ktime_t time_start, time_end;
>>> +       s64 diff;
>>> +
>>> +       time_start = ktime_get();
>>> +
>>> +       index = enter(dev, drv, index);
>>> +
>>> +       time_end = ktime_get();
>>> +
>>> +       local_irq_enable();
>>> +
>>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>>> +       if (diff > INT_MAX)
>>> +               diff = INT_MAX;
>>> +
>>> +       dev->last_residency = (int) diff;
>>> +
>>> +       return index;
>>> +}
>>
>> Hi Rob,
>>
>> In a previous series I brought up the idea of not accounting for time
>> if a C-state transition fails.  My post on that thread can be found
>> here:
>> http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/
>>
>> How do you feel about adding something like the following?
>>
>> if (IS_ERR(index))
>>        dev->last_residency = 0;
>>        return index;
>>
>> Obviously it will up to the platforms to figure out how to propagate
>> that error up from their respective low power code.
>
> To be completely clear on what you're asking for, from
> cpuidle_idle_call in drivers/cpuidle/cpuidle.c:
>
> ...
>        target_state = &drv->states[next_state];
>
>        trace_power_start(POWER_CSTATE, next_state, dev->cpu);
>        trace_cpu_idle(next_state, dev->cpu);
>
>        entered_state = target_state->enter(dev, drv, next_state);
>
>        trace_power_end(dev->cpu);
>        trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
>
>        if (entered_state >= 0) {
>                /* Update cpuidle counters */
>                /* This can be moved to within driver enter routine
>                 * but that results in multiple copies of same code.
>                 */
>                dev->states_usage[entered_state].time +=
>                                (unsigned long long)dev->last_residency;
>                dev->states_usage[entered_state].usage++;
>        }
> ...
>
> Note the "if (entered_state >= 0)".  This ultimately prevents the
> cpuidle device time accounting upon a negative value being returned.
> So are you asking for the if(IS_ERR(index)) check to prevent the
> unnecessary last_residency time calculation in the wrapper, or to make
> sure a last_residency is zero upon failure?  (or both?)
>
> It seems like a bug (or lack of documentation at best) in the code
> that exists today to not zero out dev->last_residency upon a negative
> return value as this value is used by the governors upon the next
> idle.  So to ensure last_residency is 0 upon failure, I think it'd be
> best to add that to an new else statement immediately following the
> "if (entered_state >= 0)" so that any platform cpuidle driver that
> returns a negative will have the last_residency zeroed out, not just
> those that use en_core_tk_irqen.

+ Cc: Jon Hunter

Hi Rob,

I didn't review the code carefully enough to catch the 'if
(entered_state >= 0)' part, but that seems like a graceful way to
solve this problem by appending the 'else' statement on there and
setting last_residency to zero.

I brought this topic up internally and Jon suggested that the 'usage'
statistics that are reported in sysfs should also reflect failed
versus successful C-state transitions, which is a great idea.  This
could simply be achieved by renaming the current 'usage' count to
something like 'transitions_attempted' and then conditionally
increment a new counter within the 'if (entered_state >= 0)' block,
perhaps named, 'transition_succeeded'.

This way the old 'usage' count paradigm is as accurate as the new
time-keeping code.  Being able to easily observe which C-states tend to
fail the most would be invaluable in tuning idle policy for maximum
effectiveness.

Thoughts?

Regards,
Mike

>
>>
>> Regards,
>> Mike

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: Sample big.LITTLE model boot images

2012-02-28 Thread Zach Pfeffer
>        ASYNC ?= FALSE
> }}}
>
> Plug in the kernel and initramfs images from the previous steps:
>
> {{{
> $ : >payload/fsimg
> $ cp ../../zBlob payload/kernel
> $ cp initrd.img payload/fsimg
> }}}
>
> ... and build:
>
> {{{
> $ make
> [...]
>  LD      img.axf
> }}}
>
> img.axf is the boot payload which is needed to start the model.  It
> contains the big.LITTLE switcher code and the kernel, along with a
> minimal bootloader.
>
>
> == Running ==
>
> OK, so now we have:
>
>  * A boot image containing the switcher and kernel: img.axf
>  * A filesystem MMC card image: mmc.bin
>  * A model binary
>
> Sample payload images can be found in
> http://people.linaro.org/~dmart/bl-images/model-images-20120228.tar.bz2
>
>
> Run like this:
>
> {{{
> $ ./RTSM_VE_Cortex-A15x4-A7x4 -C motherboard.mmc.p_mmc_file=mmc.bin -a
> coretile.cluster0.*=img.axf
> }}}
>
>
> This should be enough to boot to a prompt on the simulated UART.
>
> Beware though -- it can take up to 10 minutes or so to get there,
> depending on your machine.



-- 
Zach Pfeffer
Android Platform Team Lead, Linaro Platform Teams
Linaro.org | Open source software for ARM SoCs
Follow Linaro: http://www.facebook.com/pages/Linaro
http://twitter.com/#!/linaroorg - http://www.linaro.org/linaro-blog

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Sample big.LITTLE model boot images

2012-02-28 Thread Dave Martin
Hi all,

I've added some detailed info on the wiki about how to go about
building bootable images for the big.LITTLE components, here:

https://wiki.linaro.org/Internal/Projects/Big.Little.Switcher/ARMFastModelsHowto

For the benefit of anyone who can't see that page, I've also pasted it
here -- if anyone knows a better place to post this stuff, please let
me know!

Note that although the instructions give details of how to build a
filesystem image, there is essentially nothing b.L or model-specific
there -- it just serves as background for explaining how to work
around some issues which you may encounter.

These images are not supposed to present a fully working big.LITTLE
system -- they are there to help people understand how booting works,
where the sources are and how to work with the model.

Note also that the bootwrapper will change soon (when I get there) to
support loading of the kernel, initramfs from the host filesystem
using semihosting, just like the kvm bootwrapper, but this isn't
implemented for now.

Cheers
---Dave


== Filesystem ==

We need to create an SD card image with a filesystem on it, with the
rootfs partition matching the bootargs set previously.  The following
example generates a suitable image using the developer image.  The hardware pack
is largely irrelevant because we're not getting the kernel, modules,
bootloader or command-line from there -- but it keeps l-m-c happy.

{{{
$ wget 
http://releases.linaro.org/12.02/ubuntu/oneiric-images/developer/linaro-o-developer-tar-20120221-0.tar.gz
$ wget 
http://releases.linaro.org/12.02/ubuntu/oneiric-images/developer/hwpack_linaro-lt-vexpress-a9_20120221-1_armel_supported.tar.gz
}}}

Note that the ARM fast model's MMC interface emulation is currently
limited to 2G.  Make the image a bit smaller than that to be on the
safe side:

{{{
$ sudo linaro-media-create --dev vexpress --image_file mmc.bin
--image_size 2000M --hwpack
hwpack_linaro-lt-vexpress-a9_20120221-1_armel_supported.tar.gz
--binary linaro-o-developer-tar-20120221-0.tar.gz
}}}

After generating the filesystem, you probably want to customize it:

 * Disable address layout randomization (mitigates a memory
consumption issue with the model, but not so important on host
machines with >4GB of RAM):
{{{
echo sys.kernel.randomize_va_space = 0 >>etc/sysctl.conf
}}}

 * Disable DHCP for the network (otherwise boot will stall):
{{{
sed -i '/auto.*eth0/d; s/^\(iface.*eth0.*\)dhcp/\1manual/'
etc/network/interfaces
}}}

 * Edit fstab to put the real device name into /etc/fstab in place of
UUID=.  This may not be necessary (mountall was getting
confused, but that may have been caused by not having an initramfs).


Finally, extract the initramfs image (whatever /initrd.img in the
filesystem image points to).



== Kernel, switcher and bootwrapper ==

Clone the big.LITTLE switcher:

{{{
$ git clone --branch gnu-build git://git.linaro.org/people/dmart/arm-virt-bl.git
}}}

Clone the kernel to run on the model.  (Eventually we shouldn't need a
special tree for this.  There are a few model-specific tweaks in this
tree, but nothing big.LITTLE-specific yet.)

{{{
$ git clone --branch arm/vexpressdt-rtsm
git://git.linaro.org/people/dmart/linux-2.6-arm.git
}}}


Now build the kernel:

{{{
$ cd linux-2.6-arm/

$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- vexpress_rtsm_defconfig
$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- zImage dtbs
}}}


The bootwrapper doesn't support device tree yet, so for now append a
suitable device tree blob to the zImage.  This should match the model
you intend to run on:

{{{
$ cat arch/arm/boot/zImage arch/arm/boot/vexpress-v2p-ca15x4-rtsm.dtb >../zBlob
}}}


Now, build the bootwrapper using the kernel you just built:

{{{
$ cd ../arm-virt-bl/bootwrapper
}}}

Configure some things:

In Makefile, add the following argument to BOOTARGS:
{{{
root=/dev/mmcblk0p2
}}}

Due to current bugginess introduced by the ARM->GNU toolchain
migration, the switcher is not currently stable.  For now, we turn off
autonomous asynchronous switching so that we can see the system
booting:

In big-little/Makefile, change the line
{{{
ASYNC ?= TRUE
}}}
to
{{{
ASYNC ?= FALSE
}}}

Plug in the kernel and initramfs images from the previous steps:

{{{
$ : >payload/fsimg
$ cp ../../zBlob payload/kernel
$ cp initrd.img payload/fsimg
}}}

... and build:

{{{
$ make
[...]
  LD  img.axf
}}}

img.axf is the boot payload which is needed to start the model.  It
contains the big.LITTLE switcher code and the kernel, along with a
minimal bootloader.


== Running ==

OK, so now we have:

 * A boot image containing the switcher and kernel: img.axf
 * A filesystem MMC card image: mmc.bin
 * A model binary

Sample payload images can be found in
http://people.linaro.org/~dmart/bl-images/model-images-20120228.tar.bz2


Run like this:

{{{
$ ./RTSM_VE_Cortex-A15x4-A7x4 -C motherboard.mmc.p_mmc_file=mmc.bin -a
coretile.c

Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Lee
>>> Any reason that this code is in the header?  Why not in cpuidle.c?
>>>
>>
>> Not a strong reason.  I thought making it an inline would introduce
>> slightly less new execution when adding this code (realizing that
>> there are function calls immediately after, so the only benefit is the
>> reduce popping and pushing).  But it does require an extra copy of
>> this code for any platform driver that does not enable
>> en_core_tk_irqen and instead makes calls to it directly (like omap3).
>> For this case, I don't think the inline implementation should add
>> extra code from what exists today as it should simply replace the
>> existing platform time keeping calls to a standard one defined by the
>> core cpuidle.
>>
> But you will have multiple copies of the inlined code if platforms do
> use it. Or is it used only by the core cpuidle code? In that case, gcc
> can automatically inline static functions.

Used by some platforms as well.

>
> It seems a bit long to inline and this isn't performance critical (at
> least for the enter side).

Ok.  Unless there are further comments supporting the inline method,
I'll switch to non-inline for next version.  Thanks Mike and Rob for
the feedback.

>
> Rob
>
>> I don't have a strong preference with using the inline so if you or
>> others can give your opinion on which method to use and why, I'd be
>> glad to read it.
>>
>>> Regards,
>>> Mike
>>
>> ___
>> linux-arm-kernel mailing list
>> linux-arm-ker...@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Herring
On 02/28/2012 09:45 AM, Rob Lee wrote:
> Hey Mike,
> 
> On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike  wrote:
>> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee  wrote:
>>> +/**
>>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter 
>>> function
>>> + * @dev: pointer to a valid cpuidle_device object
>>> + * @drv: pointer to a valid cpuidle_driver object
>>> + * @index: index of the target cpuidle state.
>>> + */
>>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>>> +   struct cpuidle_driver *drv, int index,
>>> +   int (*enter)(struct cpuidle_device *dev,
>>> +   struct cpuidle_driver *drv, int 
>>> index))
>>> +{
>>> +   ktime_t time_start, time_end;
>>> +   s64 diff;
>>> +
>>> +   time_start = ktime_get();
>>> +
>>> +   index = enter(dev, drv, index);
>>> +
>>> +   time_end = ktime_get();
>>> +
>>> +   local_irq_enable();
>>> +
>>> +   diff = ktime_to_us(ktime_sub(time_end, time_start));
>>> +   if (diff > INT_MAX)
>>> +   diff = INT_MAX;
>>> +
>>> +   dev->last_residency = (int) diff;
>>> +
>>> +   return index;
>>> +}
>>
>> Any reason that this code is in the header?  Why not in cpuidle.c?
>>
> 
> Not a strong reason.  I thought making it an inline would introduce
> slightly less new execution when adding this code (realizing that
> there are function calls immediately after, so the only benefit is the
> reduce popping and pushing).  But it does require an extra copy of
> this code for any platform driver that does not enable
> en_core_tk_irqen and instead makes calls to it directly (like omap3).
> For this case, I don't think the inline implementation should add
> extra code from what exists today as it should simply replace the
> existing platform time keeping calls to a standard one defined by the
> core cpuidle.
> 
But you will have multiple copies of the inlined code if platforms do
use it. Or is it used only by the core cpuidle code? In that case, gcc
can automatically inline static functions.

It seems a bit long to inline and this isn't performance critical (at
least for the enter side).

Rob

> I don't have a strong preference with using the inline so if you or
> others can give your opinion on which method to use and why, I'd be
> glad to read it.
> 
>> Regards,
>> Mike
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Lee
On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike  wrote:
> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee  wrote:
>> +/**
>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>> + * @dev: pointer to a valid cpuidle_device object
>> + * @drv: pointer to a valid cpuidle_driver object
>> + * @index: index of the target cpuidle state.
>> + */
>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>> +                               struct cpuidle_driver *drv, int index,
>> +                               int (*enter)(struct cpuidle_device *dev,
>> +                                       struct cpuidle_driver *drv, int 
>> index))
>> +{
>> +       ktime_t time_start, time_end;
>> +       s64 diff;
>> +
>> +       time_start = ktime_get();
>> +
>> +       index = enter(dev, drv, index);
>> +
>> +       time_end = ktime_get();
>> +
>> +       local_irq_enable();
>> +
>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>> +       if (diff > INT_MAX)
>> +               diff = INT_MAX;
>> +
>> +       dev->last_residency = (int) diff;
>> +
>> +       return index;
>> +}
>
> Hi Rob,
>
> In a previous series I brought up the idea of not accounting for time
> if a C-state transition fails.  My post on that thread can be found
> here:
> http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/
>
> How do you feel about adding something like the following?
>
> if (IS_ERR(index))
>        dev->last_residency = 0;
>        return index;
>
> Obviously it will up to the platforms to figure out how to propagate
> that error up from their respective low power code.

To be completely clear on what you're asking for, from
cpuidle_idle_call in drivers/cpuidle/cpuidle.c:

...
target_state = &drv->states[next_state];

trace_power_start(POWER_CSTATE, next_state, dev->cpu);
trace_cpu_idle(next_state, dev->cpu);

entered_state = target_state->enter(dev, drv, next_state);

trace_power_end(dev->cpu);
trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);

if (entered_state >= 0) {
/* Update cpuidle counters */
/* This can be moved to within driver enter routine
 * but that results in multiple copies of same code.
 */
dev->states_usage[entered_state].time +=
(unsigned long long)dev->last_residency;
dev->states_usage[entered_state].usage++;
}
...

Note the "if (entered_state >= 0)".  This ultimately prevents the
cpuidle device time accounting upon a negative value being returned.
So are you asking for the if(IS_ERR(index)) check to prevent the
unnecessary last_residency time calculation in the wrapper, or to make
sure a last_residency is zero upon failure?  (or both?)

It seems like a bug (or lack of documentation at best) in the code
that exists today to not zero out dev->last_residency upon a negative
return value as this value is used by the governors upon the next
idle.  So to ensure last_residency is 0 upon failure, I think it'd be
best to add that to a new else statement immediately following the
"if (entered_state >= 0)" so that any platform cpuidle driver that
returns a negative will have the last_residency zeroed out, not just
those that use en_core_tk_irqen.

>
> Regards,
> Mike

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation

2012-02-28 Thread Rob Lee
Hey Mike,

On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike  wrote:
> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee  wrote:
>> +/**
>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>> + * @dev: pointer to a valid cpuidle_device object
>> + * @drv: pointer to a valid cpuidle_driver object
>> + * @index: index of the target cpuidle state.
>> + */
>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>> +                               struct cpuidle_driver *drv, int index,
>> +                               int (*enter)(struct cpuidle_device *dev,
>> +                                       struct cpuidle_driver *drv, int 
>> index))
>> +{
>> +       ktime_t time_start, time_end;
>> +       s64 diff;
>> +
>> +       time_start = ktime_get();
>> +
>> +       index = enter(dev, drv, index);
>> +
>> +       time_end = ktime_get();
>> +
>> +       local_irq_enable();
>> +
>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>> +       if (diff > INT_MAX)
>> +               diff = INT_MAX;
>> +
>> +       dev->last_residency = (int) diff;
>> +
>> +       return index;
>> +}
>
> Any reason that this code is in the header?  Why not in cpuidle.c?
>

Not a strong reason.  I thought making it an inline would introduce
slightly less new execution when adding this code (realizing that
there are function calls immediately after, so the only benefit is the
reduced popping and pushing).  But it does require an extra copy of
this code for any platform driver that does not enable
en_core_tk_irqen and instead makes calls to it directly (like omap3).
For this case, I don't think the inline implementation should add
extra code from what exists today as it should simply replace the
existing platform time keeping calls to a standard one defined by the
core cpuidle.

I don't have a strong preference with using the inline so if you or
others can give your opinion on which method to use and why, I'd be
glad to read it.

> Regards,
> Mike

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR

2012-02-28 Thread Dan Carpenter
On Tue, Feb 28, 2012 at 03:59:25PM +0400, Dmitry Antipov wrote:
> On 02/28/2012 01:44 PM, Dan Carpenter wrote:
> >On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote:
> >>  - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested;
> >>  - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes
> >>are requested;
> >>  - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR.
> >>
> >
> >Why?
> 
> 1) it was requested by the subsystem (co?)maintainer, see 
> http://lkml.org/lkml/2012/1/27/475;
> 2) this looks to be a convenient way to trace/debug zero-size allocation 
> errors (although
>I don't advocate it as a best way).

Could you include that in the changelog when the final version is
ready?

regards,
dan carpenter


signature.asc
Description: Digital signature
___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: ARM A9 oprofile

2012-02-28 Thread Ming Lei
On Tue, Feb 28, 2012 at 8:13 PM, Dmitry Antipov
 wrote:
> On 02/28/2012 04:45 AM, Ming Lei wrote:
>
>> Please try the uImage on the link below:
>>
>>       http://kernel.ubuntu.com/~ming/up/uImage-3.3-rc5-perf
>>
>
> No good news for the oprofile:

OK, could you try the MLO and u-boot.bin under the link of
http://kernel.ubuntu.com/~ming/up to see if 'perf' may work well?

If still not, could you tell me what is the revision of your pandaboard? or
do you have any changes on the hardware?

I am sure that several guys have tried the current omap4 pmu patch
and made perf work well on pandaboard.

>
>
> ...
> irq 34: nobody cared (try booting with the "irqpoll" option)
> [stack]
>
> Disabling IRQ #34
> irq 33: nobody cared (try booting with the "irqpoll" option)
> [stack]
> Disabling IRQ #33
> ...
>
> Could you also try an attached module in a loop like:
>
> while true; do insmod timeoutbench.ko && rmmod timeoutbench; done
>
> with oprofile running?

'perf top' can be run well with the output below:

   PerfTop:1036 irqs/sec  kernel:99.2% us: 1.0% guest kernel: 0.0%
guest us: 0.0% exact:  0.0% [1000Hz cycles],  (all, 2 CPUs)


44.87%  [kernel][k] _raw_spin_unlock_irqrestore
22.48%  [kernel][k] _raw_spin_unlock_irq
 7.41%  [kernel][k] del_timer_sync
 6.24%  [kernel][k] lock_acquire
 4.95%  [kernel][k] lock_release
 2.05%  [kernel][k] omap4_enter_idle
 1.81%  [kernel][k] finish_task_switch
 1.06%  [kernel][k] rcu_note_context_switch
 0.60%  [kernel][k] schedule_timeout
 0.57%  [kernel][k] memchr_inv
 0.54%  [kernel][k] __schedule
 0.54%  [kernel][k] thumbee_notifier
 0.53%  [kernel][k] sub_preempt_count


thanks,
--
Ming Lei

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Stefano Stabellini
On Tue, 28 Feb 2012, Ian Campbell wrote:
> On Tue, 2012-02-28 at 10:20 +, Dave Martin wrote:
> > On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote:
> > > On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote:
> > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings 
> > > > > available
> > > > > which do allow direct mov of an immediate into R12, but are 32 bit 
> > > > > Thumb
> > > > > instructions.
> > > > > 
> > > > > Should we use r7 instead to maximise instruction density for Thumb 
> > > > > code?
> > > > 
> > > > The difference seems trivial when put into context, even if you code a
> > > > special Thumb version of the code to maximise density (the Thumb-2 code
> > > > which gets built from assembler in the kernel is very suboptimal in
> > > > size, but there simply isn't a high proportion of asm code in the kernel
> > > > anyway.)  I wouldn't consider the ARM/Thumb differences as an important
> > > > factor when deciding on a register.
> > > 
> > > OK, that's useful information. thanks.
> > > 
> > > > One argument for _not_ using r12 for this purpose is that it is then
> > > > harder to put a generic "HVC" function (analogous to the "syscall"
> > > > syscall) out-of-line, since r12 could get destroyed by the call.
> > > 
> > > For an out of line syscall(2) wouldn't the syscall number either be in a
> > > standard C calling convention argument register or on the stack when the
> > > function was called, since it is just a normal argument at that point?
> > > As you point out it cannot be passed in r12 (and could never be, due to
> > > the clobbering).
> > > 
> > > The syscall function itself would have to move the arguments and syscall
> > > nr etc around before issuing the syscall.
> > > 
> > > I think the same is true of a similar hypercall(2)
> > > 
> > > > If you don't think you will ever care about putting HVC out of line
> > > > though, it may not matter.
> > 
> > If you have both inline and out-of-line hypercalls, it's hard to ensure
> > that you never have to shuffle the registers in either case.
> 
> Agreed.
> 
> I think we want to optimise for the inline case since those are the
> majority.

They are not just the majority, all of them are static inline at the
moment, even on x86 (where the number of hypercalls is much higher).

So yes, we should optimize for the inline case.


> The only non-inline case is the special "privcmd ioctl" which is the
> mechanism that allows the Xen toolstack to make hypercalls. It's
> somewhat akin to syscall(2). By the time you get to it you will already
> have done a system call for the ioctl, pulled the arguments from the
> ioctl argument structure etc, plus such hypercalls are not really
> performance critical.

Even the privcmd hypercall (privcmd_call) is a static inline function,
it is just that at the moment there is only one caller :)


> > Shuffling can be reduced but only at the expense of strange argument
> > ordering in some cases when calling from C -- the complexity is probably
> > not worth it.  Linux doesn't bother for its own syscalls.
> > 
> > Note that even in assembler, a branch from one section to a label in
> > another section may cause r12 to get destroyed, so you will need to be
> > careful about how you code the hypervisor trap handler.  However, this
> > is not different from coding exception handlers in general, so I don't
> > know that it constitutes a conclusive argument on its own.
> 
> We are happy to arrange that this doesn't occur on our trap entry paths,
> at least until the guest register state has been saved. Currently the
> hypercall dispatcher is in C and gets r12 from the on-stack saved state.
> We will likely eventually optimise the hypercall path directly in ASM
> and in that case we are happy to take steps to ensure we don't clobber
> r12 before we need it.

Yes, I don't think this should be an issue.


> > My instinctive preference would therefore be for r7 (which also seems to
> > be good enough for Linux syscalls) -- but it really depends how many
> > arguments you expect to need to support.
> 
> Apparently r7 is the frame pointer for gcc in thumb mode which I think
> is a good reason to avoid it.
> 
> We currently have some 5 argument hypercalls and there have been
> occasional suggestions for interfaces which use 6 -- although none of
> them have come to reality.
 
I don't have a very strong opinion on which register we should use, but
I would like to avoid r7 if it is already actively used by gcc.

The fact that r12 can be destroyed so easily is actually a good argument
for using it because it means it is less likely to contain useful data
that needs to be saved/restored by gcc.

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Stefano Stabellini
On Tue, 28 Feb 2012, Dave Martin wrote:
> > Given that Stefano is proposing to make the ISS a (per-hypervisor)
> > constant we could consider just defining the Thumb and non-Thumb
> > constants instead of doing all the construction with the __HVC_IMM stuff
> > -- that would remove a big bit of the macroization.
> 
> It's not quite as simple as that -- emitting instructions using data
> directives is not endianness safe, and even in the cases where .long gives
> the right result for ARM, it gives the wrong result for 32-bit Thumb
> instructions if the opcode is given in human-readable order.
> 
> I was trying to solve the same problem for the kvm guys with some global
> macros -- I'm aiming to get a patch posted soon, so I'll make sure
> you're on CC.
 
That would be great, thanks!

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: ARM A9 oprofile

2012-02-28 Thread Dmitry Antipov

On 02/28/2012 04:45 AM, Ming Lei wrote:


Please try the uImage on the link below:

   http://kernel.ubuntu.com/~ming/up/uImage-3.3-rc5-perf



No good news for the oprofile:

...
irq 34: nobody cared (try booting with the "irqpoll" option)
[stack]
Disabling IRQ #34
irq 33: nobody cared (try booting with the "irqpoll" option)
[stack]
Disabling IRQ #33
...

Could you also try an attached module in a loop like:

while true; do insmod timeoutbench.ko && rmmod timeoutbench; done

with oprofile running?

Dmitry
#include 
#include 
#include 
#include 
#include 
#include 
#include 

MODULE_LICENSE("GPL");

/* Number of worker kthreads created by timeoutbench_init(). */
static int nrthreads = 128;
module_param(nrthreads, int, 0644);

/* Upper bound on sleep iterations performed by each worker thread. */
static int loopcount = 1024;
module_param(loopcount, int, 0644);

/*
 * Non-zero: workers sleep via schedule_hrtimeout_range();
 * zero: workers sleep via schedule_timeout_uninterruptible().
 */
static int usehrtime = 0;
module_param(usehrtime, int, 0644);

/*
 * Passed as the second (delta) argument of schedule_hrtimeout_range();
 * only used when usehrtime is set.
 * NOTE(review): presumably in nanoseconds -- confirm against the hrtimer API.
 */
static int slack = 5;
module_param(slack, int, 0644);

/* Length of each individual sleep, in milliseconds. */
static int msecs = 1;
module_param(msecs, int, 0644);

/* Completed by the worker that drops nrunning to zero; awaited on unload. */
static DECLARE_COMPLETION(done);
/* Array of worker task pointers, allocated in timeoutbench_init(). */
static struct task_struct **threads;
/* Count of workers that have entered timeoutbench_test() and not yet left. */
static atomic_t nrunning;

static int timeoutbench_test(void *unused)
{
int i;
ktime_t expires = ktime_set(0, msecs * NSEC_PER_MSEC);

atomic_inc(&nrunning);

for (i = 0; !kthread_should_stop() && i < loopcount; i++) {
if (usehrtime) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_hrtimeout_range(&expires, slack, 
HRTIMER_MODE_REL);
}
else

schedule_timeout_uninterruptible(msecs_to_jiffies(msecs));
}

if (atomic_dec_and_test(&nrunning))
complete(&done);
return 0;
}

/*
 * Module entry point: allocate the thread table and start `nrthreads`
 * workers running timeoutbench_test().
 *
 * A task_struct reference is taken on every worker so that the pointer
 * stored in `threads` stays valid for kthread_stop() even after the worker
 * function has returned.
 *
 * Returns 0 on success, -EINVAL if `nrthreads` is not positive, -ENOMEM if
 * the table cannot be allocated, or the kthread_create() error code.  On
 * failure every worker already started is stopped and released.
 */
static int __init timeoutbench_init(void)
{
	int i, err;

	/*
	 * With no workers nothing would ever complete `done`, and unloading
	 * the module would block forever in wait_for_completion().
	 */
	if (nrthreads < 1)
		return -EINVAL;

	atomic_set(&nrunning, 0);

	/* kcalloc() checks the count * size multiplication for overflow. */
	threads = kcalloc(nrthreads, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	for (i = 0; i < nrthreads; i++) {
		threads[i] = kthread_create(timeoutbench_test, NULL,
					    "timeoutbench_test/%d", i);
		if (IS_ERR(threads[i])) {
			err = PTR_ERR(threads[i]);
			goto out_stop;
		}
		get_task_struct(threads[i]);
		wake_up_process(threads[i]);
	}
	return 0;

out_stop:
	/* Unwind workers [0, i): stop each one and drop the reference we hold. */
	while (--i >= 0) {
		kthread_stop(threads[i]);
		put_task_struct(threads[i]);
	}
	kfree(threads);
	threads = NULL;
	return err;
}

/*
 * Module exit point: wait until the workers signal `done`, then stop each
 * worker, drop the task reference taken at start-up and free the table.
 *
 * NOTE(review): `nrthreads` is a writable (0644) module parameter and is
 * re-read here; if it was changed after load it may no longer match the
 * number of entries allocated in `threads` -- confirm whether that matters.
 */
static void __exit timeoutbench_exit(void)
{
	int idx = 0;

	wait_for_completion(&done);

	while (idx < nrthreads) {
		kthread_stop(threads[idx]);
		put_task_struct(threads[idx]);
		idx++;
	}

	kfree(threads);
}

/* Register the load and unload handlers for this module. */
module_init(timeoutbench_init);
module_exit(timeoutbench_exit);
___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR

2012-02-28 Thread Dmitry Antipov

On 02/28/2012 01:44 PM, Dan Carpenter wrote:

On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote:

  - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested;
  - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes
are requested;
  - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR.



Why?


1) it was requested by the subsystem (co?)maintainer, see 
http://lkml.org/lkml/2012/1/27/475;
2) this looks to be a convenient way to trace/debug zero-size allocation errors 
(although
   I don't advocate it as a best way).

Dmitry

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Ian Campbell
On Mon, 2012-02-27 at 21:05 +, Peter Maydell wrote:
> On 27 February 2012 16:27, Ian Campbell  wrote:
> > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> > immediate (which can only target r0..r7).
> >
> > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > instructions.
> >
> > Should we use r7 instead to maximise instruction density for Thumb code?
> 
> r7 is (used by gcc as) the Thumb frame pointer; I don't know if this
> makes it worth avoiding in this context.

I think it does.

It actually sounds as if using r12 is fine here, the impact on code
density should be pretty small -- there aren't really all that many call
sites which involve hypercalls.

By way of an example I measured an x86 kernel which should be using more
hypercalls due to pv paging etc and found that 0.014% of the lines in
"objdump -d" contained a call to the hypercall_page. (I know not all
lines of objdump -d output are instructions but it's a reasonable approx
IMHO).

So I think using 3 16-bit instruction slots instead of 2 won't make
much impact in practice.

Thanks,
Ian.


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Ian Campbell
On Tue, 2012-02-28 at 10:20 +, Dave Martin wrote:
> On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote:
> > On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote:
> > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > > instructions.
> > > > 
> > > > Should we use r7 instead to maximise instruction density for Thumb code?
> > > 
> > > The difference seems trivial when put into context, even if you code a
> > > special Thumb version of the code to maximise density (the Thumb-2 code
> > > which gets built from assembler in the kernel is very suboptimal in
> > > size, but there simply isn't a high proportion of asm code in the kernel
> > > anyway.)  I wouldn't consider the ARM/Thumb differences as an important
> > > factor when deciding on a register.
> > 
> > OK, that's useful information. thanks.
> > 
> > > One argument for _not_ using r12 for this purpose is that it is then
> > > harder to put a generic "HVC" function (analogous to the "syscall"
> > > syscall) out-of-line, since r12 could get destroyed by the call.
> > 
> > For an out of line syscall(2) wouldn't the syscall number either be in a
> > standard C calling convention argument register or on the stack when the
> > function was called, since it is just a normal argument at that point?
> > As you point out it cannot be passed in r12 (and could never be, due to
> > the clobbering).
> > 
> > The syscall function itself would have to move the arguments and syscall
> > nr etc around before issuing the syscall.
> > 
> > I think the same is true of a similar hypercall(2)
> > 
> > > If you don't think you will ever care about putting HVC out of line
> > > though, it may not matter.
> 
> If you have both inline and out-of-line hypercalls, it's hard to ensure
> that you never have to shuffle the registers in either case.

Agreed.

I think we want to optimise for the inline case since those are the
majority.

The only non-inline case is the special "privcmd ioctl" which is the
mechanism that allows the Xen toolstack to make hypercalls. It's
somewhat akin to syscall(2). By the time you get to it you will already
have done a system call for the ioctl, pulled the arguments from the
ioctl argument structure etc, plus such hypercalls are not really
performance critical.

> Shuffling can be reduced but only at the expense of strange argument
> ordering in some cases when calling from C -- the complexity is probably
> not worth it.  Linux doesn't bother for its own syscalls.
> 
> Note that even in assembler, a branch from one section to a label in
> another section may cause r12 to get destroyed, so you will need to be
> careful about how you code the hypervisor trap handler.  However, this
> is not different from coding exception handlers in general, so I don't
> know that it constitutes a conclusive argument on its own.

We are happy to arrange that this doesn't occur on our trap entry paths,
at least until the guest register state has been saved. Currently the
hypercall dispatcher is in C and gets r12 from the on-stack saved state.
We will likely eventually optimise the hypercall path directly in ASM
and in that case we are happy to take steps to ensure we don't clobber
r12 before we need it.

> My instinctive preference would therefore be for r7 (which also seems to
> be good enough for Linux syscalls) -- but it really depends how many
> arguments you expect to need to support.

Apparently r7 is the frame pointer for gcc in thumb mode which I think
is a good reason to avoid it.

We currently have some 5 argument hypercalls and there have been
occasional suggestions for interfaces which use 6 -- although none of
them have come to reality.

Ian.


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Ian Campbell
On Tue, 2012-02-28 at 09:46 +, Dave Martin wrote:
> On Mon, Feb 27, 2012 at 07:48:45PM +, Ian Campbell wrote:
> > Given that Stefano is proposing to make the ISS a (per-hypervisor)
> > constant we could consider just defining the Thumb and non-Thumb
> > constants instead of doing all the construction with the __HVC_IMM stuff
> > -- that would remove a big bit of the macroization.
> 
> It's not quite as simple as that -- emitting instructions using data
> directives is not endianness safe, and even in the cases where .long gives
> the right result for ARM, it gives the wrong result for 32-bit Thumb
> instructions if the opcode is given in human-readable order.

Urk, yes,..

> I was trying to solve the same problem for the kvm guys with some global
> macros -- I'm aiming to get a patch posted soon, so I'll make sure
> you're on CC.

Awesome, thanks!

Ian.


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR

2012-02-28 Thread Dan Carpenter
On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote:
>  - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested;
>  - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes
>are requested;
>  - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR.
> 

Why?

Also patch 2/2 should go in before patch 1/2 or it breaks things.

regards,
dan carpenter



signature.asc
Description: Digital signature
___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Dave Martin
On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote:
> On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote:
> > On Mon, Feb 27, 2012 at 04:27:23PM +, Ian Campbell wrote:
> > > On Thu, 2012-02-23 at 17:48 +, Stefano Stabellini wrote:
> > > > We need a register to pass the hypercall number because we might not
> > > > know it at compile time and HVC only takes an immediate argument.
> > > > 
> > > > Among the available registers r12 seems to be the best choice because it
> > > > is defined as "intra-procedure call scratch register".
> > > 
> > > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov
> > > immediate (which can only target r0..r7).
> > 
> > This is untrue.  The important instructions, like MOV Rd, Rn can access
> > all the regs.  But anyway, there is no such thing as a Thumb-1 kernel,
> > so we won't really care.
> 
> I did say "mov immediate", which is the one which matters when loading a
> constant hypercall number (the common case). AFAIK the "mov Rd, #imm" T1
> encoding cannot access all registers.
> 
> The "mov rd,rn" form only helps for syscall(2) like functions, which are
> unusual, at least for Xen, although as Stefano says, they do exist.

Apologies -- looks like I misread you here.  I agree, but it's probably
a minor issue nonetheless.

> 
> > > Since we support only ARMv7+ there are "T2" and "T3" encodings available
> > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb
> > > instructions.
> > > 
> > > Should we use r7 instead to maximise instruction density for Thumb code?
> > 
> > The difference seems trivial when put into context, even if you code a
> > special Thumb version of the code to maximise density (the Thumb-2 code
> > which gets built from assembler in the kernel is very suboptimal in
> > size, but there simply isn't a high proportion of asm code in the kernel
> > anyway.)  I wouldn't consider the ARM/Thumb differences as an important
> > factor when deciding on a register.
> 
> OK, that's useful information. thanks.
> 
> > One argument for _not_ using r12 for this purpose is that it is then
> > harder to put a generic "HVC" function (analogous to the "syscall"
> > syscall) out-of-line, since r12 could get destroyed by the call.
> 
> For an out of line syscall(2) wouldn't the syscall number either be in a
> standard C calling convention argument register or on the stack when the
> function was called, since it is just a normal argument at that point?
> As you point out it cannot be passed in r12 (and could never be, due to
> the clobbering).
> 
> The syscall function itself would have to move the arguments and syscall
> nr etc around before issuing the syscall.
> 
> I think the same is true of a similar hypercall(2)
> 
> > If you don't think you will ever care about putting HVC out of line
> > though, it may not matter.

If you have both inline and out-of-line hypercalls, it's hard to ensure
that you never have to shuffle the registers in either case.

Shuffling can be reduced but only at the expense of strange argument
ordering in some cases when calling from C -- the complexity is probably
not worth it.  Linux doesn't bother for its own syscalls.

Note that even in assembler, a branch from one section to a label in
another section may cause r12 to get destroyed, so you will need to be
careful about how you code the hypervisor trap handler.  However, this
is not different from coding exception handlers in general, so I don't
know that it constitutes a conclusive argument on its own.

My instinctive preference would therefore be for r7 (which also seems to
be good enough for Linux syscalls) -- but it really depends how many
arguments you expect to need to support.

Cheers
---Dave

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH v3 0/2] Device tree support for TWL regulators

2012-02-28 Thread Mark Brown
On Tue, Feb 28, 2012 at 11:11:48AM +0530, Rajendra Nayak wrote:

> changes have no dependencies with any other DT series. I will repost
> all of Tero/Peter and my changes (to add DT support to the driver) as
> one single series and drop the dts file updates, which I guess can go
> via Tony/OMAP tree.

Yes, that sounds like a good plan - the DTS changes are largely
orthogonal to the code changes and don't need to go via the same path
(this is true in general, the DTSs are pretty horrible for merge
issues).


signature.asc
Description: Digital signature
___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor

2012-02-28 Thread Dave Martin
On Mon, Feb 27, 2012 at 07:48:45PM +, Ian Campbell wrote:
> On Mon, 2012-02-27 at 17:53 +, Dave Martin wrote:
> > On Thu, Feb 23, 2012 at 05:48:22PM +, Stefano Stabellini wrote:
> > > We need a register to pass the hypercall number because we might not
> > > know it at compile time and HVC only takes an immediate argument.
> > > 
> > > Among the available registers r12 seems to be the best choice because it
> > > is defined as "intra-procedure call scratch register".
> > 
> > This would be massively simplified if you didn't try to inline the HVC.
> > Does it really need to be inline?
> >
> > > +#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG)
> > 
> > Please, do not do this.  It won't work in Thumb, where the encodings are
> > different.
> > 
> > It is reasonable to expect anyone building Xen to have reasonably new
> > tools, you you can justifiably use
> > 
> > AFLAGS_thisfile.o := -Wa,-march=armv7-a+virt
> > 
> > in the Makefile and just use the hvc instruction directly.
> 
> Our aim is for guest kernel binaries not to be specific to Xen -- i.e.
> they should be able to run on baremetal and other hypervisors as well.
> The differences should only be in the device-tree passed to the kernel.
> 
> > Of course, this is only practical if the HVC invocation is not inlined.
> 
> I suppose we could make the stub functions out of line, we just copied
> what Xen does on x86.
> 
> The only thing which springs to mind is that 5 argument hypercalls will
> end up pushing the fifth argument to the stack only to pop it back into
> r4 for the hypercall and IIRC it also needs to preserve r4 (callee saved
> reg) which is going to involve some small amount of code to move stuff
> around too.
> 
> So by inlining the functions we avoid some thunking because the compiler
> would know exactly what was happening at the hypercall site.

True ...

> 
> We don't currently have any 6 argument hypercalls but the same would
> extend there.
> 
> > If we can't avoid macro-ising HVC, we should do it globally, not locally
> > to the Xen code.  That way we at least keep all the horror in one place.
> 
> That sounds like a good idea to me.
> 
> Given that Stefano is proposing to make the ISS a (per-hypervisor)
> constant we could consider just defining the Thumb and non-Thumb
> constants instead of doing all the construction with the __HVC_IMM stuff
> -- that would remove a big bit of the macroization.

It's not quite as simple as that -- emitting instructions using data
directives is not endianness safe, and even in the cases where .long gives
the right result for ARM, it gives the wrong result for 32-bit Thumb
instructions if the opcode is given in human-readable order.

I was trying to solve the same problem for the kvm guys with some global
macros -- I'm aiming to get a patch posted soon, so I'll make sure
you're on CC.

Cheers
---Dave

___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 3/4] regulator: twl-regulator: Add fixed LDO for V1V8, V2V1 supply

2012-02-28 Thread Rajendra Nayak
From: Peter Ujfalusi 

V1V8 supply most common use is to provide VIO for the system.
V2V1 supply is used on SDP4430/PandaBoards to provide 2.1V to
twl6040, and also as an input to VCXIO_IN, VDAC_IN of twl6030.

Also update the bindings documentation with the new compatible
property for these additional LDOs.

Signed-off-by: Peter Ujfalusi 
Signed-off-by: Rajendra Nayak 
Cc: Samuel Ortiz 
Cc: Misael Lopez Cruz 
Cc: Santosh Shilimkar 
---
 .../bindings/regulator/twl-regulator.txt   |2 ++
 drivers/regulator/twl-regulator.c  |4 
 2 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt 
b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
index ba9d2cc..0c3395d 100644
--- a/Documentation/devicetree/bindings/regulator/twl-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -13,6 +13,8 @@ For twl6030 regulators/LDOs
   - "ti,twl6030-vcxio" for VCXIO LDO
   - "ti,twl6030-vdac" for VDAC LDO
   - "ti,twl6030-vusb" for VUSB LDO
+  - "ti,twl6030-v1v8" for V1V8 LDO
+  - "ti,twl6030-v2v1" for V2V1 LDO
   - "ti,twl6030-clk32kg" for CLK32KG RESOURCE
   - "ti,twl6030-vdd1" for VDD1 SMPS
   - "ti,twl6030-vdd2" for VDD2 SMPS
diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 2a13211..9cdfc38 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -1094,6 +1094,8 @@ TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0);
 TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0);
 TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0);
 TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0);
+TWL6030_FIXED_LDO(V1V8, 0x16, 1800, 0);
+TWL6030_FIXED_LDO(V2V1, 0x1c, 2100, 0);
 TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0);
 TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34);
 TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10);
@@ -1173,6 +1175,8 @@ static const struct of_device_id twl_of_match[] 
__devinitconst = {
TWLFIXED_OF_MATCH("ti,twl6030-vcxio", VCXIO),
TWLFIXED_OF_MATCH("ti,twl6030-vdac", VDAC),
TWLFIXED_OF_MATCH("ti,twl6030-vusb", VUSB),
+   TWLFIXED_OF_MATCH("ti,twl6030-v1v8", V1V8),
+   TWLFIXED_OF_MATCH("ti,twl6030-v2v1", V2V1),
TWLRES_OF_MATCH("ti,twl6030-clk32kg", CLK32KG),
TWLSMPS_OF_MATCH("ti,twl6025-smps3", SMPS3),
TWLSMPS_OF_MATCH("ti,twl6025-smps4", SMPS4),
-- 
1.7.1


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 2/4] regulator: twl: adapt twl-regulator driver to dt

2012-02-28 Thread Rajendra Nayak
Modify the twl regulator driver to extract the regulator_init_data from
the device tree when passed, instead of getting it through platform_data
structures (on non-DT builds).

Also add documentation for TWL regulator specific bindings.

Signed-off-by: Rajendra Nayak 
---
 .../bindings/regulator/twl-regulator.txt   |   66 +
 drivers/regulator/twl-regulator.c  |  259 +---
 2 files changed, 238 insertions(+), 87 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/regulator/twl-regulator.txt

diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt 
b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
new file mode 100644
index 000..ba9d2cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -0,0 +1,66 @@
+TWL family of regulators
+
+Required properties:
+For twl6030 regulators/LDOs
+- compatible:
+  - "ti,twl6030-vaux1" for VAUX1 LDO
+  - "ti,twl6030-vaux2" for VAUX2 LDO
+  - "ti,twl6030-vaux3" for VAUX3 LDO
+  - "ti,twl6030-vmmc" for VMMC LDO
+  - "ti,twl6030-vpp" for VPP LDO
+  - "ti,twl6030-vusim" for VUSIM LDO
+  - "ti,twl6030-vana" for VANA LDO
+  - "ti,twl6030-vcxio" for VCXIO LDO
+  - "ti,twl6030-vdac" for VDAC LDO
+  - "ti,twl6030-vusb" for VUSB LDO
+  - "ti,twl6030-clk32kg" for CLK32KG RESOURCE
+  - "ti,twl6030-vdd1" for VDD1 SMPS
+  - "ti,twl6030-vdd2" for VDD2 SMPS
+  - "ti,twl6030-vdd3" for VDD3 SMPS
+For twl6025 regulators/LDOs
+- compatible:
+  - "ti,twl6025-ldo1" for LDO1 LDO
+  - "ti,twl6025-ldo2" for LDO2 LDO
+  - "ti,twl6025-ldo3" for LDO3 LDO
+  - "ti,twl6025-ldo4" for LDO4 LDO
+  - "ti,twl6025-ldo5" for LDO5 LDO
+  - "ti,twl6025-ldo6" for LDO6 LDO
+  - "ti,twl6025-ldo7" for LDO7 LDO
+  - "ti,twl6025-ldoln" for LDOLN LDO
+  - "ti,twl6025-ldousb" for LDOUSB LDO
+  - "ti,twl6025-smps3" for SMPS3 SMPS
+  - "ti,twl6025-smps4" for SMPS4 SMPS
+  - "ti,twl6025-vio" for VIO SMPS
+For twl4030 regulators/LDOs
+- compatible:
+  - "ti,twl4030-vaux1" for VAUX1 LDO
+  - "ti,twl4030-vaux2" for VAUX2 LDO
+  - "ti,twl5030-vaux2" for VAUX2 LDO
+  - "ti,twl4030-vaux3" for VAUX3 LDO
+  - "ti,twl4030-vaux4" for VAUX4 LDO
+  - "ti,twl4030-vmmc1" for VMMC1 LDO
+  - "ti,twl4030-vmmc2" for VMMC2 LDO
+  - "ti,twl4030-vpll1" for VPLL1 LDO
+  - "ti,twl4030-vpll2" for VPLL2 LDO
+  - "ti,twl4030-vsim" for VSIM LDO
+  - "ti,twl4030-vdac" for VDAC LDO
+  - "ti,twl4030-vintana2" for VINTANA2 LDO
+  - "ti,twl4030-vio" for VIO LDO
+  - "ti,twl4030-vdd1" for VDD1 SMPS
+  - "ti,twl4030-vdd2" for VDD2 SMPS
+  - "ti,twl4030-vintana1" for VINTANA1 LDO
+  - "ti,twl4030-vintdig" for VINTDIG LDO
+  - "ti,twl4030-vusb1v5" for VUSB1V5 LDO
+  - "ti,twl4030-vusb1v8" for VUSB1V8 LDO
+  - "ti,twl4030-vusb3v1" for VUSB3V1 LDO
+
+Optional properties:
+- Any optional property defined in bindings/regulator/regulator.txt
+
+Example:
+
+   xyz: regulator@0 {
+   compatible = "ti,twl6030-vaux1";
+   regulator-min-microvolt  = <1000000>;
+   regulator-max-microvolt  = <3000000>;
+   };
diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 8611282..2a13211 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -14,8 +14,11 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
+#include 
 #include 
 
 
@@ -920,7 +923,8 @@ static struct regulator_ops twlsmps_ops = {
TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \
0x0, TWL6030, twl6030fixed_ops)
 
-#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { 
\
+#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) \
+static struct twlreg_info TWL4030_INFO_##label = { \
.base = offset, \
.id = num, \
.table_len = ARRAY_SIZE(label##_VSEL_table), \
@@ -938,7 +942,7 @@ static struct regulator_ops twlsmps_ops = {
}
 
 #define TWL4030_ADJUSTABLE_SMPS(label, offset, num, turnon_delay, remap_conf) \
-   { \
+static struct twlreg_info TWL4030_INFO_##label = { \
.base = offset, \
.id = num, \
.delay = turnon_delay, \
@@ -952,7 +956,8 @@ static struct regulator_ops twlsmps_ops = {
}, \
}
 
-#define TWL6030_ADJUSTABLE_SMPS(label) { \
+#define TWL6030_ADJUSTABLE_SMPS(label) \
+static struct twlreg_info TWL6030_INFO_##label = { \
.desc = { \
.name = #label, \
.id = TWL6030_REG_##label, \
@@ -962,7 +967,8 @@ static struct regulator_ops twlsmps_ops = {
}, \
}
 
-#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
+#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) \
+static struct twlreg_info TWL6030_INFO_##label = { \
.base = offset, \
.min_mV = min_mVolts, \
.max_mV = max_mVolts, \
@@ -976,7 +982,8 @@ static struct regulator_ops twlsmps_ops = {

[PATCH 1/4] regulator: twl6030: add support for vdd1, vdd2 and vdd3 regulators

2012-02-28 Thread Rajendra Nayak
From: Tero Kristo 

vdd1 and vdd2 are now common regulators for twl4030 and twl6030. Also
added vdd3 as a new regulator for twl6030. The twl6030 vdd1...vdd3 SMPS
regulator voltages can only be controlled through the smartreflex
voltage channel, thus the support for getting and setting the voltage is
minimal and requires an external controller.

Signed-off-by: Tero Kristo 
Signed-off-by: Rajendra Nayak 
Cc: Mark Brown 
Cc: Liam Girdwood 
Cc: Samuel Ortiz 
Cc: Kevin Hilman 
---
 drivers/mfd/twl-core.c|   15 ++
 drivers/regulator/twl-regulator.c |   39 +
 include/linux/i2c/twl.h   |5 ++-
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 6c13d9f..e1d3a64 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -951,6 +951,21 @@ add_children(struct twl4030_platform_data *pdata, unsigned 
long features)
/* twl6030 regulators */
if (twl_has_regulator() && twl_class_is_6030() &&
!(features & TWL6025_SUBCLASS)) {
+   child = add_regulator(TWL6030_REG_VDD1, pdata->vdd1,
+   features);
+   if (IS_ERR(child))
+   return PTR_ERR(child);
+
+   child = add_regulator(TWL6030_REG_VDD2, pdata->vdd2,
+   features);
+   if (IS_ERR(child))
+   return PTR_ERR(child);
+
+   child = add_regulator(TWL6030_REG_VDD3, pdata->vdd3,
+   features);
+   if (IS_ERR(child))
+   return PTR_ERR(child);
+
child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc,
features);
if (IS_ERR(child))
diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 7ff8bb2..8611282 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -561,6 +561,32 @@ static struct regulator_ops twl4030smps_ops = {
.get_voltage= twl4030smps_get_voltage,
 };
 
+static int twl6030coresmps_set_voltage(struct regulator_dev *rdev, int min_uV,
+   int max_uV, unsigned *selector)
+{
+   struct twlreg_info *info = rdev_get_drvdata(rdev);
+
+   if (info->set_voltage)
+   return info->set_voltage(info->data, min_uV);
+
+   return -ENODEV;
+}
+
+static int twl6030coresmps_get_voltage(struct regulator_dev *rdev)
+{
+   struct twlreg_info *info = rdev_get_drvdata(rdev);
+
+   if (info->get_voltage)
+   return info->get_voltage(info->data);
+
+   return -ENODEV;
+}
+
+static struct regulator_ops twl6030coresmps_ops = {
+   .set_voltage= twl6030coresmps_set_voltage,
+   .get_voltage= twl6030coresmps_get_voltage,
+};
+
 static int twl6030ldo_list_voltage(struct regulator_dev *rdev, unsigned index)
 {
struct twlreg_info  *info = rdev_get_drvdata(rdev);
@@ -926,6 +952,16 @@ static struct regulator_ops twlsmps_ops = {
}, \
}
 
+#define TWL6030_ADJUSTABLE_SMPS(label) { \
+   .desc = { \
+   .name = #label, \
+   .id = TWL6030_REG_##label, \
+   .ops = &twl6030coresmps_ops, \
+   .type = REGULATOR_VOLTAGE, \
+   .owner = THIS_MODULE, \
+   }, \
+   }
+
 #define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
.base = offset, \
.min_mV = min_mVolts, \
@@ -1027,6 +1063,9 @@ static struct twlreg_info twl_regs[] = {
/* 6030 REG with base as PMC Slave Misc : 0x0030 */
/* Turnon-delay and remap configuration values for 6030 are not
   verified since the specification is not public */
+   TWL6030_ADJUSTABLE_SMPS(VDD1),
+   TWL6030_ADJUSTABLE_SMPS(VDD2),
+   TWL6030_ADJUSTABLE_SMPS(VDD3),
TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300),
TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300),
TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300),
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 08a82d3..f66c031 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -712,6 +712,9 @@ struct twl4030_platform_data {
struct regulator_init_data  *vaux1;
struct regulator_init_data  *vaux2;
struct regulator_init_data  *vaux3;
+   struct regulator_init_data  *vdd1;
+   struct regulator_init_data  *vdd2;
+   struct regulator_init_data  *vdd3;
/* TWL4030 LDO regulators */
struct regulator_init_data  *vpll1;
struct regulator_init_data  *vpll2;
@@ -720,8 +723,6 @@ struct twl4030_platform_data {
struct regulator_init_data  *vsim;
struct regulator_init_data  *vaux4;

[PATCH 4/4] MFD: twl-core: regulator configuration for twl6030 V1V8, V2V1 SMPS

2012-02-28 Thread Rajendra Nayak
From: Peter Ujfalusi 

To be able to attach consumers to these supplies from board
files we need to have regulator_init_data for them.

Signed-off-by: Peter Ujfalusi 
Signed-off-by: Rajendra Nayak 
Cc: Samuel Ortiz 
Cc: Misael Lopez Cruz 
Cc: Santosh Shilimkar 
---
 drivers/mfd/twl-core.c  |   10 ++
 include/linux/i2c/twl.h |2 ++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index e1d3a64..6cb1061 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -966,6 +966,16 @@ add_children(struct twl4030_platform_data *pdata, unsigned 
long features)
if (IS_ERR(child))
return PTR_ERR(child);
 
+   child = add_regulator(TWL6030_REG_V1V8, pdata->v1v8,
+   features);
+   if (IS_ERR(child))
+   return PTR_ERR(child);
+
+   child = add_regulator(TWL6030_REG_V2V1, pdata->v2v1,
+   features);
+   if (IS_ERR(child))
+   return PTR_ERR(child);
+
child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc,
features);
if (IS_ERR(child))
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index f66c031..7fcab23 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -734,6 +734,8 @@ struct twl4030_platform_data {
struct regulator_init_data  *vcxio;
struct regulator_init_data  *vusb;
struct regulator_init_data  *clk32kg;
+   struct regulator_init_data  *v1v8;
+   struct regulator_init_data  *v2v1;
/* TWL6025 LDO regulators */
struct regulator_init_data  *ldo1;
struct regulator_init_data  *ldo2;
-- 
1.7.1


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 0/4] twl-regulator DT adaptation and updates to add new regulators

2012-02-28 Thread Rajendra Nayak
Hi Mark,

Here is a consolidated series which adds DT support for twl regulator
driver and adds support for VDD1/2/3 regulator and support for
fixed LDO V1V8 and V2V1. The patches are based on -next and tested
on omap3 beagle and omap4 panda boards.

I have dropped the patch updating the dts entries for twl4030 and
twl6030 because of other dependencies and will submit them via
the OMAP tree. The driver DT changes however do not have any dependency
on the dts file updates (except that regulator support will not work with DT,
but will not break DT build or boot) and no functionality would change
in the non-DT case.

regards,
Rajendra

Peter Ujfalusi (2):
  regulator: twl-regulator: Add fixed LDO for V1V8, V2V1 supply
  MFD: twl-core: regulator configuration for twl6030 V1V8, V2V1 SMPS

Rajendra Nayak (1):
  regulator: twl: adapt twl-regulator driver to dt

Tero Kristo (1):
  regulator: twl6030: add support for vdd1, vdd2 and vdd3 regulators

 .../bindings/regulator/twl-regulator.txt   |   68 +
 drivers/mfd/twl-core.c |   25 ++
 drivers/regulator/twl-regulator.c  |  294 ++--
 include/linux/i2c/twl.h|7 +-
 4 files changed, 309 insertions(+), 85 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/regulator/twl-regulator.txt


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 2/2] module: use ZERO_OR_NULL_PTR allocation pointer checking

2012-02-28 Thread Dmitry Antipov
Use ZERO_OR_NULL_PTR allocation pointer checking where allocation
function may return ZERO_SIZE_PTR.
---
 kernel/module.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 2c93276..ae438db 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2322,14 +2322,14 @@ static void dynamic_debug_remove(struct _ddebug *debug)
 
 void * __weak module_alloc(unsigned long size)
 {
-   return size == 0 ? NULL : vmalloc_exec(size);
+   return vmalloc_exec(size);
 }
 
 static void *module_alloc_update_bounds(unsigned long size)
 {
void *ret = module_alloc(size);
 
-   if (ret) {
+   if (likely(!ZERO_OR_NULL_PTR(ret))) {
mutex_lock(&module_mutex);
/* Update module bounds. */
if ((unsigned long)ret < module_addr_min)
@@ -2638,7 +2638,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
 * leak.
 */
kmemleak_not_leak(ptr);
-   if (!ptr)
+   if (unlikely(ZERO_OR_NULL_PTR(ptr)))
return -ENOMEM;
 
memset(ptr, 0, mod->core_size);
@@ -2652,7 +2652,7 @@ static int move_module(struct module *mod, struct 
load_info *info)
 * after the module is initialized.
 */
kmemleak_ignore(ptr);
-   if (!ptr && mod->init_size) {
+   if (unlikely(ZERO_OR_NULL_PTR(ptr))) {
module_free(mod, mod->module_core);
return -ENOMEM;
}
-- 
1.7.7.6


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR

2012-02-28 Thread Dmitry Antipov
 - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested;
 - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes
   are requested;
 - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR.

Signed-off-by: Dmitry Antipov 
---
 mm/vmalloc.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86ce9a5..040a9cd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1456,7 +1456,7 @@ static void __vunmap(const void *addr, int 
deallocate_pages)
 {
struct vm_struct *area;
 
-   if (!addr)
+   if (unlikely(ZERO_OR_NULL_PTR(addr)))
return;
 
if ((PAGE_SIZE-1) & (unsigned long)addr) {
@@ -1548,7 +1548,9 @@ void *vmap(struct page **pages, unsigned int count,
 
might_sleep();
 
-   if (count > totalram_pages)
+   if (unlikely(!count))
+   return ZERO_SIZE_PTR;
+   if (unlikely(count > totalram_pages))
return NULL;
 
area = get_vm_area_caller((count << PAGE_SHIFT), flags,
@@ -1648,8 +1650,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned 
long align,
void *addr;
unsigned long real_size = size;
 
+   if (unlikely(!size))
+   return ZERO_SIZE_PTR;
size = PAGE_ALIGN(size);
-   if (!size || (size >> PAGE_SHIFT) > totalram_pages)
+   if (unlikely((size >> PAGE_SHIFT) > totalram_pages))
goto fail;
 
area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
-- 
1.7.7.6


___
linaro-dev mailing list
linaro-dev@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-dev


[RFC PATCH] module: debugging check for runaway kthreads

2012-02-28 Thread Dmitry Antipov
Debugging option CONFIG_MODULE_KTHREAD_CHECK provides a way to check
whether all kernel threads that were created by the module and use module
code as their thread worker function have really exited when the module is
unloaded. The following pseudo-code contains an example of an error which
is likely to be caught by this debugging check:

static struct task_struct *tsk;
static DECLARE_COMPLETION(done);

static void *func(void *unused)
{
while (!kthread_should_stop())
  real_work();
complete(&done);
}

static int __init modinit(void)
{
tsk = kthread_run(func, NULL, "func");
return IS_ERR(tsk) ? PTR_ERR(tsk) : 0;
}

static void __exit modexit(void)
{
wait_for_completion(&done);
}

Signed-off-by: Dmitry Antipov 
---
 include/linux/kthread.h |5 +
 init/Kconfig|9 +
 kernel/kthread.c|   24 
 kernel/module.c |   45 +
 4 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 0714b24..33897c3 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -13,6 +13,11 @@ struct task_struct *kthread_create_on_node(int 
(*threadfn)(void *data),
 #define kthread_create(threadfn, data, namefmt, arg...) \
kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
 
+#ifdef CONFIG_MODULE_KTHREAD_CHECK
+unsigned long get_kthread_func(struct task_struct *tsk);
+#else
+#define get_kthread_func(tsk, addr, mod) (0)
+#endif
 
 /**
  * kthread_run - create and wake a thread.
diff --git a/init/Kconfig b/init/Kconfig
index 3f42cd6..fa7c6e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1397,6 +1397,15 @@ config MODULE_FORCE_UNLOAD
  rmmod).  This is mainly for kernel developers and desperate users.
  If unsure, say N.
 
+config MODULE_KTHREAD_CHECK
+   bool "Check for runaway kernel threads at module unload"
+   depends on MODULE_UNLOAD && EXPERIMENTAL && DEBUG_KERNEL
+   help
+ This option allows you to check whether all kernel threads created
+ by the module and have used module code as a thread worker function
+ are really exited when the module is unloaded. This is mainly for
+ module developers. If unsure, say N.
+
 config MODVERSIONS
bool "Module versioning support"
help
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3d3de63..5c53817 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -38,6 +38,9 @@ struct kthread_create_info
 
 struct kthread {
int should_stop;
+#ifdef CONFIG_MODULE_KTHREAD_CHECK
+   void *fn;
+#endif
void *data;
struct completion exited;
 };
@@ -45,6 +48,24 @@ struct kthread {
 #define to_kthread(tsk)\
container_of((tsk)->vfork_done, struct kthread, exited)
 
+#ifdef CONFIG_MODULE_KTHREAD_CHECK
+
+unsigned long get_kthread_func(struct task_struct *tsk)
+{
+   struct kthread *kt;
+   unsigned long addr;
+
+   get_task_struct(tsk);
+   BUG_ON(!(tsk->flags & PF_KTHREAD));
+   kt = to_kthread(tsk);
+   barrier();
+   addr = tsk->vfork_done ? (unsigned long)kt->fn : 0UL;
+   put_task_struct(tsk);
+   return addr;
+}
+
+#endif /* CONFIG_MODULE_KTHREAD_CHECK */
+
 /**
  * kthread_should_stop - should this kthread return now?
  *
@@ -106,6 +127,9 @@ static int kthread(void *_create)
int ret;
 
self.should_stop = 0;
+#ifdef CONFIG_MODULE_KTHREAD_CHECK
+   self.fn = threadfn;
+#endif
self.data = data;
init_completion(&self.exited);
current->vfork_done = &self.exited;
diff --git a/kernel/module.c b/kernel/module.c
index 2c93276..fe6637b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -764,6 +765,49 @@ static void wait_for_zero_refcount(struct module *mod)
mutex_lock(&module_mutex);
 }
 
+#ifdef CONFIG_KALLSYMS
+static const char *get_ksymbol(struct module *mod, unsigned long addr,
+  unsigned long *size, unsigned long *offset);
+#else
+#define get_ksymbol(mod, addr, size, offset) NULL
+#endif
+
+#ifdef CONFIG_MODULE_KTHREAD_CHECK
+
+static void check_kthreads(struct module *mod)
+{
+   unsigned long flags;
+   struct task_struct *g, *p;
+
+   read_lock_irqsave(&tasklist_lock, flags);
+   do_each_thread(g, p) {
+   const char *name;
+   unsigned long addr, offset, size;
+
+   /* Note kthreadd is special. Other kthreads should
+  have their 'struct kthread' on the stack until
+  do_exit() calls schedule() for the last time. */
+   if (p->mm || p == kthreadd_task)
+   continue;
+
+   addr = get_kthread_func(p);
+   if (__module_text_address(addr) == mod) {
+   name = get_ksymbol(mod, ad

[RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks

2012-02-28 Thread Dmitry Antipov
This patch proposes a system-wide sysctl-aware default for the
high-resolution timer slack value, which may be changed from 0
to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task
values are HRTIMER_DEFAULT_SLACK. The per-task value isn't inherited
across fork(); instead, a newborn task uses the system-wide value by
default, and a newborn thread uses its group leader's value.

Signed-off-by: Dmitry Antipov 
---
 Documentation/sysctl/kernel.txt |8 
 include/linux/hrtimer.h |   11 +++
 include/linux/init_task.h   |2 +-
 include/linux/sched.h   |   11 ---
 kernel/fork.c   |9 +++--
 kernel/futex.c  |4 ++--
 kernel/hrtimer.c|   10 +++---
 kernel/sys.c|8 +---
 kernel/sysctl.c |   10 ++
 9 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d78841..83b63ed 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -606,6 +606,14 @@ can be ORed together:
 
 ==
 
+timer_slack:
+
+This value can be used to query and set the default slack for
+high-resolution timers, in nanoseconds. The default value is 50
+microseconds, and can be changed from 0 nanoseconds to 1 millisecond.
+
+==
+
 unknown_nmi_panic:
 
 The value in this file affects behavior of handling NMI. When the
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..b9da137 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -24,6 +24,16 @@
 #include 
 #include 
 
+/*
+ * Default system-wide and per-task hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_DEFAULT_SLACK 50000
+
+/*
+ * Reasonable limit for hrtimer slack, in nanoseconds.
+ */
+#define HRTIMER_MAX_SLACK 1000000
+
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
 
@@ -323,6 +333,7 @@ extern ktime_t ktime_get_monotonic_offset(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+extern int default_timer_slack_ns;
 
 /* Exported timer functions: */
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 9c66b1a..b29be0d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -178,7 +178,7 @@ extern struct cred init_cred;
.journal_info   = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers),  \
.pi_lock= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),\
-   .timer_slack_ns = 50000, /* 50 usec default slack */\
+   .timer_slack_ns = HRTIMER_DEFAULT_SLACK,\
.pids = {   \
[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),\
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),   \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d379a6..aa0a806 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,11 +1551,11 @@ struct task_struct {
struct latency_record latency_record[LT_SAVECOUNT];
 #endif
/*
-* time slack values; these are used to round up poll() and
-* select() etc timeout values. These are in nanoseconds.
+* High-resolution timer slack value, in nanoseconds.
+* Used to round up poll()/select(), nanosleep, futex
+* waiting, etc. timeout values of non-realtime tasks.
 */
unsigned long timer_slack_ns;
-   unsigned long default_timer_slack_ns;
 
struct list_head*scm_work_list;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -2628,6 +2628,11 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+static inline unsigned long task_timer_slack(struct task_struct *tsk)
+{
+   return rt_task(tsk) ? 0 : tsk->timer_slack_ns;
+}
+
 /*
  * Thread group CPU time accounting.
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index e2cd3e2..0f9a983 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1167,8 +1167,13 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 #if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
-
-   p->default_timer_slack_ns = current->timer_slack_ns;
+   /* 
+* New thread inherits the slack from the group
+* leader. New process uses system-default slack.
+*/
+   p->timer_slack_ns = (clone_flags & CLONE_THREAD) ?
+   current->group_leader->timer_slack_ns :
+   default_timer_slack_ns;
 
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 1614be2..a0d302d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1887,7 +1887,7 @@ static int futex_wait(u32 __user *