[PATCH 1/2] ARM: add cpufreq transition notifier to adjust loops_per_jiffy for smp
If CONFIG_SMP, cpufreq skips loops_per_jiffy update, because different arch has different per-cpu loops_per_jiffy definition. Signed-off-by: Richard Zhao Acked-by: Russell King --- arch/arm/kernel/smp.c | 54 + 1 files changed, 54 insertions(+), 0 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cdeb727..4381bef 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -599,3 +600,56 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, l_p_j_ref); +static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq); +static unsigned long global_l_p_j_ref; +static unsigned long global_l_p_j_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; + + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + if (!per_cpu(l_p_j_ref, cpu)) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!global_l_p_j_ref) { + global_l_p_j_ref = loops_per_jiffy; + global_l_p_j_ref_freq = freq->old; + } + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { + loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, + global_l_p_j_ref_freq, + freq->new); + per_cpu(cpu_data, cpu).loops_per_jiffy = + cpufreq_scale(per_cpu(l_p_j_ref, cpu), + per_cpu(l_p_j_ref_freq, cpu), + freq->new); + } + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif -- 
1.7.5.4 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 0/2] change lpj in arm smp common code
The two patches were originally in [PATCH V6 0/7] add a generic cpufreq driver. I separated them and hope they can go upstream earlier. Richard Zhao (2): ARM: add cpufreq transition notifier to adjust loops_per_jiffy for smp cpufreq: OMAP: remove loops_per_jiffy recalculate for smp arch/arm/kernel/smp.c | 54 drivers/cpufreq/omap-cpufreq.c | 36 -- 2 files changed, 54 insertions(+), 36 deletions(-) -- 1.7.5.4 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 2/2] cpufreq: OMAP: remove loops_per_jiffy recalculate for smp
arm registered cpufreq transition notifier to recalculate it. Signed-off-by: Richard Zhao --- drivers/cpufreq/omap-cpufreq.c | 36 1 files changed, 0 insertions(+), 36 deletions(-) diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 5d04c57..17da4c4 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -37,16 +37,6 @@ #include -#ifdef CONFIG_SMP -struct lpj_info { - unsigned long ref; - unsigned intfreq; -}; - -static DEFINE_PER_CPU(struct lpj_info, lpj_ref); -static struct lpj_info global_lpj_ref; -#endif - static struct cpufreq_frequency_table *freq_table; static atomic_t freq_table_users = ATOMIC_INIT(0); static struct clk *mpu_clk; @@ -118,32 +108,6 @@ static int omap_target(struct cpufreq_policy *policy, ret = clk_set_rate(mpu_clk, freqs.new * 1000); freqs.new = omap_getspeed(policy->cpu); -#ifdef CONFIG_SMP - /* -* Note that loops_per_jiffy is not updated on SMP systems in -* cpufreq driver. So, update the per-CPU loops_per_jiffy value -* on frequency transition. We need to update all dependent CPUs. -*/ - for_each_cpu(i, policy->cpus) { - struct lpj_info *lpj = &per_cpu(lpj_ref, i); - if (!lpj->freq) { - lpj->ref = per_cpu(cpu_data, i).loops_per_jiffy; - lpj->freq = freqs.old; - } - - per_cpu(cpu_data, i).loops_per_jiffy = - cpufreq_scale(lpj->ref, lpj->freq, freqs.new); - } - - /* And don't forget to adjust the global one */ - if (!global_lpj_ref.freq) { - global_lpj_ref.ref = loops_per_jiffy; - global_lpj_ref.freq = freqs.old; - } - loops_per_jiffy = cpufreq_scale(global_lpj_ref.ref, global_lpj_ref.freq, - freqs.new); -#endif - /* notifiers */ for_each_cpu(i, policy->cpus) { freqs.cpu = i; -- 1.7.5.4 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: 3, 2, 1....BLASTOFF
Zach, On Wed, Feb 29, 2012 at 7:54 AM, Zach Pfeffer wrote: > The preliminary 12.03 Android plan is up. > > https://launchpad.net/linaro-android/+milestone/12.03 > > ARM A15, A7 and A15/A7 Fast Models running Android, big.LITTLE > testing, dual SD cards, unit tests, more enablement, improved Android > SMP, and more!!! It's good news that the SMP to HMP work has been taken up with Android here. I am curious to know the project details since I was trying the same in FastModel. Which RTSM_EB are you using for this bring-up? FastModel comes with only the RTSM_VE A15/A7 pack, which has only the cores simulated, not even a UART. Regards, Senthil ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR
On 02/28/2012 05:30 PM, Dan Carpenter wrote: Could you include that in the changelog when the final version is ready? Which changelog are you talking about? Dmitry ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: ARM A9 oprofile
On 02/28/2012 05:27 PM, Ming Lei wrote: OK, could you try the MLO and u-boot.bin under the link of http://kernel.ubuntu.com/~ming/up to see if 'perf' may work well? Is it really possible that the bootloader stuff affects perf/oprofile? If still not, could you tell me what is the revision of your pandaboard? The kernel and u-boot says that CPU is OMAP4430 ES2.2, the board box has the label with PANDABOARD UEVM4430G-01-00-00, and /proc/cpuinfo is shown below. Processor : ARMv7 Processor rev 2 (v7l) processor : 0 BogoMIPS: 597.81 processor : 1 BogoMIPS: 597.81 Features: swp half thumb fastmult vfp edsp thumbee neon vfpv3 tls CPU implementer : 0x41 CPU architecture: 7 CPU variant : 0x1 CPU part: 0xc09 CPU revision: 2 Hardware: OMAP4 Panda board Revision: 0020 Serial : or do you have any changes on the hardware? No. I am sure that several guys have tried the current omap4 pmu patch and make perf work well on pandaboard. Perf (in particular, "perf top") works for me too. Also I tried "perf record -a -F 1000 sleep 200" while running the kernel module workload, and have never seen "nobody cared" IRQ issues. You have said that oprofile uses perf subsystem as a backend, so this looks even more strange. Dmitry ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH 2/3] ARM: EXYNOS: Add clkdev lookup entry for lcd clock
Hi Kukjin, On 12/01/2011 11:20 AM, Tushar Behera wrote: > The framebuffer driver needs the clock named 'lcd' as its bus > clock but the equivalent clock on Exynos4 is named as 'fimd'. > Hence, create a clkdev lookup entry with the name 'lcd' that > references the 'fimd' clock. > > Signed-off-by: Tushar Behera > --- > arch/arm/mach-exynos/clock.c | 14 +- > 1 files changed, 9 insertions(+), 5 deletions(-) > > diff --git a/arch/arm/mach-exynos/clock.c b/arch/arm/mach-exynos/clock.c > index 5d8d483..607ec28 100644 > --- a/arch/arm/mach-exynos/clock.c > +++ b/arch/arm/mach-exynos/clock.c > @@ -489,11 +489,6 @@ static struct clk init_clocks_off[] = { > .enable = exynos4_clk_ip_cam_ctrl, > .ctrlbit= (1 << 3), > }, { > - .name = "fimd", > - .devname= "exynos4-fb.0", > - .enable = exynos4_clk_ip_lcd0_ctrl, > - .ctrlbit= (1 << 0), > - }, { > .name = "hsmmc", > .devname= "s3c-sdhci.0", > .parent = &clk_aclk_133.clk, > @@ -782,6 +777,13 @@ static struct clk clk_pdma1 = { > .ctrlbit= (1 << 1), > }; > > +static struct clk clk_fimd0 = { > + .name = "fimd", > + .devname= "exynos4-fb.0", > + .enable = exynos4_clk_ip_lcd0_ctrl, > + .ctrlbit= (1 << 0), > +}; > + > struct clk *clkset_group_list[] = { > [0] = &clk_ext_xtal_mux, > [1] = &clk_xusbxti, > @@ -1294,6 +1296,7 @@ static struct clksrc_clk *sysclks[] = { > static struct clk *clk_cdev[] = { > &clk_pdma0, > &clk_pdma1, > + &clk_fimd0, > }; > > static struct clksrc_clk *clksrc_cdev[] = { > @@ -1318,6 +1321,7 @@ static struct clk_lookup exynos4_clk_lookup[] = { > CLKDEV_INIT("s3c-sdhci.3", "mmc_busclk.2", &clk_sclk_mmc3.clk), > CLKDEV_INIT("dma-pl330.0", "apb_pclk", &clk_pdma0), > CLKDEV_INIT("dma-pl330.1", "apb_pclk", &clk_pdma1), > + CLKDEV_INIT("exynos4-fb.0", "lcd", &clk_fimd0), > }; > > static int xtal_rate; Would you please review this patch and let me know your opinion? Without this patch, frame-buffer support on EXYNOS4 is broken. 
-- Tushar Behera ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Benchmark summary for Linaro GCC
The topic of benchmarking keeps coming up. We're working on making the next FSF release better, but it's a good idea to track how the current Linaro GCC stacks up against other releases. The summary is at: https://wiki.linaro.org/Internal/ToolChain/Now Included is how our current 4.6 release does against FSF 4.6, the change over six months, and how the upcoming 4.7 release fares. There's also a comparison against other compilers including the Google 4.6 and Android 4.4 branches. A PDF version is attached to the page. The SPEC 2000 results are still coming in so I'll update the page once they arrive. Everything is generated so we'll update this with each monthly release. -- Michael ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
3, 2, 1....BLASTOFF
The preliminary 12.03 Android plan is up. https://launchpad.net/linaro-android/+milestone/12.03 ARM A15, A7 and A15/A7 Fast Models running Android, big.LITTLE testing, dual SD cards, unit tests, more enablement, improved Android SMP, and more!!! -- Zach Pfeffer Android Platform Team Lead, Linaro Platform Teams Linaro.org | Open source software for ARM SoCs Follow Linaro: http://www.facebook.com/pages/Linaro http://twitter.com/#!/linaroorg - http://www.linaro.org/linaro-blog ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
>> Sounds reasonable. In some cases it may be helpful to track state >> demotion as well. Since I'm still a noob and wearing my submission >> training wheels, I'm trying to minimize things that fall outside of >> this basic consolidation effort for this patch series. But I added >> Jon's suggestion to this cpuidle page which contains future cpuidle >> items to consider adding: >> https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts > > Yeah, I don't want to feature-bloat your submission more than > necessary. I'm happy for the usage counter stuff to get tackled at a > later date, but you're still on board for setting last_residency to > zero in this series, right? Yes. > > Regards, > Mike > ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
On Tue, Feb 28, 2012 at 3:33 PM, Rob Lee wrote: >> >> I brought this topic up internally and Jon suggested that the 'usage' >> statistics that are reported in sysfs should also reflect failed >> versus successful C-state transitions, which is a great idea. This >> could simply be achieved by renaming the current 'usage' count to >> something like 'transitions_attempted' and then conditionally >> increment a new counter within the 'if (entered_state >= 0)' block, >> perhaps named, 'transition_succeeded'. >> >> This way the old 'usage' count paradigm is as accurate as the new >> time-keeping code. Being able to easily observe which C-state tend to >> fail the most would be invaluable in tuning idle policy for maximum >> effectiveness. >> >> Thoughts? > > Sounds reasonable. In some cases it may be helpful to track state > demotion as well. Since I'm still a noob and wearing my submission > training wheels, I'm trying to minimize things that fall outside of > this basic consolidation effort for this patch series. But I added > Jon's suggestion to this cpuidle page which contains future cpuidle > items to consider adding: > https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts Yeah, I don't want to feature-bloat your submission more than necessary. I'm happy for the usage counter stuff to get tackled at a later date, but you're still on board for setting last_residency to zero in this series, right? Regards, Mike > >> >> Regards, >> Mike >> >>> Regards, Mike ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
> > I brought this topic up internally and Jon suggested that the 'usage' > statistics that are reported in sysfs should also reflect failed > versus successful C-state transitions, which is a great idea. This > could simply be achieved by renaming the current 'usage' count to > something like 'transitions_attempted' and then conditionally > increment a new counter within the 'if (entered_state >= 0)' block, > perhaps named, 'transition_succeeded'. > > This way the old 'usage' count paradigm is as accurate as the new > time-keeping code. Being able to easily observe which C-state tend to > fail the most would be invaluable in tuning idle policy for maximum > effectiveness. > > Thoughts? Sounds reasonable. In some cases it may be helpful to track state demotion as well. Since I'm still a noob and wearing my submission training wheels, I'm trying to minimize things that fall outside of this basic consolidation effort for this patch series. But I added Jon's suggestion to this cpuidle page which contains future cpuidle items to consider adding: https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts > > Regards, > Mike > >> >>> >>> Regards, >>> Mike ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
On Tue, Feb 28, 2012 at 7:50 AM, Rob Lee wrote: > On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike wrote: >> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee wrote: >>> +/** >>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter >>> function >>> + * @dev: pointer to a valid cpuidle_device object >>> + * @drv: pointer to a valid cpuidle_driver object >>> + * @index: index of the target cpuidle state. >>> + */ >>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, >>> + struct cpuidle_driver *drv, int index, >>> + int (*enter)(struct cpuidle_device *dev, >>> + struct cpuidle_driver *drv, int >>> index)) >>> +{ >>> + ktime_t time_start, time_end; >>> + s64 diff; >>> + >>> + time_start = ktime_get(); >>> + >>> + index = enter(dev, drv, index); >>> + >>> + time_end = ktime_get(); >>> + >>> + local_irq_enable(); >>> + >>> + diff = ktime_to_us(ktime_sub(time_end, time_start)); >>> + if (diff > INT_MAX) >>> + diff = INT_MAX; >>> + >>> + dev->last_residency = (int) diff; >>> + >>> + return index; >>> +} >> >> Hi Rob, >> >> In a previous series I brought up the idea of not accounting for time >> if a C-state transition fails. My post on that thread can be found >> here: >> http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/ >> >> How do you feel about adding something like the following? >> >> if (IS_ERR(index)) >> dev->last_residency = 0; >> return index; >> >> Obviously it will up to the platforms to figure out how to propagate >> that error up from their respective low power code. > > To be completely clear on what you're asking for, from > cpuidle_idle_call in drivers/cpuidle/cpuidle.c: > > ... 
> target_state = &drv->states[next_state]; > > trace_power_start(POWER_CSTATE, next_state, dev->cpu); > trace_cpu_idle(next_state, dev->cpu); > > entered_state = target_state->enter(dev, drv, next_state); > > trace_power_end(dev->cpu); > trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); > > if (entered_state >= 0) { > /* Update cpuidle counters */ > /* This can be moved to within driver enter routine > * but that results in multiple copies of same code. > */ > dev->states_usage[entered_state].time += > (unsigned long long)dev->last_residency; > dev->states_usage[entered_state].usage++; > } > ... > > Note the "if (entered_state >= 0)". This ultimately prevents the > cpuidle device time accounting upon an negative value being returned. > So are you asking for the if(IS_ERR(index)) check to prevent the > unnecessary last_residency time calculation in the wrapper, or to make > sure a last_residency is zero upon failure? (or both?) > > It seems like a bug (or lack or documentation at best) in the code > that exists today to not zero out dev->last_residency upon a negative > return value as this value is used by the governors upon the next > idle. So to ensure last_residency is 0 upon failure, I think it'd be > best to add that to an new else statement immediately following the > "if (entered_state >=))" so that any platform cpuidle driver that > returns a negative will have the last_residency zeroed out, not just > those that use en_core_tk_irqen. + Cc: Jon Hunter Hi Rob, I didn't review the code carefully enough to catch the 'if (entered_state >= 0)' part, but that seems like a graceful way to solve this problem by appending the 'else' statement on there and seeting last_residency to zero. I brought this topic up internally and Jon suggested that the 'usage' statistics that are reported in sysfs should also reflect failed versus successful C-state transitions, which is a great idea. 
This could simply be achieved by renaming the current 'usage' count to something like 'transitions_attempted' and then conditionally incrementing a new counter within the 'if (entered_state >= 0)' block, perhaps named 'transition_succeeded'. This way the old 'usage' count paradigm is as accurate as the new time-keeping code. Being able to easily observe which C-states tend to fail the most would be invaluable in tuning idle policy for maximum effectiveness. Thoughts? Regards, Mike > >> >> Regards, >> Mike ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: Sample big.LITTLE model boot images
> ASYNC ?= FALSE > }}} > > Plug in the kernel and initramfs images from the previous steps: > > {{{ > $ : >payload/fsimg > $ cp ../../zBlob payload/kernel > $ cp initrd.img payload/fsimg > }}} > > ... and build: > > {{{ > $ make > [...] > LD img.axf > }}} > > img.axf is the boot payload which is needed to start the model. It > contains the big.LITTLE switcher code and the kernel, along with a > minimal bootloader. > > > == Running == > > OK, so now we have: > > * A boot image containing the switcher and kernel: img.axf > * A filesystem MMC card image: mmc.bin > * A model binary > > Sample payload images can be found in > http://people.linaro.org/~dmart/bl-images/model-images-20120228.tar.bz2 > > > Run like this: > > {{{ > $ ./RTSM_VE_Cortex-A15x4-A7x4 -C motherboard.mmc.p_mmc_file=mmc.bin -a > coretile.cluster0.*=img.axf > }}} > > > This should be enough to boot to a prompt on the simulated UART. > > Beware though -- it can take up to 10 minutes or so to get there, > depending on your machine. -- Zach Pfeffer Android Platform Team Lead, Linaro Platform Teams Linaro.org | Open source software for ARM SoCs Follow Linaro: http://www.facebook.com/pages/Linaro http://twitter.com/#!/linaroorg - http://www.linaro.org/linaro-blog ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Sample big.LITTLE model boot images
Hi all, I've added some detailed info on the wiki about how to go about building bootable images for the big.LITTLE components, here: https://wiki.linaro.org/Internal/Projects/Big.Little.Switcher/ARMFastModelsHowto For the benefit of anyone who can't see that page, I've also pasted it here -- if anyone knows a better place to post this stuff, please let me know! Note that although the instructions give details of how to build a filesystem image, there is essentially nothing b.L or model-specific there -- it just serves as background for explaining how to work around some issues which you may encounter. These images are not supposed to present a fully working big.LITTLE system -- they are there to help people understand how booting works, where the sources are and how to work with the model. Note also that the bootwrapper will change soon (when I get there) to support loading of the kernel, initramfs from the host filesystem using semihosting, just like the kvm bootwrapper, but this isn't implemented for now. Cheers ---Dave == Filesystem == We need to create an SD card image with a filesystem on it, with the rootfs partition matching the bootargs set previously. The following example generates a suitable image using developer. The hardware pack is largely irrelevant because we're not getting the kernel, modules, bootloader or command-line from there -- but it keeps l-m-c happy. {{{ $ wget http://releases.linaro.org/12.02/ubuntu/oneiric-images/developer/linaro-o-developer-tar-20120221-0.tar.gz $ wget http://releases.linaro.org/12.02/ubuntu/oneiric-images/developer/hwpack_linaro-lt-vexpress-a9_20120221-1_armel_supported.tar.gz }}} Note that the ARM fast model's MMC interface emulation is currently limited to 2G. 
Make the image a bit smaller than that to be on the safe side: {{{ $ sudo linaro-media-create --dev vexpress --image_file mmc.bin --image_size 2000M --hwpack hwpack_linaro-lt-vexpress-a9_20120221-1_armel_supported.tar.gz --binary linaro-o-developer-tar-20120221-0.tar.gz }}} After generating the filesystem, you probably want to customize it: * Disable address layout randomization (mitigates a memory consumption issue with the model, but not so important on host machines with >4GB of RAM): {{{ echo sys.kernel.randomize_va_space = 0 >>etc/sysctl.conf }}} * Disable DHCP for the network (otherwise boot will stall): {{{ sed -i '/auto.*eth0/d; s/^\(iface.*eth0.*\)dhcp/\1manual/' etc/network/interfaces }}} * Edit fstab to put the real device name into /etc/fstab in place of UUID=. This may not be necessary (mountall was getting confused, but that may have been caused by not having an initramfs). Finally, extract the initramfs image (whatever /initrd.img in the filesystem image points to). == Kernel, switcher and bootwrapper == Clone the big.LITTLE switcher: {{{ $ git clone --branch gnu-build git://git.linaro.org/people/dmart/arm-virt-bl.git }}} Clone the kernel to run on the model. (Eventually we shouldn't need a special tree for this. There are a few model-specific tweaks in this tree, but nothing big.LITTLE-specific yet.) {{{ $ git clone --branch arm/vexpressdt-rtsm git://git.linaro.org/people/dmart/linux-2.6-arm.git }}} Now build the kernel: {{{ $ cd linux-2.6-arm/ $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- vexpress_rtsm_defconfig $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- zImage dtbs }}} The bootwrapper doesn't support device tree yet, so for now append a suitable device tree blob to the zImage.
This should match the model you intend to run on: {{{ $ cat arch/arm/boot/zImage arch/arm/boot/vexpress-v2p-ca15x4-rtsm.dtb >../zBlob }}} Now, build the bootwrapper using the kernel you just built: {{{ $ cd ../arm-virt-bl/bootwrapper }}} Configure some things: In Makefile, add the following argument to BOOTARGS: {{{ root=/dev/mmcblk0p2 }}} Due to current bugginess introduced by the ARM->GNU toolchain migration, the switcher is not currently stable. For now, we turn off autonomous asynchronous switching so that we can see the system booting: In big-little/Makefile, change the line {{{ ASYNC ?= TRUE }}} to {{{ ASYNC ?= FALSE }}} Plug in the kernel and initramfs images from the previous steps: {{{ $ : >payload/fsimg $ cp ../../zBlob payload/kernel $ cp initrd.img payload/fsimg }}} ... and build: {{{ $ make [...] LD img.axf }}} img.axf is the boot payload which is needed to start the model. It contains the big.LITTLE switcher code and the kernel, along with a minimal bootloader. == Running == OK, so now we have: * A boot image containing the switcher and kernel: img.axf * A filesystem MMC card image: mmc.bin * A model binary Sample payload images can be found in http://people.linaro.org/~dmart/bl-images/model-images-20120228.tar.bz2 Run like this: {{{ $ ./RTSM_VE_Cortex-A15x4-A7x4 -C motherboard.mmc.p_mmc_file=mmc.bin -a coretile.c
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
>>> Any reason that this code is in the header? Why not in cpuidle.c? >>> >> >> Not a strong reason. I thought making it an inline would introduce >> slightly less new execution when adding this code (realizing that >> there are function calls immediately after, so the only benefit is the >> reduce popping and pushing). But it does require an extra copy of >> this code for any platform driver that does not enable >> en_core_tk_irqen and instead makes calls to it directly (like omap3). >> For this case, I don't think the inline implementation should add >> extra code from what exists today as it should simply replace the >> existing platform time keeping calls to a standard one defined by the >> core cpuidle. >> > But you will have multiple copies of the inlined code if platforms do > use it. Or is it used only by the core cpuidle code? In that case, gcc > can automatically inline static functions. Used by some platforms as well. > > It seems a bit long to inline and this isn't performance critical (at > least for the enter side). Ok. Unless there are further comments supporting the inline method, I'll switch to non-inline for next version. Thanks Mike and Rob for the feedback. > > Rob > >> I don't have a strong preference with using the inline so if you or >> others can give your opinion on which method to use and why, I'd be >> glad to read it. >> >>> Regards, >>> Mike >> >> ___ >> linux-arm-kernel mailing list >> linux-arm-ker...@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel > ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
On 02/28/2012 09:45 AM, Rob Lee wrote: > Hey Mike, > > On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike wrote: >> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee wrote: >>> +/** >>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter >>> function >>> + * @dev: pointer to a valid cpuidle_device object >>> + * @drv: pointer to a valid cpuidle_driver object >>> + * @index: index of the target cpuidle state. >>> + */ >>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, >>> + struct cpuidle_driver *drv, int index, >>> + int (*enter)(struct cpuidle_device *dev, >>> + struct cpuidle_driver *drv, int >>> index)) >>> +{ >>> + ktime_t time_start, time_end; >>> + s64 diff; >>> + >>> + time_start = ktime_get(); >>> + >>> + index = enter(dev, drv, index); >>> + >>> + time_end = ktime_get(); >>> + >>> + local_irq_enable(); >>> + >>> + diff = ktime_to_us(ktime_sub(time_end, time_start)); >>> + if (diff > INT_MAX) >>> + diff = INT_MAX; >>> + >>> + dev->last_residency = (int) diff; >>> + >>> + return index; >>> +} >> >> Any reason that this code is in the header? Why not in cpuidle.c? >> > > Not a strong reason. I thought making it an inline would introduce > slightly less new execution when adding this code (realizing that > there are function calls immediately after, so the only benefit is the > reduce popping and pushing). But it does require an extra copy of > this code for any platform driver that does not enable > en_core_tk_irqen and instead makes calls to it directly (like omap3). > For this case, I don't think the inline implementation should add > extra code from what exists today as it should simply replace the > existing platform time keeping calls to a standard one defined by the > core cpuidle. > But you will have multiple copies of the inlined code if platforms do use it. Or is it used only by the core cpuidle code? In that case, gcc can automatically inline static functions. 
It seems a bit long to inline and this isn't performance critical (at least for the enter side). Rob > I don't have a strong preference with using the inline so if you or > others can give your opinion on which method to use and why, I'd be > glad to read it. > >> Regards, >> Mike > > ___ > linux-arm-kernel mailing list > linux-arm-ker...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike wrote: > On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee wrote: >> +/** >> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function >> + * @dev: pointer to a valid cpuidle_device object >> + * @drv: pointer to a valid cpuidle_driver object >> + * @index: index of the target cpuidle state. >> + */ >> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, >> + struct cpuidle_driver *drv, int index, >> + int (*enter)(struct cpuidle_device *dev, >> + struct cpuidle_driver *drv, int >> index)) >> +{ >> + ktime_t time_start, time_end; >> + s64 diff; >> + >> + time_start = ktime_get(); >> + >> + index = enter(dev, drv, index); >> + >> + time_end = ktime_get(); >> + >> + local_irq_enable(); >> + >> + diff = ktime_to_us(ktime_sub(time_end, time_start)); >> + if (diff > INT_MAX) >> + diff = INT_MAX; >> + >> + dev->last_residency = (int) diff; >> + >> + return index; >> +} > > Hi Rob, > > In a previous series I brought up the idea of not accounting for time > if a C-state transition fails. My post on that thread can be found > here: > http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/ > > How do you feel about adding something like the following? > > if (IS_ERR(index)) > dev->last_residency = 0; > return index; > > Obviously it will up to the platforms to figure out how to propagate > that error up from their respective low power code. To be completely clear on what you're asking for, from cpuidle_idle_call in drivers/cpuidle/cpuidle.c: ... target_state = &drv->states[next_state]; trace_power_start(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle(next_state, dev->cpu); entered_state = target_state->enter(dev, drv, next_state); trace_power_end(dev->cpu); trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); if (entered_state >= 0) { /* Update cpuidle counters */ /* This can be moved to within driver enter routine * but that results in multiple copies of same code. 
*/ dev->states_usage[entered_state].time += (unsigned long long)dev->last_residency; dev->states_usage[entered_state].usage++; } ... Note the "if (entered_state >= 0)". This ultimately prevents the cpuidle device time accounting upon a negative value being returned. So are you asking for the if(IS_ERR(index)) check to prevent the unnecessary last_residency time calculation in the wrapper, or to make sure last_residency is zero upon failure? (or both?) It seems like a bug (or lack of documentation at best) in the code that exists today to not zero out dev->last_residency upon a negative return value as this value is used by the governors upon the next idle. So to ensure last_residency is 0 upon failure, I think it'd be best to add that to a new else statement immediately following the "if (entered_state >= 0)" so that any platform cpuidle driver that returns a negative value will have the last_residency zeroed out, not just those that use en_core_tk_irqen. > > Regards, > Mike ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v5 1/9] cpuidle: Add commonly used functionality for consolidation
Hey Mike, On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike wrote: > On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee wrote: >> +/** >> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function >> + * @dev: pointer to a valid cpuidle_device object >> + * @drv: pointer to a valid cpuidle_driver object >> + * @index: index of the target cpuidle state. >> + */ >> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, >> + struct cpuidle_driver *drv, int index, >> + int (*enter)(struct cpuidle_device *dev, >> + struct cpuidle_driver *drv, int >> index)) >> +{ >> + ktime_t time_start, time_end; >> + s64 diff; >> + >> + time_start = ktime_get(); >> + >> + index = enter(dev, drv, index); >> + >> + time_end = ktime_get(); >> + >> + local_irq_enable(); >> + >> + diff = ktime_to_us(ktime_sub(time_end, time_start)); >> + if (diff > INT_MAX) >> + diff = INT_MAX; >> + >> + dev->last_residency = (int) diff; >> + >> + return index; >> +} > > Any reason that this code is in the header? Why not in cpuidle.c? > Not a strong reason. I thought making it an inline would introduce slightly less new execution when adding this code (realizing that there are function calls immediately after, so the only benefit is the reduce popping and pushing). But it does require an extra copy of this code for any platform driver that does not enable en_core_tk_irqen and instead makes calls to it directly (like omap3). For this case, I don't think the inline implementation should add extra code from what exists today as it should simply replace the existing platform time keeping calls to a standard one defined by the core cpuidle. I don't have a strong preference with using the inline so if you or others can give your opinion on which method to use and why, I'd be glad to read it. > Regards, > Mike ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR
On Tue, Feb 28, 2012 at 03:59:25PM +0400, Dmitry Antipov wrote: > On 02/28/2012 01:44 PM, Dan Carpenter wrote: > >On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote: > >> - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested; > >> - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes > >>are requested; > >> - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR. > >> > > > >Why? > > 1) it was requested by the subsystem (co?)maintainer, see > http://lkml.org/lkml/2012/1/27/475; > 2) this looks to be a convenient way to trace/debug zero-size allocation > errors (although I don't advocate it as a best way). Could you include that in the changelog when the final version is ready? regards, dan carpenter signature.asc Description: Digital signature ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: ARM A9 oprofile
On Tue, Feb 28, 2012 at 8:13 PM, Dmitry Antipov wrote: > On 02/28/2012 04:45 AM, Ming Lei wrote: > >> Please try the uImage on the link below: >> >> http://kernel.ubuntu.com/~ming/up/uImage-3.3-rc5-perf >> > > No good news for the oprofile: OK, could you try the MLO and u-boot.bin under the link of http://kernel.ubuntu.com/~ming/up to see if 'perf' may work well? If still not, could you tell me what is the revision of your pandaboard? or do you have any changes on the hardware? I am sure that several guys have tried the current omap4 pmu patch and make perf work well on pandaboard. > > > ... > irq 34: nobody cared (try booting with the "irqpoll" option) > [stack] > > Disabling IRQ #34 > irq 33: nobody cared (try booting with the "irqpoll" option) > [stack] > Disabling IRQ #33 > ... > > Could you also try an attached module in a loop like: > > while true; do insmod timeoutbench.ko && rmmod timeoutbench; done > > with oprofile running? 'perf top' can be run well with the output below: PerfTop:1036 irqs/sec kernel:99.2% us: 1.0% guest kernel: 0.0% guest us: 0.0% exact: 0.0% [1000Hz cycles], (all, 2 CPUs) 44.87% [kernel][k] _raw_spin_unlock_irqrestore 22.48% [kernel][k] _raw_spin_unlock_irq 7.41% [kernel][k] del_timer_sync 6.24% [kernel][k] lock_acquire 4.95% [kernel][k] lock_release 2.05% [kernel][k] omap4_enter_idle 1.81% [kernel][k] finish_task_switch 1.06% [kernel][k] rcu_note_context_switch 0.60% [kernel][k] schedule_timeout 0.57% [kernel][k] memchr_inv 0.54% [kernel][k] __schedule 0.54% [kernel][k] thumbee_notifier 0.53% [kernel][k] sub_preempt_count thanks, -- Ming Lei ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Tue, 28 Feb 2012, Ian Campbell wrote: > On Tue, 2012-02-28 at 10:20 +, Dave Martin wrote: > > On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote: > > > On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote: > > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings > > > > > available > > > > > which do allow direct mov of an immediate into R12, but are 32 bit > > > > > Thumb > > > > > instructions. > > > > > > > > > > Should we use r7 instead to maximise instruction density for Thumb > > > > > code? > > > > > > > > The difference seems trivial when put into context, even if you code a > > > > special Thumb version of the code to maximise density (the Thumb-2 code > > > > which gets built from assembler in the kernel is very suboptimal in > > > > size, but there simply isn't a high proportion of asm code in the kernel > > > > anyway.) I wouldn't consider the ARM/Thumb differences as an important > > > > factor when deciding on a register. > > > > > > OK, that's useful information. thanks. > > > > > > > One argument for _not_ using r12 for this purpose is that it is then > > > > harder to put a generic "HVC" function (analogous to the "syscall" > > > > syscall) out-of-line, since r12 could get destroyed by the call. > > > > > > For an out of line syscall(2) wouldn't the syscall number either be in a > > > standard C calling convention argument register or on the stack when the > > > function was called, since it is just a normal argument at that point? > > > As you point out it cannot be passed in r12 (and could never be, due to > > > the clobbering). > > > > > > The syscall function itself would have to move the arguments and syscall > > > nr etc around before issuing the syscall. > > > > > > I think the same is true of a similar hypercall(2) > > > > > > > If you don't think you will ever care about putting HVC out of line > > > > though, it may not matter. 
> > > > If you have both inline and out-of-line hypercalls, it's hard to ensure > > that you never have to shuffle the registers in either case. > > Agreed. > > I think we want to optimise for the inline case since those are the > majority. They are not just the majority, all of them are static inline at the moment, even on x86 (where the number of hypercalls is much higher). So yes, we should optimize for the inline case. > The only non-inline case is the special "privcmd ioctl" which is the > mechanism that allows the Xen toolstack to make hypercalls. It's > somewhat akin to syscall(2). By the time you get to it you will already > have done a system call for the ioctl, pulled the arguments from the > ioctl argument structure etc, plus such hypercalls are not really > performance critical. Even the privcmd hypercall (privcmd_call) is a static inline function, it is just that at the moment there is only one caller :) > > Shuffling can be reduced but only at the expense of strange argument > > ordering in some cases when calling from C -- the complexity is probably > > not worth it. Linux doesn't bother for its own syscalls. > > > > Note that even in assembler, a branch from one section to a label in > > another section may cause r12 to get destroyed, so you will need to be > > careful about how you code the hypervisor trap handler. However, this > > is not different from coding exception handlers in general, so I don't > > know that it constitutes a conclusive argument on its own. > > We are happy to arrange that this doesn't occur on our trap entry paths, > at least until the guest register state has been saved. Currently the > hypercall dispatcher is in C and gets r12 from the on-stack saved state. > We will likely eventually optimise the hypercall path directly in ASM > and in that case we are happy to take steps to ensure we don't clobber > r12 before we need it. Yes, I don't think this should be an issue. 
> > My instinctive preference would therefore be for r7 (which also seems to > > be good enough for Linux syscalls) -- but it really depends how many > > arguments you expect to need to support. > > Apparently r7 is the frame pointer for gcc in thumb mode which I think > is a good reason to avoid it. > > We currently have some 5 argument hypercalls and there have been > occasional suggestions for interfaces which use 6 -- although none of > them have come to reality. I don't have a very strong opinion on which register we should use, but I would like to avoid r7 if it is already actively used by gcc. The fact that r12 can be destroyed so easily is actually a good argument for using it because it means it is less likely to contain useful data that needs to be saved/restored by gcc. ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Tue, 28 Feb 2012, Dave Martin wrote: > > Given that Stefano is proposing to make the ISS a (per-hypervisor) > > constant we could consider just defining the Thumb and non-Thumb > > constants instead of doing all the construction with the __HVC_IMM stuff > > -- that would remove a big bit of the macroization. > > It's not quite as simple as that -- emitting instructions using data > directives is not endianness safe, and even in the cases where .long gives > the right result for ARM, it gives the wrong result for 32-bit Thumb > instructions if the opcode is given in human-readable order. > > I was trying to solve the same problem for the kvm guys with some global > macros -- I'm aiming to get a patch posted soon, so I'll make sure > you're on CC. That would be great, thanks! ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: ARM A9 oprofile
On 02/28/2012 04:45 AM, Ming Lei wrote: Please try the uImage on the link below: http://kernel.ubuntu.com/~ming/up/uImage-3.3-rc5-perf No good news for the oprofile: ... irq 34: nobody cared (try booting with the "irqpoll" option) [stack] Disabling IRQ #34 irq 33: nobody cared (try booting with the "irqpoll" option) [stack] Disabling IRQ #33 ... Could you also try an attached module in a loop like: while true; do insmod timeoutbench.ko && rmmod timeoutbench; done with oprofile running? Dmitry #include #include #include #include #include #include #include MODULE_LICENSE("GPL"); static int nrthreads = 128; module_param(nrthreads, int, 0644); static int loopcount = 1024; module_param(loopcount, int, 0644); static int usehrtime = 0; module_param(usehrtime, int, 0644); static int slack = 5; module_param(slack, int, 0644); static int msecs = 1; module_param(msecs, int, 0644); static DECLARE_COMPLETION(done); static struct task_struct **threads; static atomic_t nrunning; static int timeoutbench_test(void *unused) { int i; ktime_t expires = ktime_set(0, msecs * NSEC_PER_MSEC); atomic_inc(&nrunning); for (i = 0; !kthread_should_stop() && i < loopcount; i++) { if (usehrtime) { set_current_state(TASK_UNINTERRUPTIBLE); schedule_hrtimeout_range(&expires, slack, HRTIMER_MODE_REL); } else schedule_timeout_uninterruptible(msecs_to_jiffies(msecs)); } if (atomic_dec_and_test(&nrunning)) complete(&done); return 0; } static int __init timeoutbench_init(void) { int i; atomic_set(&nrunning, 0); threads = kmalloc(nrthreads * sizeof(struct task_struct *), GFP_KERNEL); if (!threads) return -ENOMEM; for (i = 0; i < nrthreads; i++) { threads[i] = kthread_create(timeoutbench_test, NULL, "timeoutbench_test/%d", i); if (IS_ERR(threads[i])) { int j, err = PTR_ERR(threads[i]); for (j = 0; j < i; j++) kthread_stop(threads[j]); kfree(threads); return err; } get_task_struct(threads[i]); wake_up_process(threads[i]); } return 0; } static void __exit timeoutbench_exit(void) { int i; 
wait_for_completion(&done); for (i = 0; i < nrthreads; i++) { kthread_stop(threads[i]); put_task_struct(threads[i]); } kfree(threads); } module_init(timeoutbench_init); module_exit(timeoutbench_exit); ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR
On 02/28/2012 01:44 PM, Dan Carpenter wrote: On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote: - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested; - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes are requested; - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR. Why? 1) it was requested by the subsystem (co?)maintainer, see http://lkml.org/lkml/2012/1/27/475; 2) this looks to be a convenient way to trace/debug zero-size allocation errors (although I don't advocate it as a best way). Dmitry ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Mon, 2012-02-27 at 21:05 +, Peter Maydell wrote: > On 27 February 2012 16:27, Ian Campbell wrote: > > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov > > immediate (which can only target r0..r7). > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb > > instructions. > > > > Should we use r7 instead to maximise instruction density for Thumb code? > > r7 is (used by gcc as) the Thumb frame pointer; I don't know if this > makes it worth avoiding in this context. I think it does. It actually sounds as if using r12 is fine here, the impact on code density should be pretty small -- there aren't really all that many call sites which involve hypercalls. By way of an example I measured an x86 kernel which should be using more hypercalls due to pv paging etc and found that 0.014% of the lines in "objdump -d" contained a call to the hypercall_page. (I know not all lines of objdump -d output are instructions but it's a reasonable approx IMHO). So I think using three 16-bit instruction slots instead of two won't make much impact in practice. Thanks, Ian. ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Tue, 2012-02-28 at 10:20 +, Dave Martin wrote: > On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote: > > On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote: > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available > > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb > > > > instructions. > > > > > > > > Should we use r7 instead to maximise instruction density for Thumb code? > > > > > > The difference seems trivial when put into context, even if you code a > > > special Thumb version of the code to maximise density (the Thumb-2 code > > > which gets built from assembler in the kernel is very suboptimal in > > > size, but there simply isn't a high proportion of asm code in the kernel > > > anyway.) I wouldn't consider the ARM/Thumb differences as an important > > > factor when deciding on a register. > > > > OK, that's useful information. thanks. > > > > > One argument for _not_ using r12 for this purpose is that it is then > > > harder to put a generic "HVC" function (analogous to the "syscall" > > > syscall) out-of-line, since r12 could get destroyed by the call. > > > > For an out of line syscall(2) wouldn't the syscall number either be in a > > standard C calling convention argument register or on the stack when the > > function was called, since it is just a normal argument at that point? > > As you point out it cannot be passed in r12 (and could never be, due to > > the clobbering). > > > > The syscall function itself would have to move the arguments and syscall > > nr etc around before issuing the syscall. > > > > I think the same is true of a similar hypercall(2) > > > > > If you don't think you will ever care about putting HVC out of line > > > though, it may not matter. > > If you have both inline and out-of-line hypercalls, it's hard to ensure > that you never have to shuffle the registers in either case. Agreed. I think we want to optimise for the inline case since those are the majority. 
The only non-inline case is the special "privcmd ioctl" which is the mechanism that allows the Xen toolstack to make hypercalls. It's somewhat akin to syscall(2). By the time you get to it you will already have done a system call for the ioctl, pulled the arguments from the ioctl argument structure etc, plus such hypercalls are not really performance critical. > Shuffling can be reduced but only at the expense of strange argument > ordering in some cases when calling from C -- the complexity is probably > not worth it. Linux doesn't bother for its own syscalls. > > Note that even in assembler, a branch from one section to a label in > another section may cause r12 to get destroyed, so you will need to be > careful about how you code the hypervisor trap handler. However, this > is not different from coding exception handlers in general, so I don't > know that it constitutes a conclusive argument on its own. We are happy to arrange that this doesn't occur on our trap entry paths, at least until the guest register state has been saved. Currently the hypercall dispatcher is in C and gets r12 from the on-stack saved state. We will likely eventually optimise the hypercall path directly in ASM and in that case we are happy to take steps to ensure we don't clobber r12 before we need it. > My instinctive preference would therefore be for r7 (which also seems to > be good enough for Linux syscalls) -- but it really depends how many > arguments you expect to need to support. Apparently r7 is the frame pointer for gcc in thumb mode which I think is a good reason to avoid it. We currently have some 5 argument hypercalls and there have been occasional suggestions for interfaces which use 6 -- although none of them have come to reality. Ian. ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Tue, 2012-02-28 at 09:46 +, Dave Martin wrote: > On Mon, Feb 27, 2012 at 07:48:45PM +, Ian Campbell wrote: > > Given that Stefano is proposing to make the ISS a (per-hypervisor) > > constant we could consider just defining the Thumb and non-Thumb > > constants instead of doing all the construction with the __HVC_IMM stuff > > -- that would remove a big bit of the macroization. > > It's not quite as simple as that -- emitting instructions using data > directives is not endianness safe, and even in the cases where .long gives > the right result for ARM, it gives the wrong result for 32-bit Thumb > instructions if the opcode is given in human-readable order. Urk, yes,.. > I was trying to solve the same problem for the kvm guys with some global > macros -- I'm aiming to get a patch posted soon, so I'll make sure > you're on CC. Awesome, thanks! Ian. ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR
On Tue, Feb 28, 2012 at 01:33:59PM +0400, Dmitry Antipov wrote: > - Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested; > - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes >are requested; > - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR. > Why? Also patch 2/2 should go in before patch 1/2 or it breaks things. regards, dan carpenter signature.asc Description: Digital signature ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Mon, Feb 27, 2012 at 07:33:39PM +, Ian Campbell wrote: > On Mon, 2012-02-27 at 18:03 +, Dave Martin wrote: > > On Mon, Feb 27, 2012 at 04:27:23PM +, Ian Campbell wrote: > > > On Thu, 2012-02-23 at 17:48 +, Stefano Stabellini wrote: > > > > We need a register to pass the hypercall number because we might not > > > > know it at compile time and HVC only takes an immediate argument. > > > > > > > > Among the available registers r12 seems to be the best choice because it > > > > is defined as "intra-procedure call scratch register". > > > > > > R12 is not accessible from the 16 bit "T1" Thumb encoding of mov > > > immediate (which can only target r0..r7). > > > > This is untrue. The important instructions, like MOV Rd, Rn can access > > all the regs. But anyway, there is no such thing as a Thumb-1 kernel, > > so we won't really care. > > I did say "mov immediate", which is the one which matters when loading a > constant hypercall number (the common case). AFAIK the "mov Rd, #imm" T1 > encoding cannot access all registers. > > The "mov rd,rn" form only helps for syscall(2) like functions, which are > unusual, at least for Xen, although as Stefano says, they do exist. Apologies -- looks like I misread you here. I agree, but it's probably a minor issue nonetheless. > > > > Since we support only ARMv7+ there are "T2" and "T3" encodings available > > > which do allow direct mov of an immediate into R12, but are 32 bit Thumb > > > instructions. > > > > > > Should we use r7 instead to maximise instruction density for Thumb code? > > > > The difference seems trivial when put into context, even if you code a > > special Thumb version of the code to maximise density (the Thumb-2 code > > which gets built from assembler in the kernel is very suboptimal in > > size, but there simply isn't a high proportion of asm code in the kernel > > anyway.) I wouldn't consider the ARM/Thumb differences as an important > > factor when deciding on a register. 
> > OK, that's useful information. thanks. > > > One argument for _not_ using r12 for this purpose is that it is then > > harder to put a generic "HVC" function (analogous to the "syscall" > > syscall) out-of-line, since r12 could get destroyed by the call. > > For an out of line syscall(2) wouldn't the syscall number either be in a > standard C calling convention argument register or on the stack when the > function was called, since it is just a normal argument at that point? > As you point out it cannot be passed in r12 (and could never be, due to > the clobbering). > > The syscall function itself would have to move the arguments and syscall > nr etc around before issuing the syscall. > > I think the same is true of a similar hypercall(2) > > > If you don't think you will ever care about putting HVC out of line > > though, it may not matter. If you have both inline and out-of-line hypercalls, it's hard to ensure that you never have to shuffle the registers in either case. Shuffling can be reduced but only at the expense of strange argument ordering in some cases when calling from C -- the complexity is probably not worth it. Linux doesn't bother for its own syscalls. Note that even in assembler, a branch from one section to a label in another section may cause r12 to get destroyed, so you will need to be careful about how you code the hypervisor trap handler. However, this is not different from coding exception handlers in general, so I don't know that it constitutes a conclusive argument on its own. My instinctive preference would therefore be for r7 (which also seems to be good enough for Linux syscalls) -- but it really depends how many arguments you expect to need to support. Cheers ---Dave ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH v3 0/2] Device tree support for TWL regulators
On Tue, Feb 28, 2012 at 11:11:48AM +0530, Rajendra Nayak wrote: > changes have no dependencies with any other DT series. I will repost > all of Tero/Peter and my changes (to add DT support to the driver) as > one single series and drop the dts file updates, which I guess can go > via Tony/OMAP tree. Yes, that sounds like a good plan - the DTS changes are largely orthogonal to the code changes and don't need to go via the same path (this is true in general, the DTSs are pretty horrible for merge issues). signature.asc Description: Digital signature ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
Re: [PATCH-WIP 01/13] xen/arm: use r12 to pass the hypercall number to the hypervisor
On Mon, Feb 27, 2012 at 07:48:45PM +, Ian Campbell wrote: > On Mon, 2012-02-27 at 17:53 +, Dave Martin wrote: > > On Thu, Feb 23, 2012 at 05:48:22PM +, Stefano Stabellini wrote: > > > We need a register to pass the hypercall number because we might not > > > know it at compile time and HVC only takes an immediate argument. > > > > > > Among the available registers r12 seems to be the best choice because it > > > is defined as "intra-procedure call scratch register". > > > > This would be massively simplified if you didn't try to inline the HVC. > > Does it really need to be inline? > > > > > +#define __HYPERCALL ".word 0xe1400070 + " __HVC_IMM(XEN_HYPERCALL_TAG) > > > > Please, do not do this. It won't work in Thumb, where the encodings are > > different. > > > > It is reasonable to expect anyone building Xen to have reasonably new > > tools, you you can justifiably use > > > > AFLAGS_thisfile.o := -Wa,-march=armv7-a+virt > > > > in the Makefile and just use the hvc instruction directly. > > Our aim is for guest kernel binaries not to be specific to Xen -- i.e. > they should be able to run on baremetal and other hypervisors as well. > The differences should only be in the device-tree passed to the kernel. > > > Of course, this is only practical if the HVC invocation is not inlined. > > I suppose we could make the stub functions out of line, we just copied > what Xen does on x86. > > The only thing which springs to mind is that 5 argument hypercalls will > end up pushing the fifth argument to the stack only to pop it back into > r4 for the hypercall and IIRC it also needs to preserve r4 (callee saved > reg) which is going to involve some small amount of code to move stuff > around too. > > So by inlining the functions we avoid some thunking because the compiler > would know exactly what was happening at the hypercall site. True ... > > We don't currently have any 6 argument hypercalls but the same would > extend there. 
> > > If we can't avoid macro-ising HVC, we should do it globally, not locally > > to the Xen code. That way we at least keep all the horror in one place. > > That sounds like a good idea to me. > > Given that Stefano is proposing to make the ISS a (per-hypervisor) > constant we could consider just defining the Thumb and non-Thumb > constants instead of doing all the construction with the __HVC_IMM stuff > -- that would remove a big bit of the macroization. It's not quite as simple as that -- emitting instructions using data directives is not endianness safe, and even in the cases where .long gives the right result for ARM, it gives the wrong result for 32-bit Thumb instructions if the opcode is given in human-readable order. I was trying to solve the same problem for the kvm guys with some global macros -- I'm aiming to get a patch posted soon, so I'll make sure you're on CC. Cheers ---Dave ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 3/4] regulator: twl-regulator: Add fixed LDO for V1V8, V2V1 supply
From: Peter Ujfalusi V1V8 supply most common use is to provide VIO for the system. V2V1 supply is used on SDP4430/PandaBoards to provide 2.1V to twl6040, and also as an input to VCXIO_IN, VDAC_IN of twl6030. Also update the bindings documentation with the new compatible property for these additional LDOs. Signed-off-by: Peter Ujfalusi Signed-off-by: Rajendra Nayak Cc: Samuel Ortiz Cc: Misael Lopez Cruz Cc: Santosh Shilimkar --- .../bindings/regulator/twl-regulator.txt |2 ++ drivers/regulator/twl-regulator.c |4 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt index ba9d2cc..0c3395d 100644 --- a/Documentation/devicetree/bindings/regulator/twl-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt @@ -13,6 +13,8 @@ For twl6030 regulators/LDOs - "ti,twl6030-vcxio" for VCXIO LDO - "ti,twl6030-vdac" for VDAC LDO - "ti,twl6030-vusb" for VUSB LDO + - "ti,twl6030-v1v8" for V1V8 LDO + - "ti,twl6030-v2v1" for V2V1 LDO - "ti,twl6030-clk32kg" for CLK32KG RESOURCE - "ti,twl6030-vdd1" for VDD1 SMPS - "ti,twl6030-vdd2" for VDD2 SMPS diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 2a13211..9cdfc38 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -1094,6 +1094,8 @@ TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0); TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0); TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0); TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0); +TWL6030_FIXED_LDO(V1V8, 0x16, 1800, 0); +TWL6030_FIXED_LDO(V2V1, 0x1c, 2100, 0); TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0); TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34); TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10); @@ -1173,6 +1175,8 @@ static const struct of_device_id twl_of_match[] __devinitconst = { TWLFIXED_OF_MATCH("ti,twl6030-vcxio", VCXIO), TWLFIXED_OF_MATCH("ti,twl6030-vdac", VDAC), TWLFIXED_OF_MATCH("ti,twl6030-vusb", VUSB), + 
TWLFIXED_OF_MATCH("ti,twl6030-v1v8", V1V8), + TWLFIXED_OF_MATCH("ti,twl6030-v2v1", V2V1), TWLRES_OF_MATCH("ti,twl6030-clk32kg", CLK32KG), TWLSMPS_OF_MATCH("ti,twl6025-smps3", SMPS3), TWLSMPS_OF_MATCH("ti,twl6025-smps4", SMPS4), -- 1.7.1 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 2/4] regulator: twl: adapt twl-regulator driver to dt
Modify the twl regulator driver to extract the regulator_init_data from device tree when passed, instead of getting it through platform_data structures (on non-DT builds) Also add documentation for TWL regulator specific bindings. Signed-off-by: Rajendra Nayak --- .../bindings/regulator/twl-regulator.txt | 66 + drivers/regulator/twl-regulator.c | 259 +--- 2 files changed, 238 insertions(+), 87 deletions(-) create mode 100644 Documentation/devicetree/bindings/regulator/twl-regulator.txt diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt new file mode 100644 index 000..ba9d2cc --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt @@ -0,0 +1,66 @@ +TWL family of regulators + +Required properties: +For twl6030 regulators/LDOs +- compatible: + - "ti,twl6030-vaux1" for VAUX1 LDO + - "ti,twl6030-vaux2" for VAUX2 LDO + - "ti,twl6030-vaux3" for VAUX3 LDO + - "ti,twl6030-vmmc" for VMMC LDO + - "ti,twl6030-vpp" for VPP LDO + - "ti,twl6030-vusim" for VUSIM LDO + - "ti,twl6030-vana" for VANA LDO + - "ti,twl6030-vcxio" for VCXIO LDO + - "ti,twl6030-vdac" for VDAC LDO + - "ti,twl6030-vusb" for VUSB LDO + - "ti,twl6030-clk32kg" for CLK32KG RESOURCE + - "ti,twl6030-vdd1" for VDD1 SMPS + - "ti,twl6030-vdd2" for VDD2 SMPS + - "ti,twl6030-vdd3" for VDD3 SMPS +For twl6025 regulators/LDOs +- compatible: + - "ti,twl6025-ldo1" for LDO1 LDO + - "ti,twl6025-ldo2" for LDO2 LDO + - "ti,twl6025-ldo3" for LDO3 LDO + - "ti,twl6025-ldo4" for LDO4 LDO + - "ti,twl6025-ldo5" for LDO5 LDO + - "ti,twl6025-ldo6" for LDO6 LDO + - "ti,twl6025-ldo7" for LDO7 LDO + - "ti,twl6025-ldoln" for LDOLN LDO + - "ti,twl6025-ldousb" for LDOUSB LDO + - "ti,twl6025-smps3" for SMPS3 SMPS + - "ti,twl6025-smps4" for SMPS4 SMPS + - "ti,twl6025-vio" for VIO SMPS +For twl4030 regulators/LDOs +- compatible: + - "ti,twl4030-vaux1" for VAUX1 LDO + - "ti,twl4030-vaux2" for VAUX2 LDO + - "ti,twl5030-vaux2" for 
VAUX2 LDO + - "ti,twl4030-vaux3" for VAUX3 LDO + - "ti,twl4030-vaux4" for VAUX4 LDO + - "ti,twl4030-vmmc1" for VMMC1 LDO + - "ti,twl4030-vmmc2" for VMMC2 LDO + - "ti,twl4030-vpll1" for VPLL1 LDO + - "ti,twl4030-vpll2" for VPLL2 LDO + - "ti,twl4030-vsim" for VSIM LDO + - "ti,twl4030-vdac" for VDAC LDO + - "ti,twl4030-vintana2" for VINTANA2 LDO + - "ti,twl4030-vio" for VIO LDO + - "ti,twl4030-vdd1" for VDD1 SMPS + - "ti,twl4030-vdd2" for VDD2 SMPS + - "ti,twl4030-vintana1" for VINTANA1 LDO + - "ti,twl4030-vintdig" for VINTDIG LDO + - "ti,twl4030-vusb1v5" for VUSB1V5 LDO + - "ti,twl4030-vusb1v8" for VUSB1V8 LDO + - "ti,twl4030-vusb3v1" for VUSB3V1 LDO + +Optional properties: +- Any optional property defined in bindings/regulator/regulator.txt + +Example: + + xyz: regulator@0 { + compatible = "ti,twl6030-vaux1"; + regulator-min-microvolt = <100>; + regulator-max-microvolt = <300>; + }; diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 8611282..2a13211 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -14,8 +14,11 @@ #include #include #include +#include +#include #include #include +#include #include @@ -920,7 +923,8 @@ static struct regulator_ops twlsmps_ops = { TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \ 0x0, TWL6030, twl6030fixed_ops) -#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \ +#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) \ +static struct twlreg_info TWL4030_INFO_##label = { \ .base = offset, \ .id = num, \ .table_len = ARRAY_SIZE(label##_VSEL_table), \ @@ -938,7 +942,7 @@ static struct regulator_ops twlsmps_ops = { } #define TWL4030_ADJUSTABLE_SMPS(label, offset, num, turnon_delay, remap_conf) \ - { \ +static struct twlreg_info TWL4030_INFO_##label = { \ .base = offset, \ .id = num, \ .delay = turnon_delay, \ @@ -952,7 +956,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define 
TWL6030_ADJUSTABLE_SMPS(label) { \ +#define TWL6030_ADJUSTABLE_SMPS(label) \ +static struct twlreg_info TWL6030_INFO_##label = { \ .desc = { \ .name = #label, \ .id = TWL6030_REG_##label, \ @@ -962,7 +967,8 @@ static struct regulator_ops twlsmps_ops = { }, \ } -#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ +#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) \ +static struct twlreg_info TWL6030_INFO_##label = { \ .base = offset, \ .min_mV = min_mVolts, \ .max_mV = max_mVolts, \ @@ -976,7 +982,8 @@ static struct regulator_ops twlsmps_ops = {
[PATCH 1/4] regulator: twl6030: add support for vdd1, vdd2 and vdd3 regulators
From: Tero Kristo vdd1 and vdd2 are now common regulators for twl4030 and twl6030. Also added vdd3 as a new regulator for twl6030. twl6030 vdd1...vdd3 smps regulator voltages can only be controlled through the smartreflex voltage channel, thus the support for the voltage_get and set is minimal and requires external controller. Signed-off-by: Tero Kristo Signed-off-by: Rajendra Nayak Cc: Mark Brown Cc: Liam Girdwood Cc: Samuel Ortiz Cc: Kevin Hilman --- drivers/mfd/twl-core.c| 15 ++ drivers/regulator/twl-regulator.c | 39 + include/linux/i2c/twl.h |5 ++- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 6c13d9f..e1d3a64 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -951,6 +951,21 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) /* twl6030 regulators */ if (twl_has_regulator() && twl_class_is_6030() && !(features & TWL6025_SUBCLASS)) { + child = add_regulator(TWL6030_REG_VDD1, pdata->vdd1, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VDD2, pdata->vdd2, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_VDD3, pdata->vdd3, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc, features); if (IS_ERR(child)) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 7ff8bb2..8611282 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -561,6 +561,32 @@ static struct regulator_ops twl4030smps_ops = { .get_voltage= twl4030smps_get_voltage, }; +static int twl6030coresmps_set_voltage(struct regulator_dev *rdev, int min_uV, + int max_uV, unsigned *selector) +{ + struct twlreg_info *info = rdev_get_drvdata(rdev); + + if (info->set_voltage) + return info->set_voltage(info->data, min_uV); + + return -ENODEV; +} + +static int 
twl6030coresmps_get_voltage(struct regulator_dev *rdev) +{ + struct twlreg_info *info = rdev_get_drvdata(rdev); + + if (info->get_voltage) + return info->get_voltage(info->data); + + return -ENODEV; +} + +static struct regulator_ops twl6030coresmps_ops = { + .set_voltage= twl6030coresmps_set_voltage, + .get_voltage= twl6030coresmps_get_voltage, +}; + static int twl6030ldo_list_voltage(struct regulator_dev *rdev, unsigned index) { struct twlreg_info *info = rdev_get_drvdata(rdev); @@ -926,6 +952,16 @@ static struct regulator_ops twlsmps_ops = { }, \ } +#define TWL6030_ADJUSTABLE_SMPS(label) { \ + .desc = { \ + .name = #label, \ + .id = TWL6030_REG_##label, \ + .ops = &twl6030coresmps_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + }, \ + } + #define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \ .base = offset, \ .min_mV = min_mVolts, \ @@ -1027,6 +1063,9 @@ static struct twlreg_info twl_regs[] = { /* 6030 REG with base as PMC Slave Misc : 0x0030 */ /* Turnon-delay and remap configuration values for 6030 are not verified since the specification is not public */ + TWL6030_ADJUSTABLE_SMPS(VDD1), + TWL6030_ADJUSTABLE_SMPS(VDD2), + TWL6030_ADJUSTABLE_SMPS(VDD3), TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300), TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300), TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300), diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 08a82d3..f66c031 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -712,6 +712,9 @@ struct twl4030_platform_data { struct regulator_init_data *vaux1; struct regulator_init_data *vaux2; struct regulator_init_data *vaux3; + struct regulator_init_data *vdd1; + struct regulator_init_data *vdd2; + struct regulator_init_data *vdd3; /* TWL4030 LDO regulators */ struct regulator_init_data *vpll1; struct regulator_init_data *vpll2; @@ -720,8 +723,6 @@ struct twl4030_platform_data { struct regulator_init_data *vsim; struct 
regulator_init_data *vaux4;
[PATCH 4/4] MFD: twl-core: regulator configuration for twl6030 V1V8, V2V1 SMPS
From: Peter Ujfalusi To be able to attach consumers to these supplies from board files we need to have regulator_init_data for them. Signed-off-by: Peter Ujfalusi Signed-off-by: Rajendra Nayak Cc: Samuel Ortiz Cc: Misael Lopez Cruz Cc: Santosh Shilimkar --- drivers/mfd/twl-core.c | 10 ++ include/linux/i2c/twl.h |2 ++ 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index e1d3a64..6cb1061 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -966,6 +966,16 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) if (IS_ERR(child)) return PTR_ERR(child); + child = add_regulator(TWL6030_REG_V1V8, pdata->v1v8, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + + child = add_regulator(TWL6030_REG_V2V1, pdata->v2v1, + features); + if (IS_ERR(child)) + return PTR_ERR(child); + child = add_regulator(TWL6030_REG_VMMC, pdata->vmmc, features); if (IS_ERR(child)) diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index f66c031..7fcab23 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -734,6 +734,8 @@ struct twl4030_platform_data { struct regulator_init_data *vcxio; struct regulator_init_data *vusb; struct regulator_init_data *clk32kg; + struct regulator_init_data *v1v8; + struct regulator_init_data *v2v1; /* TWL6025 LDO regulators */ struct regulator_init_data *ldo1; struct regulator_init_data *ldo2; -- 1.7.1 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 0/4] twl-regulator DT adaptation and updates to add new regulators
Hi Mark, Here is a consolidated series which adds DT support for twl regulator driver and adds support for VDD1/2/3 regulator and support for fixed LDO V1V8 and V2V1. The patches are based on -next and tested on omap3 beagle and omap4 panda boards. I have dropped the patch updating the dts entries for twl4030 and twl6030 because of other dependencies and will submit them via the OMAP tree. The driver DT changes however do not have any dependency on the dts file updates (except that regulator support will not work with DT, but will not break DT build or boot) and no functionality would change in the non-DT case. regards, Rajendra Peter Ujfalusi (2): regulator: twl-regulator: Add fixed LDO for V1V8, V2V1 supply MFD: twl-core: regulator configuration for twl6030 V1V8, V2V1 SMPS Rajendra Nayak (1): regulator: twl: adapt twl-regulator driver to dt Tero Kristo (1): regulator: twl6030: add support for vdd1, vdd2 and vdd3 regulators .../bindings/regulator/twl-regulator.txt | 68 + drivers/mfd/twl-core.c | 25 ++ drivers/regulator/twl-regulator.c | 294 ++-- include/linux/i2c/twl.h|7 +- 4 files changed, 309 insertions(+), 85 deletions(-) create mode 100644 Documentation/devicetree/bindings/regulator/twl-regulator.txt ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 2/2] module: use ZERO_OR_NULL_PTR allocation pointer checking
Use ZERO_OR_NULL_PTR allocation pointer checking where allocation function may return ZERO_SIZE_PTR. --- kernel/module.c |8 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 2c93276..ae438db 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2322,14 +2322,14 @@ static void dynamic_debug_remove(struct _ddebug *debug) void * __weak module_alloc(unsigned long size) { - return size == 0 ? NULL : vmalloc_exec(size); + return vmalloc_exec(size); } static void *module_alloc_update_bounds(unsigned long size) { void *ret = module_alloc(size); - if (ret) { + if (likely(!ZERO_OR_NULL_PTR(ret))) { mutex_lock(&module_mutex); /* Update module bounds. */ if ((unsigned long)ret < module_addr_min) @@ -2638,7 +2638,7 @@ static int move_module(struct module *mod, struct load_info *info) * leak. */ kmemleak_not_leak(ptr); - if (!ptr) + if (unlikely(ZERO_OR_NULL_PTR(ptr))) return -ENOMEM; memset(ptr, 0, mod->core_size); @@ -2652,7 +2652,7 @@ static int move_module(struct module *mod, struct load_info *info) * after the module is initialized. */ kmemleak_ignore(ptr); - if (!ptr && mod->init_size) { + if (unlikely(ZERO_OR_NULL_PTR(ptr))) { module_free(mod, mod->module_core); return -ENOMEM; } -- 1.7.7.6 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[PATCH 1/2] vmalloc: use ZERO_SIZE_PTR / ZERO_OR_NULL_PTR
- Fix vmap() to return ZERO_SIZE_PTR if 0 pages are requested; - fix __vmalloc_node_range() to return ZERO_SIZE_PTR if 0 bytes are requested; - fix __vunmap() to check passed pointer with ZERO_OR_NULL_PTR. Signed-off-by: Dmitry Antipov --- mm/vmalloc.c | 10 +++--- 1 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 86ce9a5..040a9cd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1456,7 +1456,7 @@ static void __vunmap(const void *addr, int deallocate_pages) { struct vm_struct *area; - if (!addr) + if (unlikely(ZERO_OR_NULL_PTR(addr))) return; if ((PAGE_SIZE-1) & (unsigned long)addr) { @@ -1548,7 +1548,9 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); - if (count > totalram_pages) + if (unlikely(!count)) + return ZERO_SIZE_PTR; + if (unlikely(count > totalram_pages)) return NULL; area = get_vm_area_caller((count << PAGE_SHIFT), flags, @@ -1648,8 +1650,10 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, void *addr; unsigned long real_size = size; + if (unlikely(!size)) + return ZERO_SIZE_PTR; size = PAGE_ALIGN(size); - if (!size || (size >> PAGE_SHIFT) > totalram_pages) + if (unlikely((size >> PAGE_SHIFT) > totalram_pages)) goto fail; area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST, -- 1.7.7.6 ___ linaro-dev mailing list linaro-dev@lists.linaro.org http://lists.linaro.org/mailman/listinfo/linaro-dev
[RFC PATCH] module: debugging check for runaway kthreads
Debugging option CONFIG_MODULE_KTHREAD_CHECK provides a way to check whether all kernel threads that were created by the module, and that use module code as their thread worker function, have really exited when the module is unloaded. The following pseudo-code contains an example of an error which is likely to be caught with this debugging check: static struct task_struct *tsk; static DECLARE_COMPLETION(done); static void *func(void *unused) { while (!kthread_should_stop()) real_work(); complete(&done); } static int __init modinit(void) { tsk = kthread_run(func, NULL, "func"); return IS_ERR(tsk) ? PTR_ERR(tsk) : 0; } static void __exit modexit(void) { wait_for_completion(&done); } Signed-off-by: Dmitry Antipov --- include/linux/kthread.h |5 + init/Kconfig|9 + kernel/kthread.c| 24 kernel/module.c | 45 + 4 files changed, 83 insertions(+), 0 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0714b24..33897c3 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -13,6 +13,11 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), #define kthread_create(threadfn, data, namefmt, arg...) \ kthread_create_on_node(threadfn, data, -1, namefmt, ##arg) +#ifdef CONFIG_MODULE_KTHREAD_CHECK +unsigned long get_kthread_func(struct task_struct *tsk); +#else +#define get_kthread_func(tsk, addr, mod) (0) +#endif /** * kthread_run - create and wake a thread. diff --git a/init/Kconfig b/init/Kconfig index 3f42cd6..fa7c6e0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1397,6 +1397,15 @@ config MODULE_FORCE_UNLOAD rmmod). This is mainly for kernel developers and desperate users. If unsure, say N. +config MODULE_KTHREAD_CHECK + bool "Check for runaway kernel threads at module unload" + depends on MODULE_UNLOAD && EXPERIMENTAL && DEBUG_KERNEL + help + This option allows you to check whether all kernel threads created + by the module and have used module code as a thread worker function + are really exited when the module is unloaded. 
This is mainly for + module developers. If unsure, say N. + config MODVERSIONS bool "Module versioning support" help diff --git a/kernel/kthread.c b/kernel/kthread.c index 3d3de63..5c53817 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -38,6 +38,9 @@ struct kthread_create_info struct kthread { int should_stop; +#ifdef CONFIG_MODULE_KTHREAD_CHECK + void *fn; +#endif void *data; struct completion exited; }; @@ -45,6 +48,24 @@ struct kthread { #define to_kthread(tsk)\ container_of((tsk)->vfork_done, struct kthread, exited) +#ifdef CONFIG_MODULE_KTHREAD_CHECK + +unsigned long get_kthread_func(struct task_struct *tsk) +{ + struct kthread *kt; + unsigned long addr; + + get_task_struct(tsk); + BUG_ON(!(tsk->flags & PF_KTHREAD)); + kt = to_kthread(tsk); + barrier(); + addr = tsk->vfork_done ? (unsigned long)kt->fn : 0UL; + put_task_struct(tsk); + return addr; +} + +#endif /* CONFIG_MODULE_KTHREAD_CHECK */ + /** * kthread_should_stop - should this kthread return now? * @@ -106,6 +127,9 @@ static int kthread(void *_create) int ret; self.should_stop = 0; +#ifdef CONFIG_MODULE_KTHREAD_CHECK + self.fn = threadfn; +#endif self.data = data; init_completion(&self.exited); current->vfork_done = &self.exited; diff --git a/kernel/module.c b/kernel/module.c index 2c93276..fe6637b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -764,6 +765,49 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } +#ifdef CONFIG_KALLSYMS +static const char *get_ksymbol(struct module *mod, unsigned long addr, + unsigned long *size, unsigned long *offset); +#else +#define get_ksymbol(mod, addr, size, offset) NULL +#endif + +#ifdef CONFIG_MODULE_KTHREAD_CHECK + +static void check_kthreads(struct module *mod) +{ + unsigned long flags; + struct task_struct *g, *p; + + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, p) { + const char *name; + unsigned long addr, 
offset, size; + + /* Note kthreadd is special. Other kthreads should + have their 'struct kthread' on the stack until + do_exit() calls schedule() for the last time. */ + if (p->mm || p == kthreadd_task) + continue; + + addr = get_kthread_func(p); + if (__module_text_address(addr) == mod) { + name = get_ksymbol(mod, ad
[RFC PATCH] hrtimers: system-wide and per-task hrtimer slacks
This patch proposes a system-wide sysctl-aware default for the high-resolution timer slack value, which may be changed from 0 to HRTIMER_MAX_SLACK nanoseconds. Default system-wide and per-task values are HRTIMER_DEFAULT_SLACK. The per-task value isn't inherited across fork(); instead, a newborn task uses the system-wide value by default, and a newborn thread uses its group leader's value. Signed-off-by: Dmitry Antipov --- Documentation/sysctl/kernel.txt |8 include/linux/hrtimer.h | 11 +++ include/linux/init_task.h |2 +- include/linux/sched.h | 11 --- kernel/fork.c |9 +++-- kernel/futex.c |4 ++-- kernel/hrtimer.c| 10 +++--- kernel/sys.c|8 +--- kernel/sysctl.c | 10 ++ 9 files changed, 59 insertions(+), 14 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d78841..83b63ed 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -606,6 +606,14 @@ can be ORed together: == +timer_slack: + +This value can be used to query and set the default slack for +high-resolution timers, in nanoseconds. The default value is 50 +microseconds, and can be changed from 0 nanoseconds to 1 millisecond. + +== + unknown_nmi_panic: The value in this file affects behavior of handling NMI. When the diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0dc30..b9da137 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -24,6 +24,16 @@ #include #include +/* + * Default system-wide and per-task hrtimer slack, in nanoseconds. + */ +#define HRTIMER_DEFAULT_SLACK 50000 + +/* + * Reasonable limit for hrtimer slack, in nanoseconds. 
+ */ +#define HRTIMER_MAX_SLACK 1000000 + struct hrtimer_clock_base; struct hrtimer_cpu_base; @@ -323,6 +333,7 @@ extern ktime_t ktime_get_monotonic_offset(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +extern int default_timer_slack_ns; /* Exported timer functions: */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9c66b1a..b29be0d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -178,7 +178,7 @@ extern struct cred init_cred; .journal_info = NULL, \ .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),\ - .timer_slack_ns = 50000, /* 50 usec default slack */\ + .timer_slack_ns = HRTIMER_DEFAULT_SLACK,\ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID),\ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d379a6..aa0a806 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1551,11 +1551,11 @@ struct task_struct { struct latency_record latency_record[LT_SAVECOUNT]; #endif /* -* time slack values; these are used to round up poll() and -* select() etc timeout values. These are in nanoseconds. +* High-resolution timer slack value, in nanoseconds. +* Used to round up poll()/select(), nanosleep, futex +* waiting, etc. timeout values of non-realtime tasks. */ unsigned long timer_slack_ns; - unsigned long default_timer_slack_ns; struct list_head*scm_work_list; #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -2628,6 +2628,11 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +static inline unsigned long task_timer_slack(struct task_struct *tsk) +{ + return rt_task(tsk) ? 0 : tsk->timer_slack_ns; +} + /* * Thread group CPU time accounting. 
*/ diff --git a/kernel/fork.c b/kernel/fork.c index e2cd3e2..0f9a983 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1167,8 +1167,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif - - p->default_timer_slack_ns = current->timer_slack_ns; + /* +* New thread inherits the slack from the group +* leader. New process uses system-default slack. +*/ + p->timer_slack_ns = (clone_flags & CLONE_THREAD) ? + current->group_leader->timer_slack_ns : + default_timer_slack_ns; task_io_accounting_init(&p->ioac); acct_clear_integrals(p); diff --git a/kernel/futex.c b/kernel/futex.c index 1614be2..a0d302d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1887,7 +1887,7 @@ static int futex_wait(u32 __user *