date:20121108

[PATCH] of/fdt: add const to char *pathp in of_scan_flat_dt()

2012-11-08 Thread Jingoo Han

Fixed build warning as below:

drivers/of/fdt.c: In function 'of_scan_flat_dt':
drivers/of/fdt.c:490:10: warning: assignment discards 'const' qualifier from 
pointer target type [enabled by default]

Signed-off-by: Jingoo Han 
---
This patch is based on linux-next-20121109 code tree.

 drivers/of/fdt.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 0ca0f9e..738228f 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -459,7 +459,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 
do {
u32 tag = be32_to_cpup((__be32 *)p);
-   char *pathp;
+   const char *pathp;
 
p += 4;
if (tag == OF_DT_END_NODE) {
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 04/10] pwm: pwm-tiecap: Add device-tree binding support for APWM driver

2012-11-08 Thread Thierry Reding

On Thu, Nov 08, 2012 at 01:23:11PM +0530, Philip, Avinash wrote:
> This patch
> 1. Add support for device-tree binding for ECAP APWM driver.
> 2. Set size of pwm-cells set to 3 to support PWM channel number, PWM
>period & polarity configuration from device tree.
> 3. Add enable/disable clock gating in PWM subsystem common config space.
> 4. While here, set the .owner member in the platform_driver structure to
>THIS_MODULE.
> 
> Signed-off-by: Philip, Avinash 
> Cc:   Grant Likely 
> Cc: Rob Herring 
> Cc: Rob Landley 
> ---
> Changes since v1:
>   - Add separate patch for pinctrl support
>   - Add conditional check for PWM subsystem clock enable.
>   - Combined with HWMOD changes & DT bindings.
>   - Remove the custom of xlate support.
> 
> :00 100644 000... fe24cac... A
> Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
> :100644 100644 d6d4cf0... 0d43266... Mdrivers/pwm/pwm-tiecap.c
>  .../devicetree/bindings/pwm/pwm-tiecap.txt |   22 +
>  drivers/pwm/pwm-tiecap.c   |   48 
> +++-
>  2 files changed, 69 insertions(+), 1 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt 
> b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
> new file mode 100644
> index 000..fe24cac
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
> @@ -0,0 +1,22 @@
> +TI SOC ECAP based APWM controller
> +
> +Required properties:
> +- compatible: Must be "ti,am33xx-ecap"
> +- #pwm-cells: Should be 3. Number of cells being used to specify PWM 
> property.
> +  First cell specifies the per-chip index of the PWM to use, the second
> +  cell is the period cycle in nanoseconds and bit 0 in the third cell is

I think this should be "period in nanoseconds". I haven't heard "period
cycle" before.

> +  used to encode the polarity of PWM output.

Maybe you should explicitly say how this is encoded.

> +- reg: physical base address and size of the registers map.
> +
> +Optional properties:
> +- ti,hwmods: Name of the hwmod associated to the ECAP:
> +  "ecap<x>", <x> being the 0-based instance number from the HW spec
> +
> +Example:
> +
> +ecap0: ecap@0 {
> + compatible = "ti,am33xx-ecap";
> + #pwm-cells = <3>;
> + reg = <0x48300100 0x80>;
> + ti,hwmods = "ecap0";
> +};
> diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
> index d6d4cf0..0d43266 100644
> --- a/drivers/pwm/pwm-tiecap.c
> +++ b/drivers/pwm/pwm-tiecap.c
> @@ -25,6 +25,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
> +#include "tipwmss.h"
>  
>  /* ECAP registers and bits definitions */
>  #define CAP1 0x08
> @@ -37,6 +40,13 @@
>  #define ECCTL2_SYNC_SEL_DISA (BIT(7) | BIT(6))
>  #define ECCTL2_TSCTR_FREERUN BIT(4)
>  
> +#define ECAPCLK_EN   BIT(0)
> +#define ECAPCLK_STOP_REQ BIT(1)

This one doesn't seem to align with the rest. Also, why is bit 0 called
_EN and bit 1 _STOP_REQ? Couldn't they be made more consistent, i.e.
_START and _STOP? Or _ENABLE and _DISABLE?

> +
> +#define ECAPCLK_EN_ACK   BIT(0)
> +
> +#define PWM_CELL_SIZE3

You don't need a define for this.

> +
>  struct ecap_pwm_chip {
>   struct pwm_chip chip;
>   unsigned intclk_rate;
> @@ -184,6 +194,16 @@ static const struct pwm_ops ecap_pwm_ops = {
>   .owner  = THIS_MODULE,
>  };
>  
> +#ifdef CONFIG_OF
> +static const struct of_device_id ecap_of_match[] = {
> + {
> + .compatible = "ti,am33xx-ecap",
> + },
> + {},
> +};
> +MODULE_DEVICE_TABLE(of, ecap_of_match);
> +#endif
> +

I'm not sure if I remember correctly, but wasn't AM33xx support supposed
to be DT only? In that case you don't need the CONFIG_OF guards.

>  static int __devinit ecap_pwm_probe(struct platform_device *pdev)

__devinit can go away.

>  {
>   int ret;
> @@ -211,6 +231,7 @@ static int __devinit ecap_pwm_probe(struct 
> platform_device *pdev)
>  
>   pc->chip.dev = &pdev->dev;
>   pc->chip.ops = &ecap_pwm_ops;
> + pc->chip.of_pwm_n_cells = PWM_CELL_SIZE;
>   pc->chip.base = -1;
>   pc->chip.npwm = 1;
>  
> @@ -231,14 +252,37 @@ static int __devinit ecap_pwm_probe(struct 
> platform_device *pdev)
>   }
>  
>   pm_runtime_enable(&pdev->dev);
> + pm_runtime_get_sync(&pdev->dev);

Maybe put a blank line after this for readability.

> + if (!(pwmss_submodule_state_change(pdev->dev.parent, ECAPCLK_EN) &
> + ECAPCLK_EN_ACK)) {

This is very hard to read, can you split this up into something like the
following please?

status = pwmss_submodule_state_change(pdev->dev.parent, ECAPCLK_EN);
if (!(status & ECAPCLK_EN_ACK)) {
...
}

> + dev_err(&pdev->dev, "PWMSS config space clock enable 
> failure\n");
> + ret = -EINVAL;
> + goto pwmss_clk_failure;
> + }
> + pm_runtime_put_sync(&pdev->dev);

An

Re: [PATCH] virtio-scsi: Fix incorrect lock release order in virtscsi_kick_cmd

2012-11-08 Thread Wanlong Gao

On 11/09/2012 02:29 PM, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger 
> 
> This patch fixes a regression bug in virtscsi_kick_cmd() that relinquishes
> the acquired spinlocks in the incorrect order using the wrong spin_unlock
> macros, namely releasing vq->vq_lock before tgt->tgt_lock while invoking
> the calls to virtio_ring.c:virtqueue_add_buf() and friends.
> 
> This bug was originally introduced in v3.5-rc7 code with:
> 
> commit 2bd37f0fde99cbf8b78fb55f1128e8c3a63cf1da
> Author: Paolo Bonzini 
> Date:   Wed Jun 13 16:56:34 2012 +0200
> 
> [SCSI] virtio-scsi: split scatterlist per target
> 
> Go ahead and make sure that vq->vq_lock is relinquished w/ spin_unlock
> first, then release tgt->tgt_lock w/ spin_unlock_irqrestore.

Did you hit any error? I don't think this order is wrong.

Thanks,
Wanlong Gao

> 
> Cc: Paolo Bonzini 
> Cc: James Bottomley 
> Cc: Christoph Hellwig 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Nicholas Bellinger 
> ---
>  drivers/scsi/virtio_scsi.c |4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index 595af1a..b2abb8a 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -417,11 +417,11 @@ static int virtscsi_kick_cmd(struct 
> virtio_scsi_target_state *tgt,
>  
>   spin_lock(&vq->vq_lock);
>   ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
> - spin_unlock(&tgt->tgt_lock);
> + spin_unlock(&vq->vq_lock);
>   if (ret >= 0)
>   ret = virtqueue_kick_prepare(vq->vq);
>  
> - spin_unlock_irqrestore(&vq->vq_lock, flags);
> + spin_unlock_irqrestore(&tgt->tgt_lock, flags);
>  
>   if (ret > 0)
>   virtqueue_notify(vq->vq);
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] mm: mmap: remove unused variable

2012-11-08 Thread Jingoo Han

Fixed build warning as below:

arch/arm/mm/mmap.c: In function 'arch_get_unmapped_area':
arch/arm/mm/mmap.c:60:16: warning: unused variable 'start_addr' 
[-Wunused-variable]

Signed-off-by: Jingoo Han 
---
This patch is based on linux-next-20121109 code tree.

 arch/arm/mm/mmap.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index f4fec6d..10062ce 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -57,7 +57,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
-   unsigned long start_addr;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
struct vm_unmapped_area_info info;
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] loop: Limit the number of requests in the bio list

2012-11-08 Thread Jens Axboe

On 2012-11-08 20:14, Andrew Morton wrote:
> On Tue, 16 Oct 2012 11:21:45 +0200
> Lukas Czerner  wrote:
> 
>> Currently there is no limit on the number of requests in the loop bio
>> list. This can lead into some nasty situations when the caller spawns
>> tons of bio requests taking huge amount of memory. This is even more
>> obvious with discard where blkdev_issue_discard() will submit all bios
>> for the range and wait for them to finish afterwards. On really big loop
>> devices and slow backing file system this can lead to OOM situation as
>> reported by Dave Chinner.
>>
>> With this patch we will wait in loop_make_request() if the number of
>> bios in the loop bio list would exceed 'nr_requests' number of requests.
>> We'll wake up the process as we process the bios from the list. Some
>> threshold hysteresis is in place to avoid high frequency oscillation.
>>
> 
> What's happening with this?

Sorry I didn't reply to this yet. My initial thought is that we had
something like this for loop back in the 2.4 days, and it was deadlock
prone. Can't seem to remember all the details on that yet.

v2 is a nice improvement, though. With 1:1 bio and wakeups, you would
get tons of context switches. The batched approach is much better.

Lukas, have you beaten on this with a file backed loop and heavy traffic
on a file system on top?

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] misc: apds9802als: Fix checking timeout in als_wait_for_data_ready()

2012-11-08 Thread Axel Lin

In the case of a timeout waiting for data ready, the retry variable is -1.
This also fixes a bug: the current code returns -ETIMEDOUT if the last retry
succeeds (which means retry is 0 when exiting the while loop).

Signed-off-by: Axel Lin 
---
 drivers/misc/apds9802als.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
index 0314773..94923d2 100644
--- a/drivers/misc/apds9802als.c
+++ b/drivers/misc/apds9802als.c
@@ -68,7 +68,7 @@ static int als_wait_for_data_ready(struct device *dev)
ret = i2c_smbus_read_byte_data(client, 0x86);
} while (!(ret & 0x80) && retry--);
 
-   if (!retry) {
+   if (retry < 0) {
dev_warn(dev, "timeout waiting for data ready\n");
return -ETIMEDOUT;
}
-- 
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/8] aoe: avoid running request handler on plugged queue

2012-11-08 Thread Jens Axboe

On 2012-11-09 01:17, Ed Cashin wrote:
> Calling the request handler directly on a plugged queue defeats
> the performance improvements provided by the plugging mechanism.
> Use the __blk_run_queue function instead of calling the request
> handler directly, so that we don't interfere with the block
> layer's ability to plug the queue.

Thanks Ed, applied!

-- 
Jens Axboe

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 01/10] PWMSS: Add PWM Subsystem driver for parent<->child relationship

2012-11-08 Thread Thierry Reding

On Thu, Nov 08, 2012 at 01:23:08PM +0530, Philip, Avinash wrote:
> diff --git a/Documentation/devicetree/bindings/pwm/tipwmss.txt 
> b/Documentation/devicetree/bindings/pwm/tipwmss.txt
> new file mode 100644
> index 000..b6c2814
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pwm/tipwmss.txt
> @@ -0,0 +1,30 @@
[...]
> +Also child nodes should also populated under PWMSS DT node.
> +Example:

Maybe put a blank line between these two lines for readability?

> +pwmss0: pwmss@4830 {
> + compatible = "ti,am33xx-pwmss";
> + reg = <0x4830 0x10
> + 0x48300100 0x80
> + 0x48300180 0x80
> + 0x48300200 0x80>;

I don't think you should list the register spaces of the children here.
From what I understand, all regions listed in the reg property are
supposed to be requested by the corresponding driver and therefore
cannot be used by any other device.

> + ti,hwmods = "epwmss0";
> + #address-cells = <1>;
> + #size-cells = <1>;
> + status = "disabled";
> + ranges;

I think to represent which memory regions go to the children, you should
put them in this ranges property, which would then look like this:

ranges = <0x48300100 0x48300100 0x80   /* ECAP */
  0x48300180 0x48300180 0x80   /* EQEP */
  0x48300200 0x48300200 0x80>; /* EHRPWM */

> +
> + /* child nodes go here */
> +};

Maybe you should actually list a full set of children here?

> diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
> index 6e556c7..384a346 100644
> --- a/drivers/pwm/Kconfig
> +++ b/drivers/pwm/Kconfig
> @@ -136,6 +136,7 @@ config PWM_TEGRA
>  config  PWM_TIECAP
>   tristate "ECAP PWM support"
>   depends on SOC_AM33XX
> + select PWM_TIPWMSS
>   help
> PWM driver support for the ECAP APWM controller found on AM33XX
> TI SOC
> @@ -146,6 +147,7 @@ config  PWM_TIECAP
>  config  PWM_TIEHRPWM
>   tristate "EHRPWM PWM support"
>   depends on SOC_AM33XX
> + select PWM_TIPWMSS
>   help
> PWM driver support for the EHRPWM controller found on AM33XX
> TI SOC
> @@ -153,6 +155,15 @@ config  PWM_TIEHRPWM
> To compile this driver as a module, choose M here: the module
> will be called pwm-tiehrpwm.
>  
> +config  PWM_TIPWMSS
> + tristate "TI PWM Subsytem parent support"
> + depends on SOC_AM33XX && (PWM_TIEHRPWM || PWM_TIECAP)

Since you select the symbol from the PWM_TIECAP and PWM_TIEHRPWM symbols
there is no need to depend on them, right? Oh, but maybe that's to make
sure the symbol is deselected automatically if neither user is selected.

Perhaps this should actually be a hidden symbol (i.e. leave out the
prompt string in the tristate option) since it's purely a dependency and
not useful on its own.

> + help
> +   PWM Subsystem driver support for AM33xx SOC.
> +
> +   PWM submodules require PWM config space access from submodule
> +   drivers and require common parent driver support.
> +
>  config PWM_TWL6030
>   tristate "TWL6030 PWM support"
>   depends on TWL4030_CORE
> diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
> index 3b3f4c9..55f6fb2 100644
> --- a/drivers/pwm/Makefile
> +++ b/drivers/pwm/Makefile
> @@ -12,5 +12,6 @@ obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o
>  obj-$(CONFIG_PWM_TEGRA)  += pwm-tegra.o
>  obj-$(CONFIG_PWM_TIECAP) += pwm-tiecap.o
>  obj-$(CONFIG_PWM_TIEHRPWM)   += pwm-tiehrpwm.o
> +obj-$(CONFIG_PWM_TIPWMSS)+= tipwmss.o

This should have a pwm- prefix as well.

>  obj-$(CONFIG_PWM_TWL6030)+= pwm-twl6030.o
>  obj-$(CONFIG_PWM_VT8500) += pwm-vt8500.o
> diff --git a/drivers/pwm/tipwmss.c b/drivers/pwm/tipwmss.c
> new file mode 100644
> index 000..c188348
> --- /dev/null
> +++ b/drivers/pwm/tipwmss.c
> @@ -0,0 +1,142 @@
[...]
> +#include "tipwmss.h"
> +
> +#define PWMSS_CLKCONFIG  0x8 /* Clock gaitng reg, for PWM 
> submodules */

"gating"

> +#define PWMSS_CLKSTATUS  0xc /* Clock gating status reg */
> +
> +struct pwmss_info {
> + void __iomem*mmio_base;
> + struct mutexpwmss_lock;
> + u16 pwmss_clkconfig;

The indentation looks weird on this last field.

> +};
> +
> +u16 pwmss_submodule_state_change(struct device *dev, int set)
> +{
> + struct pwmss_info *info = dev_get_drvdata(dev);
> + u16 val;
> +
> + val = readw(info->mmio_base + PWMSS_CLKCONFIG);
> + val |= set;
> + mutex_lock(&info->pwmss_lock);
> + writew(val , info->mmio_base + PWMSS_CLKCONFIG);
> + mutex_unlock(&info->pwmss_lock);

The mutex needs to span the whole read-modify-write sequence, not just
the write.

Also, how do you clear this state?

> + return readw(info->mmio_base + PWMSS_CLKSTATUS);
> +}
> +EXPORT_SYMBOL(pwmss_submodule_state_change);
> +
> +static const struct of_device_id pwmss_of_match[] = {
> + {
> + .compatible = "ti,am33xx-pwmss",
>

RE: [PATCH 2/3] [SCSI] mvsas: fix shift in mvs_94xx_free_reg_set()

2012-11-08 Thread Xiangliang Yu


> On 11/6/12 7:06 AM, James Bottomley wrote:
> >
> > Why is this necessary?  As I read the reg set assignment code, it finds
> > a free bit in the 64 bit register and uses that ... which can never be
> > greater than 64 so there's no need for the check.
> 
> This patch just tries to be more defensive for bit(reg_set) with a
> broken reg_set value.  I agree with you that it's not that necessary.

I agree with James; we just need to do the NOT operation once.

> 
> > The other two look OK (probably redone as a single patch with a stable
> > tag), but I'd like the input of the mvs people since it seems with the
> > current code, we only use 32 bit regsets and probably hang if we go over
> > that.  The bug fix is either to enable the full 64 if it works, or
> > possibly cap at 32 ... what works with all released devices?
> 
> Thanks for reviewing.  Yeah, we'd better wait for the input from
> the mvs people.

About patch 3, I checked the ffz code and found that it checks for the ~0 condition.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] PCI: Fix bit definitions of PCI_EXP_LNKCAP2 register

2012-11-08 Thread Jingoo Han

According to the PCIe 3.0 spec, PCI_EXP_LNKCAP2_SLS_2_5GB is the
1st bit of the PCI_EXP_LNKCAP2 register, not the 0th bit. So, the bit
definitions of the supported link speeds vector should be fixed.

Signed-off-by: Jingoo Han 
---
 include/uapi/linux/pci_regs.h |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 20ae747..14a3184 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -544,9 +544,9 @@
 #define  PCI_EXP_OBFF_WAKE_EN  0x6000  /* OBFF using WAKE# signaling */
 #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44  /* v2 endpoints end here */
 #define PCI_EXP_LNKCAP244  /* Link Capability 2 */
-#define  PCI_EXP_LNKCAP2_SLS_2_5GB 0x01/* Current Link Speed 2.5GT/s */
-#define  PCI_EXP_LNKCAP2_SLS_5_0GB 0x02/* Current Link Speed 5.0GT/s */
-#define  PCI_EXP_LNKCAP2_SLS_8_0GB 0x04/* Current Link Speed 8.0GT/s */
+#define  PCI_EXP_LNKCAP2_SLS_2_5GB 0x02/* Current Link Speed 2.5GT/s */
+#define  PCI_EXP_LNKCAP2_SLS_5_0GB 0x04/* Current Link Speed 5.0GT/s */
+#define  PCI_EXP_LNKCAP2_SLS_8_0GB 0x08/* Current Link Speed 8.0GT/s */
 #define  PCI_EXP_LNKCAP2_CROSSLINK 0x100 /* Crosslink supported */
 #define PCI_EXP_LNKCTL248  /* Link Control 2 */
 #define PCI_EXP_LNKSTA250  /* Link Status 2 */
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] virtio-scsi: Fix incorrect lock release order in virtscsi_kick_cmd

2012-11-08 Thread Nicholas A. Bellinger

From: Nicholas Bellinger 

This patch fixes a regression bug in virtscsi_kick_cmd() that relinquishes
the acquired spinlocks in the incorrect order using the wrong spin_unlock
macros, namely releasing vq->vq_lock before tgt->tgt_lock while invoking
the calls to virtio_ring.c:virtqueue_add_buf() and friends.

This bug was originally introduced in v3.5-rc7 code with:

commit 2bd37f0fde99cbf8b78fb55f1128e8c3a63cf1da
Author: Paolo Bonzini 
Date:   Wed Jun 13 16:56:34 2012 +0200

[SCSI] virtio-scsi: split scatterlist per target

Go ahead and make sure that vq->vq_lock is relinquished w/ spin_unlock
first, then release tgt->tgt_lock w/ spin_unlock_irqrestore.

Cc: Paolo Bonzini 
Cc: James Bottomley 
Cc: Christoph Hellwig 
Cc: sta...@vger.kernel.org
Signed-off-by: Nicholas Bellinger 
---
 drivers/scsi/virtio_scsi.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 595af1a..b2abb8a 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -417,11 +417,11 @@ static int virtscsi_kick_cmd(struct 
virtio_scsi_target_state *tgt,
 
spin_lock(&vq->vq_lock);
ret = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
-   spin_unlock(&tgt->tgt_lock);
+   spin_unlock(&vq->vq_lock);
if (ret >= 0)
ret = virtqueue_kick_prepare(vq->vq);
 
-   spin_unlock_irqrestore(&vq->vq_lock, flags);
+   spin_unlock_irqrestore(&tgt->tgt_lock, flags);
 
if (ret > 0)
virtqueue_notify(vq->vq);
-- 
1.7.2.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [next:akpm 157/313] arch/tile/mm/hugetlbpage.c:256:20: error: 'mm' undeclared

2012-11-08 Thread Michel Lespinasse

On Fri, Nov 09, 2012 at 01:51:54PM +0800, kbuild test robot wrote:
> tree:   git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm
> head:   6703d59701f7592c897b975de3952ba6bbb98b83
> commit: 921a29df6a2bab7c1bfbb11eb5648b92bdc250e8 [157/313] mm: use 
> vm_unmapped_area() in hugetlbfs on tile architecture
> config: make ARCH=tile tilegx_defconfig
> 
> All error/warnings:
> 
> arch/tile/mm/hugetlbpage.c: In function 'hugetlb_get_unmapped_area_topdown':
> arch/tile/mm/hugetlbpage.c:256:20: error: 'mm' undeclared (first use in this 
> function)
> arch/tile/mm/hugetlbpage.c:256:20: note: each undeclared identifier is 
> reported only once for each function it appears in

commit 86234092170b43771c3f6257cb320ff6e2c10c52
Author: Michel Lespinasse 
Date:   Thu Nov 8 22:13:58 2012 -0800

fix mm: use vm_unmapped_area() in hugetlbfs on tile architecture

diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 6f74cce053e1..650ccff8378c 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -253,7 +253,7 @@ static unsigned long 
hugetlb_get_unmapped_area_topdown(struct file *file,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
-   info.high_limit = mm->mmap_base;
+   info.high_limit = current->mm->mmap_base;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [next:akpm 136/313] mm/mmap.c:1878:6: error: 'mm' undeclared

2012-11-08 Thread Michel Lespinasse

On Fri, Nov 09, 2012 at 01:59:21PM +0800, kbuild test robot wrote:
> tree:   git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm
> head:   6703d59701f7592c897b975de3952ba6bbb98b83
> commit: db877c95d306d688818542d49e9b63eb7a3b0894 [136/313] mm: augment vma 
> rbtree with rb_subtree_gap
> config: make ARCH=ia64 alldefconfig
> 
> All error/warnings:
> 
> mm/mmap.c: In function 'expand_upwards':
> mm/mmap.c:1878:6: error: 'mm' undeclared (first use in this function)
> mm/mmap.c:1878:6: note: each undeclared identifier is reported only once for 
> each function it appears in

commit 34550b95185c1ecfa8882664744c14edda385868
Author: Michel Lespinasse 
Date:   Thu Nov 8 22:14:34 2012 -0800

fix mm: augment vma rbtree with rb_subtree_gap

diff --git a/mm/mmap.c b/mm/mmap.c
index d12c69eaf23f..0b8f9d83e2e2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2032,7 +2032,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned 
long address)
if (vma->vm_next)
vma_gap_update(vma->vm_next);
else
-   mm->highest_vm_end = address;
+   vma->vm_mm->highest_vm_end = address;
perf_event_mmap(vma);
}
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/4] thermal: Add new thermal trend type to support quick cooling

2012-11-08 Thread Amit Kachhap

On 9 November 2012 09:21, Zhang Rui  wrote:
> On Thu, 2012-11-08 at 11:56 +0530, Amit Kachhap wrote:
>> On 8 November 2012 11:31, Zhang Rui  wrote:
>> > On Thu, 2012-11-08 at 09:56 +0530, Amit Daniel Kachhap wrote:
>> >> This modification adds 2 new thermal trend type THERMAL_TREND_RAISE_FULL
>> >> and THERMAL_TREND_DROP_FULL. This thermal trend can be used to quickly
>> >> jump to the upper or lower cooling level instead of incremental increase
>> >> or decrease.
>> >
>> > IMO, what we need is a new more aggressive cooling governor which always
>> > uses upper limit when the temperature is raising and lower limit when
>> > the temperature is dropping.
>> Yes I agree that a new aggressive governor is the best approach but
>> then i thought adding a new trend type is a simple solution to achieve
>> this and since most of the governor logic might be same as the
>> step-wise governor. I have no objection in doing it through governor.
>> >
> hmmm,
> I think a more proper way is to set the cooling state to upper limit
> when it overheats and reduce the cooling state step by step when the
> temperature drops.

No, actually I was thinking of having a simple governor that only
switches between the upper level and the lower level. Also, since the
temperature sensor is capable of interrupting on both a rise above a
threshold (say 100C) and a fall below a threshold (say 90C), polling or
step increments are not needed at all.
Currently the stepwise governor does that, so we might change the
macro name as,
THERMAL_TREND_RAISE_STEP,
THERMAL_TREND_DROP_STEP,
THERMAL_TREND_RAISE_MAX,
THERMAL_TREND_DROP_MAX,

and file step_wise.c can be named as state_wise.c or trend_wise.c.

I am not sure if it is the best way. How do you feel?

> what do you think?
>
> thanks,
> rui
>
>> > I can write such a governor if you do not have time to.
>> ok. thanks
>> >
>> > thanks,
>> > rui
>> >>  This is needed for temperature sensors which support rising/falling
>> >> threshold interrupts and polling can be totally avoided.
>> >>
>> >
>> >
>> >> Signed-off-by: Amit Daniel Kachhap 
>> >> Signed-off-by: Amit Daniel Kachhap 
>> >> ---
>> >>  drivers/thermal/step_wise.c |   19 +++
>> >>  include/linux/thermal.h |2 ++
>> >>  2 files changed, 17 insertions(+), 4 deletions(-)
>> >>
>> >> diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
>> >> index 1242cff..0d2d8d6 100644
>> >> --- a/drivers/thermal/step_wise.c
>> >> +++ b/drivers/thermal/step_wise.c
>> >> @@ -35,6 +35,10 @@
>> >>   *   state for this trip point
>> >>   *b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
>> >>   *   state for this trip point
>> >> + *c. if the trend is THERMAL_TREND_RAISE_FULL, use highest cooling
>> >> + *   state for this trip point
>> >> + *d. if the trend is THERMAL_TREND_DROP_FULL, use lowest cooling
>> >> + *   state for this trip point
>> >>   */
>> >>  static unsigned long get_target_state(struct thermal_instance *instance,
>> >>   enum thermal_trend trend)
>> >> @@ -50,7 +54,10 @@ static unsigned long get_target_state(struct 
>> >> thermal_instance *instance,
>> >>   } else if (trend == THERMAL_TREND_DROPPING) {
>> >>   cur_state = cur_state > instance->lower ?
>> >>   (cur_state - 1) : instance->lower;
>> >> - }
>> >> + } else if (trend == THERMAL_TREND_RAISE_FULL)
>> >> + cur_state = instance->upper;
>> >> + else if (trend == THERMAL_TREND_DROP_FULL)
>> >> + cur_state = instance->lower;
>> >>
>> >>   return cur_state;
>> >>  }
>> >> @@ -87,7 +94,8 @@ static void update_instance_for_throttle(struct 
>> >> thermal_zone_device *tz,
>> >>  }
>> >>
>> >>  static void update_instance_for_dethrottle(struct thermal_zone_device 
>> >> *tz,
>> >> - int trip, enum thermal_trip_type trip_type)
>> >> + int trip, enum thermal_trip_type trip_type,
>> >> + enum thermal_trend trend)
>> >>  {
>> >>   struct thermal_instance *instance;
>> >>   struct thermal_cooling_device *cdev;
>> >> @@ -101,7 +109,10 @@ static void update_instance_for_dethrottle(struct 
>> >> thermal_zone_device *tz,
>> >>   cdev = instance->cdev;
>> >>   cdev->ops->get_cur_state(cdev, &cur_state);
>> >>
>> >> - instance->target = cur_state > instance->lower ?
>> >> + if (trend == THERMAL_TREND_DROP_FULL)
>> >> + instance->target = instance->lower;
>> >> + else
>> >> + instance->target = cur_state > instance->lower ?
>> >>   (cur_state - 1) : THERMAL_NO_TARGET;
>> >>
>> >>   /* Deactivate a passive thermal instance */
>> >> @@ -133,7 +144,7 @@ static void thermal_zone_trip_update(struct 
>> >> thermal_zone_device *tz, int trip)
>> >>   if (tz->temperature >= trip_temp)
>> >>

Re: [PATCH 4/4] DMA: PL330: add device tree property for DMA_MEMCPY capability

2012-11-08 Thread Jassi Brar

On 30 October 2012 14:51, Bartlomiej Zolnierkiewicz
 wrote:
>
> Hi,
>
> On Monday 29 October 2012 22:45:48 Jassi Brar wrote:
>> On Mon, Oct 29, 2012 at 10:59 AM, Bartlomiej Zolnierkiewicz
>>  wrote:
>> > * Add device tree (DT) property ("pl330,dma-memcpy") for DMA_MEMCPY
>> >   capability and instead of setting this capability unconditionally
>> >   in pl330_probe() do it only when property is present.
>> >
>> Perhaps we should pass the array of peripheral interfaces via DT, the
>> lack of which could imply MEMCPY capability ? (while it works, I doubt
>> if pl330 is supposed to have SLAVE and MEMCPY capabilities in any
>> instance)
>
> In case of PL330 on EXYNOS4 we have two interfaces with SLAVE capability
> and one interface with MEMCPY capability.  Could you please explain more
> the idea of passing the array of peripherals through DT so we can detect
> which interface has MEMCPY capability?
>
The DT node of a 'pdma' should have the array of indices of
peripherals it caters to (what is currently peri_id of 'struct
dma_pl330_platdata'). The array would be missing in the DT node of
'mdma' since all channels are equal.
During probe if the array, say as property 'peri_map', is missing from
DT node of the dmac, that would imply the dmac is 'mdma' and hence the
pl330.c sets DMA_MEMCPY in its cap_mask. Otherwise the peri_map
implies a 'pdma' and hence SLAVE|CYCLIC is set.


>> That would also be a step towards discarding "struct dma_pl330_platdata".
>
> I don't know if getting rid of "struct dma_pl330_platdata" is possible
> but we still need to come up with some way to pass the needed information
> through DT.  Do you have an idea how it could be done?
>
struct dma_pl330_platdata {
  u8 nr_valid_peri;
  u8 *peri_id;
  As explained above, these two should move to the DT node of the dma controller.

  dma_cap_mask_t cap_mask;
  Should be set in pl330.c : MEMCPY for mdma,  SLAVE|CYCLIC for pdma

  unsigned mcbuf_sz;
  Currently unused and already safe enough default value set in driver.
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V2] gpio: tegra: read output value when gpio is set in direction_out

2012-11-08 Thread Laxman Dewangan

Read the output value when gpio is set for the output mode for
gpio_get_value(). Reading input value in direction out does not
give correct value.

Signed-off-by: Laxman Dewangan 
---
Changes from V1:
- Keep the implementation of the if part the same as the else part so that
  both branches use the same kind of logic.

 drivers/gpio/gpio-tegra.c |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index c7c175a..1163cf1 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -135,6 +135,11 @@ static void tegra_gpio_set(struct gpio_chip *chip, 
unsigned offset, int value)
 
 static int tegra_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
+   /* If gpio is in output mode then read from the out value */
+   if ((tegra_gpio_readl(GPIO_OE(offset)) >> GPIO_BIT(offset)) & 1)
+   return (tegra_gpio_readl(GPIO_OUT(offset)) >>
+   GPIO_BIT(offset)) & 0x1;
+
return (tegra_gpio_readl(GPIO_IN(offset)) >> GPIO_BIT(offset)) & 0x1;
 }
 
-- 
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] uio_pdrv: set memory mapping name

2012-11-08 Thread Manuel Traut

If uio_pdrv[_genirq] is used, the uio maps have currently no name set.
This patch sets the uio_mem name to the name of the memory resource.
 
Signed-off-by: Manuel Traut 
Reported-by: Stefan Staedtler 
Tested-by: Stefan Staedtler 

diff --git a/drivers/uio/uio_pdrv_genirq.c b/drivers/uio/uio_pdrv_genirq.c
index 42202cd..ac988ce 100644
--- a/drivers/uio/uio_pdrv_genirq.c
+++ b/drivers/uio/uio_pdrv_genirq.c
@@ -172,6 +172,7 @@ static int uio_pdrv_genirq_probe(struct platform_device 
*pdev)
uiomem->memtype = UIO_MEM_PHYS;
uiomem->addr = r->start;
uiomem->size = resource_size(r);
+   uiomem->name = r->name;
++uiomem;
}
 
diff --git a/drivers/uio/uio_pdrv.c b/drivers/uio/uio_pdrv.c
index 72d3646..39be9e0 100644
--- a/drivers/uio/uio_pdrv.c
+++ b/drivers/uio/uio_pdrv.c
@@ -60,6 +60,7 @@ static int uio_pdrv_probe(struct platform_device *pdev)
uiomem->memtype = UIO_MEM_PHYS;
uiomem->addr = r->start;
uiomem->size = resource_size(r);
+   uiomem->name = r->name;
++uiomem;
}
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] vmxnet3: convert BUG_ON(true) into a simple BUG()

2012-11-08 Thread Shreyas Bhatewara


- Original Message -
> Signed-off-by: Sasha Levin 
> ---
>  drivers/net/vmxnet3/vmxnet3_drv.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Signed-off-by: Shreyas N Bhatewara 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/3] perf annotate: Merge same lines in summary view

2012-11-08 Thread Namhyung Kim

From: Namhyung Kim 

The --print-line option of perf annotate command shows summary for
each source line.  But it didn't merge same lines so that it can
appear multiple times.

* before:

Sorted summary for file /home/namhyung/bin/mcol
--
   21.71 /home/namhyung/tmp/mcol.c:26
   20.66 /home/namhyung/tmp/mcol.c:25
9.53 /home/namhyung/tmp/mcol.c:24
7.68 /home/namhyung/tmp/mcol.c:25
7.67 /home/namhyung/tmp/mcol.c:25
7.66 /home/namhyung/tmp/mcol.c:26
7.49 /home/namhyung/tmp/mcol.c:26
6.92 /home/namhyung/tmp/mcol.c:25
6.81 /home/namhyung/tmp/mcol.c:25
1.07 /home/namhyung/tmp/mcol.c:26
0.52 /home/namhyung/tmp/mcol.c:25
0.51 /home/namhyung/tmp/mcol.c:25
0.51 /home/namhyung/tmp/mcol.c:24

* after:

Sorted summary for file /home/namhyung/bin/mcol
--
   50.77 /home/namhyung/tmp/mcol.c:25
   37.94 /home/namhyung/tmp/mcol.c:26
   10.04 /home/namhyung/tmp/mcol.c:24

To do that, introduce percent_sum field so that the normal
line-by-line output doesn't get changed.

Signed-off-by: Namhyung Kim 
---
* v3: Renew output in changelog

* v2: ->percent_sum should be used when resorting.

 tools/perf/util/annotate.c | 55 +++---
 tools/perf/util/annotate.h |  1 +
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 83b1078260e3..180113b891a3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -887,12 +887,41 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
struct source_line *iter;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
+   int ret;
 
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct source_line, node);
 
-   if (src_line->percent > iter->percent)
+   ret = strcmp(iter->path, src_line->path);
+   if (ret == 0) {
+   iter->percent_sum += src_line->percent;
+   return;
+   }
+
+   if (ret < 0)
+   p = &(*p)->rb_left;
+   else
+   p = &(*p)->rb_right;
+   }
+
+   src_line->percent_sum = src_line->percent;
+
+   rb_link_node(&src_line->node, parent, p);
+   rb_insert_color(&src_line->node, root);
+}
+
+static void __resort_source_line(struct rb_root *root, struct source_line 
*src_line)
+{
+   struct source_line *iter;
+   struct rb_node **p = &root->rb_node;
+   struct rb_node *parent = NULL;
+
+   while (*p != NULL) {
+   parent = *p;
+   iter = rb_entry(parent, struct source_line, node);
+
+   if (src_line->percent_sum > iter->percent_sum)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -902,6 +931,24 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
rb_insert_color(&src_line->node, root);
 }
 
+static void resort_source_line(struct rb_root *dest_root, struct rb_root 
*src_root)
+{
+   struct source_line *src_line;
+   struct rb_node *node;
+
+   node = rb_first(src_root);
+   while (node) {
+   struct rb_node *next;
+
+   src_line = rb_entry(node, struct source_line, node);
+   next = rb_next(node);
+   rb_erase(node, src_root);
+
+   __resort_source_line(dest_root, src_line);
+   node = next;
+   }
+}
+
 static void symbol__free_source_line(struct symbol *sym, int len)
 {
struct annotation *notes = symbol__annotation(sym);
@@ -926,6 +973,7 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
struct source_line *src_line;
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evidx);
+   struct rb_root tmp_root = RB_ROOT;
 
if (!h->sum)
return 0;
@@ -960,12 +1008,13 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
goto next;
 
strcpy(src_line[i].path, path);
-   insert_source_line(root, &src_line[i]);
+   insert_source_line(&tmp_root, &src_line[i]);
 
next:
pclose(fp);
}
 
+   resort_source_line(root, &tmp_root);
return 0;
 }
 
@@ -989,7 +1038,7 @@ static void print_summary(struct rb_root *root, const char 
*filename)
char *path;
 
src_line = rb_entry(node, struct source_line, node);
-   percent = src_line->percent;
+   percent = src_line->percent_sum;
color = get_percent_color(percent);
path = src_line->path;
 
diff --git a/tools/perf/util/annotate.h b/tools/

RE: [PATCH 1/4] thermal: Add new thermal trend type to support quick cooling

2012-11-08 Thread R, Durgadoss

Hi Amit/Rui,

> -Original Message-
> From: Zhang, Rui
> Sent: Friday, November 09, 2012 9:21 AM
> To: Amit Kachhap
> Cc: linux...@lists.linux-foundation.org; linux-samsung-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; R, Durgadoss;
> l...@kernel.org; linux-a...@vger.kernel.org; jonghwa3@samsung.com
> Subject: Re: [PATCH 1/4] thermal: Add new thermal trend type to support
> quick cooling
> 
> On Thu, 2012-11-08 at 11:56 +0530, Amit Kachhap wrote:
> > On 8 November 2012 11:31, Zhang Rui  wrote:
> > > On Thu, 2012-11-08 at 09:56 +0530, Amit Daniel Kachhap wrote:
> > >> This modification adds 2 new thermal trend type
> THERMAL_TREND_RAISE_FULL
> > >> and THERMAL_TREND_DROP_FULL. This thermal trend can be used to
> quickly
> > >> jump to the upper or lower cooling level instead of incremental increase
> > >> or decrease.
> > >
> > > IMO, what we need is a new more aggressive cooling governor which
> always
> > > uses upper limit when the temperature is raising and lower limit when
> > > the temperature is dropping.
> > Yes I agree that a new aggressive governor is the best approach but
> > then i thought adding a new trend type is a simple solution to achieve
> > this and since most of the governor logic might be same as the
> > step-wise governor. I have no objection in doing it through governor.
> > >
> hmmm,
> I think a more proper way is to set the cooling state to upper limit
> when it overheats and reduce the cooling state step by step when the
> temperature drops.
> what do you think?

I have only one concern here: (mostly on Passive cooling cases)
Setting the cooling state to upper limit will surely help in rapid cooling,
but it will also disrupt the thermal steady state, and the performance might
be jittery.

Let me explain a bit:
On small form factors (like smartphones, tablets, netbooks), when we run
CPU intensive benchmarks, we can easily observe this jittery performance.

The CPU will run in a very high freq for few seconds(which means temperature is
well below trip point), and then switch back to very low frequency in the next
few seconds(which means temperature hit the trip point). This switch will keep
happening for every few seconds. So, the temperature never settles (say for 
example,
somewhere in the middle of [low CPU temp, CPU Trip temp]). 

I could see two reasons for this:
1. The poll delay: Between two successive polls, however small the poll 
delay(~20s) may be,
the CPU temperature can raise up to 15C (Just my observation)
2. Sudden passive cooling. The freq switches between HFM and LFM and never
something in between.

That’s why for passive cooling cases, this behavior might not be welcomed 
always.

So, I would prefer not to set the cooling state to upper limit always. Instead, 
we will
keep the existing behavior but introduce new trend types (something like what 
Amit
has done). In this case, the user/tester is explicitly setting the cooling 
trend to
'SUDDEN cooling' which means he/she is 'Ok' with Jitter in performance. Things 
are
explicitly said here, which makes it easy to identify performance issues, if 
any arise.

In fact, this is one of the reasons, why we have the 'weight' and the 
'cur_trip_level'
variables in the fair share governor. Together, both these variables, ensure 
that
we do not throttle a cooling device, to more than what is necessary.

I do not think any of this matters for active cooling, where we do not impact
performance :-)

Sorry again for the late response. Thanks both of you for bringing this up..

Thanks,
Durga

> 
> thanks,
> rui
> 
> > > I can write such a governor if you do not have time to.
> > ok. thanks
> > >
> > > thanks,
> > > rui
> > >>  This is needed for temperature sensors which support rising/falling
> > >> threshold interrupts and polling can be totally avoided.
> > >>
> > >
> > >
> > >> Signed-off-by: Amit Daniel Kachhap 
> > >> Signed-off-by: Amit Daniel Kachhap 
> > >> ---
> > >>  drivers/thermal/step_wise.c |   19 +++
> > >>  include/linux/thermal.h |2 ++
> > >>  2 files changed, 17 insertions(+), 4 deletions(-)
> > >>
> > >> diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
> > >> index 1242cff..0d2d8d6 100644
> > >> --- a/drivers/thermal/step_wise.c
> > >> +++ b/drivers/thermal/step_wise.c
> > >> @@ -35,6 +35,10 @@
> > >>   *   state for this trip point
> > >>   *b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
> > >>   *   state for this trip point
> > >> + *c. if the trend is THERMAL_TREND_RAISE_FULL, use highest cooling
> > >> + *   state for this trip point
> > >> + *d. if the trend is THERMAL_TREND_DROP_FULL, use lowest cooling
> > >> + *   state for this trip point
> > >>   */
> > >>  static unsigned long get_target_state(struct thermal_instance
> *instance,
> > >>   enum thermal_trend trend)
> > >> @@ -50,7 +54,10 @@ static unsigned long get_target_state(struct
> thermal_instance *inst

Re: [PATCH] gpio: tegra: read output value when gpio is set in direction_out

2012-11-08 Thread Laxman Dewangan


On Thursday 08 November 2012 10:28 PM, Stephen Warren wrote:

On 11/07/2012 11:27 PM, Laxman Dewangan wrote:

Read the output value when gpio is set for the output mode for
gpio_get_value(). Reading input value in direction out does not
give correct value.

That's an unfortunate HW design, but oh well. Do you have any idea why
reading the input register doesn't work? If you look at the Tegra20 TRM,
page 666 figure 32 "SFIO/GPIO Pin Multiplexing Architecture", there's
no indication that the input path wouldn't work if the output path is
active. Perhaps the issue is in the GPIO module not the pinmux module?



I think this is in the gpio controller design. I checked this again on 
cardhu by dumping the gpio registers:

Bank:Port CNF OE OUT IN INT_STA INT_ENB INT_LVL
2:2   1c  18 08 04 00 00 00

GPIO pin2,pin3 and pin4 are in gpio mode.
GPIO pin 3 and pin4 are in output mode and pin2 is in input mode.
Set the output to 1 for pin3 and reading back through gpio_in register 
for this pin, it is showing as 0, not 1.




diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
  static int tegra_gpio_get(struct gpio_chip *chip, unsigned offset)
  {
+   int bit_val = BIT(GPIO_BIT(offset));
+
+   /* If gpio is in output mode then read from the out value */
+   if (tegra_gpio_readl(GPIO_OE(offset))&  bit_val)
+   return !!(tegra_gpio_readl(GPIO_OUT(offset))&  bit_val);
+
return (tegra_gpio_readl(GPIO_IN(offset))>>  GPIO_BIT(offset))&  0x1;
  }

Any chance of using the same kind of logic to isolate the bit value? One
branch above does !!(val&  mask) and the other (val>>  shift)&  1.


It was going beyond 80 columns and hence I did it like this.  Let me 
respin this patch to use the same kind of logic in both branches.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/3] perf annotate: Merge same lines in summary view

2012-11-08 Thread Namhyung Kim

From: Namhyung Kim 

The --print-line option of perf annotate command shows summary for
each source line.  But it didn't merge same lines so that it can
appear multiple times.

* before:

Sorted summary for file /home/namhyung/bin/mcol
--

   24.40 /home/namhyung/tmp/mcol.c:26
   21.58 /home/namhyung/tmp/mcol.c:25
   10.14 /home/namhyung/tmp/mcol.c:24
8.59 /home/namhyung/tmp/mcol.c:25
8.57 /home/namhyung/tmp/mcol.c:25
8.42 /home/namhyung/tmp/mcol.c:26
8.31 /home/namhyung/tmp/mcol.c:26
8.30 /home/namhyung/tmp/mcol.c:25
0.80 /home/namhyung/tmp/mcol.c:26

* after:

Sorted summary for file /home/namhyung/bin/mcol
--

   41.93 /home/namhyung/tmp/mcol.c:26
   10.14 /home/namhyung/tmp/mcol.c:24
   47.04 /home/namhyung/tmp/mcol.c:25

To do that, introduce percent_sum field so that the normal
line-by-line output doesn't get changed.

Signed-off-by: Namhyung Kim 
---
* v2: ->percent_sum should be used when resorting.

 tools/perf/util/annotate.c | 55 +++---
 tools/perf/util/annotate.h |  1 +
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 83b1078260e3..180113b891a3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -887,12 +887,41 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
struct source_line *iter;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
+   int ret;
 
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct source_line, node);
 
-   if (src_line->percent > iter->percent)
+   ret = strcmp(iter->path, src_line->path);
+   if (ret == 0) {
+   iter->percent_sum += src_line->percent;
+   return;
+   }
+
+   if (ret < 0)
+   p = &(*p)->rb_left;
+   else
+   p = &(*p)->rb_right;
+   }
+
+   src_line->percent_sum = src_line->percent;
+
+   rb_link_node(&src_line->node, parent, p);
+   rb_insert_color(&src_line->node, root);
+}
+
+static void __resort_source_line(struct rb_root *root, struct source_line 
*src_line)
+{
+   struct source_line *iter;
+   struct rb_node **p = &root->rb_node;
+   struct rb_node *parent = NULL;
+
+   while (*p != NULL) {
+   parent = *p;
+   iter = rb_entry(parent, struct source_line, node);
+
+   if (src_line->percent_sum > iter->percent_sum)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -902,6 +931,24 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
rb_insert_color(&src_line->node, root);
 }
 
+static void resort_source_line(struct rb_root *dest_root, struct rb_root 
*src_root)
+{
+   struct source_line *src_line;
+   struct rb_node *node;
+
+   node = rb_first(src_root);
+   while (node) {
+   struct rb_node *next;
+
+   src_line = rb_entry(node, struct source_line, node);
+   next = rb_next(node);
+   rb_erase(node, src_root);
+
+   __resort_source_line(dest_root, src_line);
+   node = next;
+   }
+}
+
 static void symbol__free_source_line(struct symbol *sym, int len)
 {
struct annotation *notes = symbol__annotation(sym);
@@ -926,6 +973,7 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
struct source_line *src_line;
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evidx);
+   struct rb_root tmp_root = RB_ROOT;
 
if (!h->sum)
return 0;
@@ -960,12 +1008,13 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
goto next;
 
strcpy(src_line[i].path, path);
-   insert_source_line(root, &src_line[i]);
+   insert_source_line(&tmp_root, &src_line[i]);
 
next:
pclose(fp);
}
 
+   resort_source_line(root, &tmp_root);
return 0;
 }
 
@@ -989,7 +1038,7 @@ static void print_summary(struct rb_root *root, const char 
*filename)
char *path;
 
src_line = rb_entry(node, struct source_line, node);
-   percent = src_line->percent;
+   percent = src_line->percent_sum;
color = get_percent_color(percent);
path = src_line->path;
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index c6272011625a..8eec94358a4a 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -76,6 +76,7 @@ struct sym_hist {
 struct source_line

Re: [PATCH] dw_mmc: fix more const pointer warnings

2012-11-08 Thread Jaehoon Chung

Great..Thanks Arnd and Chris.

Best Regards,
Jaehoon Chung
On 11/08/2012 11:35 PM, Chris Ball wrote:
> Hi Arnd,
> 
> On Thu, Nov 08 2012, Arnd Bergmann wrote:
>> The patch "dw_mmc: fix multiple drv_data NULL dereferences" has
>> unfortunately clashed with my "mmc: dw_mmc: constify
>> dw_mci_idmac_ops in exynos back-end" patch, causing new warnings
>> to appear.
>>
>> This should hopefully fix the issue for good.
>>
>> Signed-off-by: Arnd Bergmann 
> 
> Thanks, pushed to mmc-next for 3.7.
> 
> (Jaehoon also sent a patch to fix these last night, but yours applies to
> current mmc-next and his doesn't, so I'll take this one instead.)
> 
> - Chris.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[git pull] drm radeon + nouveau fixes

2012-11-08 Thread Dave Airlie


Hi Linus,

just radeon and nouveau, mostly regression fixes, and a couple of radeon 
register checker fixes.

Dave.

The following changes since commit 695ddeb457584a602f2ba117d08ce37cf6ec1589:

  drm/radeon: fix typo in evergreen_mc_resume() (2012-11-07 10:53:49 +1000)

are available in the git repository at:

  git://people.freedesktop.org/~airlied/linux drm-fixes

for you to fetch changes up to 4a48ed2334b7ae61dd11bb114fa35bd4ebdc1ca0:

  Merge branch 'drm-nouveau-fixes' of 
git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes (2012-11-09 
14:57:02 +1000)



Alex Deucher (3):
  drm/radeon/dce3: switch back to old pll allocation order for discrete
  drm/radeon/cayman: add some missing regs to the VM reg checker
  drm/radeon/si: add some missing regs to the VM reg checker

Dave Airlie (2):
  Merge branch 'drm-fixes-3.7' of git://people.freedesktop.org/~agd5f/linux 
into drm-fixes
  Merge branch 'drm-nouveau-fixes' of 
git://anongit.freedesktop.org/git/nouveau/linux-2.6 into drm-fixes

Kelly Doran (1):
  drm/nvc0/disp: fix regression in vblank semaphore release

Maarten Lankhorst (1):
  drm/nouveau: fix acpi edid retrieval

Marcin Slusarz (3):
  drm/nv41/vm: fix typo in type name
  drm/nv40/graph: fix typo in type names
  drm/nv40/mpeg: fix context handling

 drivers/gpu/drm/nouveau/core/engine/disp/nv50.c  | 20 +
 drivers/gpu/drm/nouveau/core/engine/graph/nv40.c |  4 +-
 drivers/gpu/drm/nouveau/core/engine/mpeg/nv40.c  |  2 +-
 drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c|  2 +-
 drivers/gpu/drm/nouveau/nouveau_connector.c  |  2 +-
 drivers/gpu/drm/radeon/atombios_crtc.c   | 54 ++--
 drivers/gpu/drm/radeon/evergreen_cs.c|  3 ++
 drivers/gpu/drm/radeon/evergreend.h  |  4 ++
 drivers/gpu/drm/radeon/si.c  |  1 +
 drivers/gpu/drm/radeon/sid.h |  1 +
 10 files changed, 57 insertions(+), 36 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] input: spear-keyboard: Use devm_*() routines

2012-11-08 Thread Dmitry Torokhov

On Fri, Nov 09, 2012 at 08:06:29AM +0530, Viresh Kumar wrote:
> On 8 November 2012 22:08, Dmitry Torokhov  wrote:
> > On Thu, Nov 08, 2012 at 07:10:47PM +0530, Viresh Kumar wrote:
> > It also breaks the error unwinding/removal of the driver as it frees
> > input device while IRQ handler is still active.
> 
> I have heard of this argument before, probably from you. :)
> Just need clarification again. How will we get an interrupt when the 
> controller
> is stopped, unless we have a shared irq.

My bad, I missed that spear-keyboard driver implements open() and
close() methods and shuts off the device properly. Still, thanks for
switching everything to devm_*, I think it is much cleaner this way as
opposed to mixing managed and unmanaged resources.

Thanks.

-- 
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] Device Tree Overlays Proposal (Was Re: capebus moving omap_devices to mach-omap2)

2012-11-08 Thread Joel A Fernandes

Hi Pantelis,

I hope I'm not too late to reply as I'm traveling.

On Nov 6, 2012, at 5:30 AM, Pantelis Antoniou
 wrote:

>
>>
>> Joanne has purchased one of Jane's capes and packaged it into a rugged
>> case for data logging. As far as Joanne is concerned, the BeagleBone and
>> cape together are a single unit and she'd prefer a single monolithic FDT
>> instead of using an FDT overlay.
>> Option A: Using dtc, she uses the BeagleBone and cape .dts source files
>>to generate a single .dtb for the entire system which is
>>loaded by U-Boot. -or-
>
> Unlikely.
>> Option B: Joanne uses a tool to merge the BeagleBone and cape .dtb files
>>(instead of .dts files), -or-
> Possible but low probability.
>
>> Option C: U-Boot loads both the base and overlay FDT files, merges them,
>>and passes the resolved tree to the kernel.
>>
>
> Could be made to work. Only really required if Joanne wants the
> cape interface to work for u-boot too. For example if the cape has some
> kind of network interface that u-boot will use to boot from.
>

I love Grant's hashing idea a lot keeping the phandle problem for
compile time and not requiring fixups.

IMO it is still a cleaner approach if u-boot does the simple tree merging for
all cases, and not the kernel, for the reasons mentioned below.

(1)
From a development standpoint, very little or nothing will
have to be changed in kernel (except for scripts/dtc) considering we
are moving forward with hashing.

(2)
Also, this was discussed a while back but at some point is going to be brought
up again: loading of a dt fragment directly from EEPROM and merging at
run time. If we were to implement this in kernel, we would have to add
cape specific EEPROM reading code, merge the tree before it is
unflattened and parse. I think doing tree merging in kernel is messy
and we should do it in uboot considering we might have to read EEPROM for
this use case. Ideally reading the fragment from the EEPROM for all capes
and merging without worrying about version detection, Doing the merge and
passing the merged blob to the kernel which (kernel) works the same way
it does today.

>> It may be sufficient to solve it by making the phandle values less
>> volatile. Right now dtc generates phandles linearly. Generated phandles
>> could be overridden with explicit phandle properties, but it isn't a
>> fantastic solution. Perhaps generating the phandle from a hash of the
>> node name would be sufficient.
>>
>
> I doubt the hash method will work reliably. We only have 32 bits to work with,
> nothing like the SHA hashes of git.
>

I was wondering I have worked with kernel's crypto code in the past to
generate 32 bit md5sums of 1000s of dataitems, from what I've seen,
collisions are rare and since we are talking about just a few nodes
that are being referenced in the base dt. I think the probability is
even less (ofcourse such an analysis strongly depends on dataset).
this method also takes away a lot of complexity with having it to do
runtime fixups and will help us get off the ground quickly.

We can also put in a collision handling mechanism if needed.
I think it is worth doing a sample hash of all nodes in all dts we
have in a script and see for once if we have collisions and what it
looks like.

As an alternative to hashing: from reading David Gibson's paper, as I
understand it, a phandle is supposed to 'uniquely' identify a node. I wonder
why the node name itself is not sufficient to uniquely identify it. The code
that does the tree walking can then just strcmp the node name while it walks
the tree instead of having to find a node with a phandle number. I guess
the reason is that phandles are small to store as data values. Another
approach can be to arrange the string block in alphabetical order
(unless it already is), and store the phandle as the index of the node name
referenced relative to the start of the string block. This will not
affect nodes in the dtb being moved around since they will still have the
same index value. The problem is that adding or removing nodes changes
the index of all other nodes in the string block as well.. Hmm.

Regards,
Joel
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: WARNING: at kernel/rcutree.c:1562 rcu_do_batch()

2012-11-08 Thread Michael Wang

Hi, Fengguang

On 11/09/2012 12:23 PM, Fengguang Wu wrote:
> Paul,
> 
> I got the below warning in stable kernel 3.6.3. linux-next does
> not have this issue. Bisect shows that the first bad commit is

Please allow me to ask few questions:
1. Is it 100% sure that linux-next doesn't show this issue on the same hardware?
2. is it 100% sure that when removed commit b1420f1, both WARN in
rcu_do_batch() and __call_rcu() disappeared?

The reason I asked the questions is that this issue looks really similar
to the one we faced previously:
The interrupt came in after the cpu had gone offline.

Previously I supposed this was caused by an apic issue and had nothing to do
with rcu, so I really want to figure out whether it is actually related to
commit b1420f1?

Regards,
Michael Wang

> 
> commit b1420f1c8bfc30ecf6380a31d0f686884834b599
> Author: Paul E. McKenney 
> Date:   Thu Mar 1 13:18:08 2012 -0800
> 
> rcu: Make rcu_barrier() less disruptive
> 
> 
> [   92.252733] do_IRQ: 1.59 No irq handler for vector (irq -1)
> [   92.253257] [ cut here ]
> [   92.253675] WARNING: at /c/kernel-tests/src/stable/kernel/rcutree.c:1562 
> rcu_do_batch+0x17e/0x63b()
> [   92.254474] Hardware name: Bochs
> [   92.254766] Modules linked in:
> [   92.256689] Pid: 9, comm: migration/1 Not tainted 3.6.3 #1306
> [   92.256689] Call Trace:
> [   92.256689][] warn_slowpath_common+0x83/0x9c
> [   92.256689]  [] warn_slowpath_null+0x1a/0x1c
> [   92.256689]  [] rcu_do_batch+0x17e/0x63b
> [   92.256689]  [] ? rcu_report_qs_rnp+0x28b/0x2d5
> [   92.256689]  [] ? rcu_process_callbacks+0xe3/0x236
> [   92.256689]  [] rcu_process_callbacks+0x172/0x236
> [   92.256689]  [] __do_softirq+0xf6/0x231
> [   92.256689]  [] ? tick_program_event+0x24/0x26
> [   92.256689]  [] call_softirq+0x1c/0x30
> [   92.256689]  [] do_softirq+0x4a/0xa6
> [   92.256689]  [] irq_exit+0x51/0xbc
> [   92.256689]  [] smp_apic_timer_interrupt+0x8b/0x99
> [   92.256689]  [] apic_timer_interrupt+0x6f/0x80
> [   92.256689][] ? local_clock+0x1d/0x5a
> [   92.256689]  [] ? stop_machine_cpu_stop+0x104/0x119
> [   92.256689]  [] cpu_stopper_thread+0xdd/0x17d
> [   92.256689]  [] ? queue_stop_cpus_work+0x130/0x130
> [   92.256689]  [] ? _raw_spin_unlock_irqrestore+0x47/0x65
> [   92.256689]  [] ? trace_hardirqs_on_caller+0x125/0x181
> [   92.256689]  [] ? trace_hardirqs_on+0xd/0xf
> [   92.256689]  [] ? cpu_stop_signal_done+0x2c/0x2c
> [   92.256689]  [] kthread+0x9a/0xa2
> [   92.256689]  [] kernel_thread_helper+0x4/0x10
> [   92.256689]  [] ? retint_restore_args+0x13/0x13
> [   92.256689]  [] ? __init_kthread_worker+0x5a/0x5a
> [   92.317029]  [] ? gs_change+0x13/0x13
> 
> Thanks,
> Fengguang
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] mfd: rtsx_pcr: Include linux/slab.h to fix build error

2012-11-08 Thread Axel Lin

Fix below build error:

  CC [M]  drivers/mfd/rtsx_pcr.o
drivers/mfd/rtsx_pcr.c: In function 'rtsx_pci_init_chip':
drivers/mfd/rtsx_pcr.c:985:2: error: implicit declaration of function 'kcalloc' 
[-Werror=implicit-function-declaration]
drivers/mfd/rtsx_pcr.c:985:13: warning: assignment makes pointer from integer 
without a cast [enabled by default]
drivers/mfd/rtsx_pcr.c:993:3: error: implicit declaration of function 'kfree' 
[-Werror=implicit-function-declaration]
drivers/mfd/rtsx_pcr.c: In function 'rtsx_pci_probe':
drivers/mfd/rtsx_pcr.c:1021:2: error: implicit declaration of function 
'kzalloc' [-Werror=implicit-function-declaration]
drivers/mfd/rtsx_pcr.c:1021:6: warning: assignment makes pointer from integer 
without a cast [enabled by default]
drivers/mfd/rtsx_pcr.c:1027:9: warning: assignment makes pointer from integer 
without a cast [enabled by default]
cc1: some warnings being treated as errors
make[2]: *** [drivers/mfd/rtsx_pcr.o] Error 1
make[1]: *** [drivers/mfd] Error 2
make: *** [drivers] Error 2

Signed-off-by: Axel Lin 
---
 drivers/mfd/rtsx_pcr.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 56d4377..a510584 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
1.7.9.5



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH 1/4] thermal: Add new thermal trend type to support quick cooling

2012-11-08 Thread R, Durgadoss

Hi Rui/Amit,

Sorry for the late response..

> -Original Message-
> From: Amit Kachhap [mailto:amit.kach...@linaro.org]
> Sent: Thursday, November 08, 2012 11:56 AM
> To: Zhang, Rui
> Cc: linux...@lists.linux-foundation.org; linux-samsung-
> s...@vger.kernel.org; linux-kernel@vger.kernel.org; R, Durgadoss;
> l...@kernel.org; linux-a...@vger.kernel.org; jonghwa3@samsung.com
> Subject: Re: [PATCH 1/4] thermal: Add new thermal trend type to support
> quick cooling
> 
> On 8 November 2012 11:31, Zhang Rui  wrote:
> > On Thu, 2012-11-08 at 09:56 +0530, Amit Daniel Kachhap wrote:
> >> This modification adds 2 new thermal trend type
> THERMAL_TREND_RAISE_FULL
> >> and THERMAL_TREND_DROP_FULL. This thermal trend can be used to
> quickly
> >> jump to the upper or lower cooling level instead of incremental increase
> >> or decrease.
> >
> > IMO, what we need is a new more aggressive cooling governor which
> always
> > uses upper limit when the temperature is raising and lower limit when
> > the temperature is dropping.
> Yes I agree that a new aggressive governor is the best approach but
> then i thought adding a new trend type is a simple solution to achieve
> this and since most of the governor logic might be same as the
> step-wise governor. I have no objection in doing it through governor.

Yes, this sounds like a feasible and not-so-complicated implementation for now.
In future, if we see a lot of drivers requiring this sudden raise/drop 
functionality,
at that time we can introduce an 'aggressive' governor.

Thanks,
Durga

> >
> > I can write such a governor if you do not have time to.
> ok. thanks
> >
> > thanks,
> > rui
> >>  This is needed for temperature sensors which support rising/falling
> >> threshold interrupts and polling can be totally avoided.
> >>
> >
> >
> >> Signed-off-by: Amit Daniel Kachhap 
> >> Signed-off-by: Amit Daniel Kachhap 
> >> ---
> >>  drivers/thermal/step_wise.c |   19 +++
> >>  include/linux/thermal.h |2 ++
> >>  2 files changed, 17 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
> >> index 1242cff..0d2d8d6 100644
> >> --- a/drivers/thermal/step_wise.c
> >> +++ b/drivers/thermal/step_wise.c
> >> @@ -35,6 +35,10 @@
> >>   *   state for this trip point
> >>   *b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
> >>   *   state for this trip point
> >> + *c. if the trend is THERMAL_TREND_RAISE_FULL, use highest cooling
> >> + *   state for this trip point
> >> + *d. if the trend is THERMAL_TREND_DROP_FULL, use lowest cooling
> >> + *   state for this trip point
> >>   */
> >>  static unsigned long get_target_state(struct thermal_instance *instance,
> >>   enum thermal_trend trend)
> >> @@ -50,7 +54,10 @@ static unsigned long get_target_state(struct
> thermal_instance *instance,
> >>   } else if (trend == THERMAL_TREND_DROPPING) {
> >>   cur_state = cur_state > instance->lower ?
> >>   (cur_state - 1) : instance->lower;
> >> - }
> >> + } else if (trend == THERMAL_TREND_RAISE_FULL)
> >> + cur_state = instance->upper;
> >> + else if (trend == THERMAL_TREND_DROP_FULL)
> >> + cur_state = instance->lower;
> >>
> >>   return cur_state;
> >>  }
> >> @@ -87,7 +94,8 @@ static void update_instance_for_throttle(struct
> thermal_zone_device *tz,
> >>  }
> >>
> >>  static void update_instance_for_dethrottle(struct thermal_zone_device
> *tz,
> >> - int trip, enum thermal_trip_type trip_type)
> >> + int trip, enum thermal_trip_type trip_type,
> >> + enum thermal_trend trend)
> >>  {
> >>   struct thermal_instance *instance;
> >>   struct thermal_cooling_device *cdev;
> >> @@ -101,7 +109,10 @@ static void
> update_instance_for_dethrottle(struct thermal_zone_device *tz,
> >>   cdev = instance->cdev;
> >>   cdev->ops->get_cur_state(cdev, &cur_state);
> >>
> >> - instance->target = cur_state > instance->lower ?
> >> + if (trend == THERMAL_TREND_DROP_FULL)
> >> + instance->target = instance->lower;
> >> + else
> >> + instance->target = cur_state > instance->lower ?
> >>   (cur_state - 1) : THERMAL_NO_TARGET;
> >>
> >>   /* Deactivate a passive thermal instance */
> >> @@ -133,7 +144,7 @@ static void thermal_zone_trip_update(struct
> thermal_zone_device *tz, int trip)
> >>   if (tz->temperature >= trip_temp)
> >>   update_instance_for_throttle(tz, trip, trip_type, trend);
> >>   else
> >> - update_instance_for_dethrottle(tz, trip, trip_type);
> >> + update_instance_for_dethrottle(tz, trip, trip_type, trend);
> >>
> >>   mutex_unlock(&tz->lock);
> >>  }
> >> diff --git a/include/linux/th

Re: [RFC PATCH 0/8][Sorted-buddy] mm: Linux VM Infrastructure to support Memory Power Management

2012-11-08 Thread Vaidyanathan Srinivasan

* Mel Gorman  [2012-11-08 18:02:57]:

> On Wed, Nov 07, 2012 at 01:22:13AM +0530, Srivatsa S. Bhat wrote:
> > 

Hi Mel,

Thanks for detailed review and comments.  The goal of this patch
series is to brainstorm on ideas that enable Linux VM to record and
exploit memory region boundaries.

The first approach that we had last year (hierarchy) has more runtime
overhead.  This approach of sorted-buddy was one of the alternative
discussed earlier and we are trying to find out if simple requirements
of biasing memory allocations can be achieved with this approach.

Smart reclaim based on this approach is a key piece we still need to
design.  Ideas from compaction will certainly help.

> > Today memory subsystems are offer a wide range of capabilities for managing
> > memory power consumption. As a quick example, if a block of memory is not
> > referenced for a threshold amount of time, the memory controller can decide 
> > to
> > put that chunk into a low-power content-preserving state. And the next
> > reference to that memory chunk would bring it back to full power for 
> > read/write.
> > With this capability in place, it becomes important for the OS to understand
> > the boundaries of such power-manageable chunks of memory and to ensure that
> > references are consolidated to a minimum number of such memory power 
> > management
> > domains.
> > 
> 
> How much power is saved?

On embedded platform the savings could be around 5% as discussed in
the earlier thread: http://article.gmane.org/gmane.linux.kernel.mm/65935

On larger servers with large amounts of memory the savings could be
more.  We do not yet have all the pieces together to evaluate.

> > ACPI 5.0 has introduced MPST tables (Memory Power State Tables) [5] so that
> > the firmware can expose information regarding the boundaries of such memory
> > power management domains to the OS in a standard way.
> > 
> 
> I'm not familiar with the ACPI spec but is there support for parsing of
> MPST and interpreting the associated ACPI events? For example, if ACPI
> fires an event indicating that a memory power node is to enter a low
> state then presumably the OS should actively migrate pages away -- even
> if it's going into a state where the contents are still refreshed
> as exiting that state could take a long time.
> 
> I did not look closely at the patchset at all because it looked like the
> actual support to use it and measure the benefit is missing.

Correct.  The platform interface part is not included in this patch
set mainly because there is not much design required there.  Each
platform can have code to collect the memory region boundaries from
BIOS/firmware and load it into the Linux VM.  The goal of this patch
is to brainstorm on the idea of how core VM should use the region
information.

> > How can Linux VM help memory power savings?
> > 
> > o Consolidate memory allocations and/or references such that they are
> > not spread across the entire memory address space.  Basically area of memory
> > that is not being referenced, can reside in low power state.
> > 
> 
> Which the series does not appear to do.

Correct.  We need to design the correct reclaim strategy for this to
work.  However having buddy list sorted by region address could get us
one step closer to shaping the allocations.

> > o Support targeted memory reclaim, where certain areas of memory that can be
> > easily freed can be offlined, allowing those areas of memory to be put into
> > lower power states.
> > 
> 
> Which the series does not appear to do judging from this;
> 
>   include/linux/mm.h |   38 +++
>   include/linux/mmzone.h |   52 +
>   mm/compaction.c|8 +
>   mm/page_alloc.c|  263 
> 
>   mm/vmstat.c|   59 ++-
> 
> This does not appear to be doing anything with reclaim and not enough with
> compaction to indicate that the series actively manages memory placement
> in response to ACPI events.

Correct.  Evaluating different ideas for reclaim will be next step
before getting into the platform interface parts.

> Further in section 5.2.21.4 the spec says that power node regions can
> overlap (but are not hierarchal for some reason) but have no gaps yet the
> structure you use to represent is assumes there can be gaps and there are
> no overlaps. Again, this is just glancing at the spec and a quick skim of
> the patches so maybe I missed something that explains why this structure
> is suitable.

This patch is roughly based on the idea that ACPI MPST will give us
memory region boundaries.  It is not designed to implement all options
defined in the spec.  We have taken the general case where regions do not
overlap, while the memory addresses themselves can be discontinuous.

> It seems to me that superficially the VM implementation for the support
> would have
> 
> a) Involved a tree that managed the overlapping regions (even if

Re: [PATCH] virtio: Don't access index after unregister.

2012-11-08 Thread Michael S. Tsirkin

On Thu, Nov 08, 2012 at 11:43:47AM +0100, Cornelia Huck wrote:
> Virtio wants to release used indices after the corresponding
> virtio device has been unregistered. However, virtio does not
> hold an extra reference, giving up its last reference with
> device_unregister(), making accessing dev->index afterwards
> invalid.
> 
> I actually saw problems when testing my (not-yet-merged)
> virtio-ccw code:
> 
> - device_add virtio-net,id=xxx
> -> creates device virtio with n>0
> 
> - device_del xxx
> -> deletes virtio, but calls ida_simple_remove with an
>index of 0
> 
> - device_add virtio-net,id=xxx
> -> tries to add virtio0, which is still in use...
> 
> So let's save the index we want to release before calling
> device_unregister().
> 
> Signed-off-by: Cornelia Huck 
> ---
>  drivers/virtio/virtio.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 1e8659c..809b0de 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -225,8 +225,10 @@ EXPORT_SYMBOL_GPL(register_virtio_device);
>  
>  void unregister_virtio_device(struct virtio_device *dev)
>  {
> + int index = dev->index; /* save for after device release */

It's obvious from the code that we save it for after release,
I think a better comment would explain *why* we do this.

Something like
/*
   device_unregister drops reference to device so put_device could
   invoke release callback. In case that callback will free the device,
   make sure we don't access device after this call.
 */
int index = dev->index;

?

> +
>   device_unregister(&dev->dev);
> - ida_simple_remove(&virtio_index_ida, dev->index);
> + ida_simple_remove(&virtio_index_ida, index);
>  }
>  EXPORT_SYMBOL_GPL(unregister_virtio_device);
>  
> -- 
> 1.7.12.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/4] thermal: exynos: Use the new thermal trend type for quick cooling action.

2012-11-08 Thread Amit Daniel Kachhap

This patch uses the quick thermal cooling trend type macros. This is needed
as exynos5 and other thermal sensors now support only the interrupt method for
threshold temperature check.

Signed-off-by: Amit Daniel Kachhap 
Signed-off-by: Amit Daniel Kachhap 
---
 drivers/thermal/exynos_thermal.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c
index bbaff56..097c218 100644
--- a/drivers/thermal/exynos_thermal.c
+++ b/drivers/thermal/exynos_thermal.c
@@ -287,7 +287,7 @@ static int exynos_bind(struct thermal_zone_device *thermal,
case MONITOR_ZONE:
case WARN_ZONE:
if (thermal_zone_bind_cooling_device(thermal, i, cdev,
-   level, level)) {
+   level, 0)) {
pr_err("error binding cdev inst %d\n", i);
ret = -EINVAL;
}
@@ -373,9 +373,9 @@ static int exynos_get_trend(struct thermal_zone_device 
*thermal,
return ret;
 
if (thermal->temperature >= trip_temp)
-   *trend = THERMAL_TREND_RAISING;
+   *trend = THERMAL_TREND_RAISE_FULL;
else
-   *trend = THERMAL_TREND_DROPPING;
+   *trend = THERMAL_TREND_DROP_FULL;
 
return ret;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] arm: memtest

2012-11-08 Thread Alexander Holler


Am 08.11.2012 23:39, schrieb Yinghai Lu:

On Thu, Nov 8, 2012 at 12:48 PM, Alexander Holler  wrote:

Hello,

I've recently discovered the lack of the command line parameter memtest for
ARM. So I've made a patch.

But I have some questions:

1. arch/x86/mm/memtest.c looks platform independent.
The only thing why I don't use it for arm, is because it uses 64bit
pointers. Maybe it could be moved to mm/memtest.c. If so, the memtest32.c
I'm using (basically a copy of memtest.c) could be moved there too.

2. Because the below memtest32.c is basically a copy of
arch/x86/mm/memtest.c, I'm not sure if the mapping from physical to virtual
locations there does fit (always) for ARM too. I know almost as much about
the in-kernel memory organization on x86 as on ARM, which is not really that
much (some theory about TLBs, some source code explorations, ..., but I'm
working on it). ;)


We are using arch/x86/mm/memtest.c for x86 32bit and 64bit.

So it should be ok to use it with arm 32bit and 64bit directly.


It does. But in order to enable it on every boot, I wanted it to be as 
fast as possible.


Regards,

Alexander

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PULL] virtio and two module fixes

2012-11-08 Thread Rusty Russell

The following changes since commit bc909421a9c7083fcde795846d22b36a51a7be54:

  Merge tag 'gpio-fixes-v3.7-rc4' of 
git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio (2012-10-30 
15:56:22 -0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux.git fixes

for you to fetch changes up to 237242bddc99041e15a4ca51b8439657cadaff17:

  virtio: Don't access index after unregister. (2012-11-09 14:54:24 +1030)


YA module signing build tweak, and two cc'd to stable.


Cornelia Huck (1):
  virtio: Don't access index after unregister.

Rusty Russell (2):
  module: fix out-by-one error in kallsyms
  modules: don't break modules_install on external modules with no key.

 drivers/virtio/virtio.c  |4 +++-
 kernel/module.c  |   27 ---
 scripts/Makefile.modinst |3 ++-
 3 files changed, 21 insertions(+), 13 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] virtio: Don't access index after unregister.

2012-11-08 Thread Rusty Russell

Cornelia Huck  writes:
> Virtio wants to release used indices after the corresponding
> virtio device has been unregistered. However, virtio does not
> hold an extra reference, giving up its last reference with
> device_unregister(), making accessing dev->index afterwards
> invalid.
>
> I actually saw problems when testing my (not-yet-merged)
> virtio-ccw code:
>
> - device_add virtio-net,id=xxx
> -> creates device virtio with n>0
>
> - device_del xxx
> -> deletes virtio, but calls ida_simple_remove with an
>index of 0
>
> - device_add virtio-net,id=xxx
> -> tries to add virtio0, which is still in use...
>
> So let's save the index we want to release before calling
> device_unregister().

Great catch!  I've added a CC:stable.

Applied,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: Tree for Nov 9

2012-11-08 Thread Stephen Rothwell

Hi all,

Changes since 20121108:

The pci tree still has its build failure for which I applied a merge fix patch.

The v4l-dvb tree still has its build failure so I used the version from
next-20121026.

The net-next tree gained a conflict against Linus' tree.

The pinctrl tree lost its build failure.

The arm-soc tree gained a conflict against the pinctrl tree.

The samsung tree lost its conflict.

The akpm tree gained several build failures for which I reverted several
commits.



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" as mentioned in the FAQ on the wiki
(see below).

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log files
in the Next directory.  Between each merge, the tree was built with
a ppc64_defconfig for powerpc and an allmodconfig for x86_64. After the
final fixups (if any), it is also built with powerpc allnoconfig (32 and
64 bit), ppc44x_defconfig and allyesconfig (minus
CONFIG_PROFILE_ALL_BRANCHES - this fails its final link) and i386, sparc,
sparc64 and arm defconfig. These builds also have
CONFIG_ENABLE_WARN_DEPRECATED, CONFIG_ENABLE_MUST_CHECK and
CONFIG_DEBUG_INFO disabled when necessary.

Below is a summary of the state of the merge.

We are up to 209 trees (counting Linus' and 28 trees of patches pending
for Linus' tree), more are welcome (even if they are currently empty).
Thanks to those who have contributed, and to those who haven't, please do.

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

There is a wiki covering stuff to do with linux-next at
http://linux.f-seidel.de/linux-next/pmwiki/ .  Thanks to Frank Seidel.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

$ git checkout master
$ git reset --hard stable
Merging origin/master (0e4a43e Merge 
git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes)
Merging fixes/master (12250d8 Merge branch 'i2c-embedded/for-next' of 
git://git.pengutronix.de/git/wsa/linux)
Merging kbuild-current/rc-fixes (bad9955 menuconfig: Replace CIRCLEQ by 
list_head-style lists.)
Merging arm-current/fixes (6404f0b ARM: 7569/1: mm: uninitialized warning 
corrections)
Merging m68k-current/for-linus (8a745ee m68k: Wire up kcmp)
Merging powerpc-merge/merge (8c23f40 Merge 
git://git.kernel.org/pub/scm/virt/kvm/kvm)
Merging sparc/master (afe760e sparc: Allow OF_GPIO on sparc.)
Merging net/master (a66fe16 net: usb: cdc_eem: Fix rx skb allocation for 802.1Q 
VLANs)
Merging sound-current/for-linus (8bb4d9c ALSA: Fix card refcount unbalance)
Merging pci-current/for-linus (ff8e59b PCI/portdrv: Don't create hotplug slots 
unless port supports hotplug)
Merging wireless/master (6fe7cc7 ath9k: Test for TID only in BlockAcks while 
checking tx status)
Merging driver-core.current/driver-core-linus (8f0d816 Linux 3.7-rc3)
Merging tty.current/tty-linus (8f0d816 Linux 3.7-rc3)
Merging usb.current/usb-linus (d99e65b USB: fix build with XEN and 
EARLY_PRINTK_DBGP enabled but USB_SUPPORT disabled)
Merging staging.current/staging-linus (8f0d816 Linux 3.7-rc3)
Merging char-misc.current/char-misc-linus (8f0d816 Linux 3.7-rc3)
Merging input-current/for-linus (32ed191 Input: tsc40 - remove wrong 
announcement of pressure support)
Merging md-current/for-linus (ed30be0 MD RAID10: Fix oops when creating RAID10 
arrays via dm-raid.c)
Merging audit-current/for-linus (c158a35 audit: no leading space in 
audit_log_d_path prefix)
Merging crypto-current/master (9efade1 crypto: cryptd - disable softirqs in 
cryptd_queue_worker to prevent data corruption)
Merging ide/master (9974e43 ide: fix generic_ide_suspend/resume Oops)
Merging dwmw2/master (244dc4e Merge 
git://git.infradead.org/users/dwmw2/random-2.6)
Merging sh-current/sh-fixes-for-linus (4403310 SH: Convert out[bwl] macros to 
inline functions)
Merging irqdomain-current/irqdomain/merge (15e06bf irqdomain: Fix debugfs 
formatting)
Merging devicetree-current/devicetree/merge (4e8383b of: release node fix for 
of_parse_phandle_with_args)
Merging spi-current/spi/merge (d1c185b of/spi: Fix SPI module loading by using 
proper "spi:" modalias prefixes.)
Merging gpio-current/gpio/merge (96b7064 gpio/tca6424: merge I2C transactions, 
remove cast)
Merging rr-fixes/fixes (f6a79af modules: don't break mo

Re: [PATCH] bcm_wimax.ko - Modified supported device list [Correction]

2012-11-08 Thread Kevin McKinney

On Thu, Nov 8, 2012 at 10:27 AM, Muhammad Minhazul Haque
 wrote:
> Mr. Kevin and everyone,
>
> There was a serious mistake in the previous message. I forgot to
> attach the patch. Please ignore it. I am posting it again.
>
> I was never reported for that product id 0x0132. Yet you can continue
> support for it if it is rare. In the meantime, we can add more devices
> to the header and add those names to "usb_device_id" table in
> "InterfaceInit.c". I added a new product string
> "BCM_USB_PRODUCT_ID_ZTE_326" and also modified the device id table.
>
> Again, I removed product if 0xbccd because Beceem, ZTE, Sprint use
> this id for the block device containing device driver. Again, this is
> always switched to base product id via udev. Here is my dmesg output
> when udev is turned off.
> =
> root@inspiron:~# dmesg -c
> [24449.439134] cdrom: issuing MRW background format suspend
> [24459.102669] usb 2-1.2: new high-speed USB device number 11 using ehci_hcd
> [24459.336258] scsi11 : usb-storage 2-1.2:1.0
> [24460.334906] scsi 11:0:0:0: CD-ROMBCM-CD V 01.02 01.01
> 1.13  PQ: 0 ANSI: 2
> [24460.336721] sr0: scsi3-mmc drive: 0x/0x xa/form2 tray
> [24460.336971] sr 11:0:0:0: Attached scsi CD-ROM sr0
> [24460.337167] sr 11:0:0:0: Attached scsi generic sg1 type 5
>
> root@inspiron:~# mount /dev/sr1 /media/tmp
> mount: block device /dev/sr1 is write-protected, mounting read-only
> =
>
> I did build after these changes and probed the module. It works
> perfectly. I also tested 0x0172 and 0x0173. No error is reported. So I
> assure that these products are valid. Not sure about 0x0132. Here is
> the modinfo output.
> =
> license:GPL
> version:5.2.45
> description:Beceem Communications Inc. WiMAX driver
> srcversion: 6968AC3635745331FE6470D
> alias:  usb:v19D2p0132d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v19D2p0007d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v19D2p0173d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v19D2p0172d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v0489pE017d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v198Fp015Ed*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v198Fp0300d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v198Fp0220d*dc*dsc*dp*ic*isc*ip*
> alias:  usb:v198Fp0210d*dc*dsc*dp*ic*isc*ip*
> depends:
> vermagic:   3.2.0-32-generic-pae SMP mod_unload modversions 686
> parm:   debug:Debug level (0=none,...,16=all) (uint)
> =
>
> This patch is currently against a linux 3.7-rc4 kernel, for the x86
> architecture.
>
> The first patch is
> =
> diff --git a/bcm.orig/InterfaceInit.c b/bcm/InterfaceInit.c
> index b05f5f7..7da666e 100644
> --- a/bcm.orig/InterfaceInit.c
> +++ b/bcm/InterfaceInit.c
> @@ -4,11 +4,12 @@ static struct usb_device_id InterfaceUsbtable[] = {
> { USB_DEVICE(BCM_USB_VENDOR_ID_T3, BCM_USB_PRODUCT_ID_T3) },
> { USB_DEVICE(BCM_USB_VENDOR_ID_T3, BCM_USB_PRODUCT_ID_T3B) },
> { USB_DEVICE(BCM_USB_VENDOR_ID_T3, BCM_USB_PRODUCT_ID_T3L) },
> -   { USB_DEVICE(BCM_USB_VENDOR_ID_T3, BCM_USB_PRODUCT_ID_SM250) },
> -   { USB_DEVICE(BCM_USB_VENDOR_ID_ZTE, BCM_USB_PRODUCT_ID_226) },
> +   { USB_DEVICE(BCM_USB_VENDOR_ID_T3, BCM_USB_PRODUCT_ID_SYM) },
> { USB_DEVICE(BCM_USB_VENDOR_ID_FOXCONN, BCM_USB_PRODUCT_ID_1901) },
> +   { USB_DEVICE(BCM_USB_VENDOR_ID_ZTE, BCM_USB_PRODUCT_ID_226) },
> { USB_DEVICE(BCM_USB_VENDOR_ID_ZTE, BCM_USB_PRODUCT_ID_ZTE_TU25) },
> { USB_DEVICE(BCM_USB_VENDOR_ID_ZTE, BCM_USB_PRODUCT_ID_ZTE_226) },
> +   { USB_DEVICE(BCM_USB_VENDOR_ID_ZTE, BCM_USB_PRODUCT_ID_ZTE_326) },
> { }
>  };
>  MODULE_DEVICE_TABLE(usb, InterfaceUsbtable);
> =
>
> And the second patch is
> =
> diff --git a/bcm.orig/InterfaceInit.h b/bcm/InterfaceInit.h
> index 866924e..1486608 100644
> --- a/bcm.orig/InterfaceInit.h
> +++ b/bcm/InterfaceInit.h
> @@ -8,11 +8,11 @@
>  #define BCM_USB_PRODUCT_ID_T3  0x0300
>  #define BCM_USB_PRODUCT_ID_T3B 0x0210
>  #define BCM_USB_PRODUCT_ID_T3L 0x0220
> -#define BCM_USB_PRODUCT_ID_SM250   0xbccd
>  #define BCM_USB_PRODUCT_ID_SYM 0x15E
> -#define BCM_USB_PRODUCT_ID_19010xe017
> +#define BCM_USB_PRODUCT_ID_19010xe017 /* Sprint U1901 */
>  #define BCM_USB_PRODUCT_ID_226 0x0132 /* not sure if this is valid */
>  #define BCM_USB_PRODUCT_ID_ZTE_226 0x172
> +#define BCM_USB_PRODUCT_ID_ZTE_326 0x173 /* ZTE AX326 */
>  #define BCM_USB_PRODUCT_ID_ZTE_TU250x0007
>
>  #define BCM_USB_MINOR_BASE 192
> =
>
> diffstat for this patch is:
> =
>  {bcm => bcm.orig}/InterfaceInit.c |5 ++---
>  {bcm => bcm.orig}/InterfaceInit.h |4 ++--
>  2 files changed, 4 insertions(+), 5 deletions(-)
> =

Thanks for these patches. I see the changes you have made and they look
good, however I am not able to apply these patches. Can you resubmit
them in the correct format?

> To use the patch, remove module if it is probed. Build, and then probe.
>
> About the TODO: I currently have a solution to

Re: linux-next: build warning after merge of the final tree (akpm tree related)

2012-11-08 Thread Michel Lespinasse

On Fri, Nov 09, 2012 at 03:19:03PM +1100, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the final tree, today's linux-next build (arm defconfig)
> produced this warning:
> 
> arch/arm/mm/mmap.c: In function 'arch_get_unmapped_area':
> arch/arm/mm/mmap.c:60:16: warning: unused variable 'start_addr' 
> [-Wunused-variable]
> 
> Introduced by commit "mm: use vm_unmapped_area() on arm architecture".

Sorry for the mistakes. The following changes should fix what's been reported 
so far.

commit 1c98949798ce7a1d4a910775623e1830cf88a92c
Author: Michel Lespinasse 
Date:   Thu Nov 8 20:26:34 2012 -0800

fix mm: use vm_unmapped_area() on sparc32 architecture

diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index a59bc637f9af..a20b5ab4c701 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -38,7 +38,6 @@ asmlinkage unsigned long sys_getpagesize(void)
 
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, 
unsigned long len, unsigned long pgoff, unsigned long flags)
 {
-   struct vm_area_struct * vmm;
struct vm_unmapped_area_info info;
 
if (flags & MAP_FIXED) {

commit aa96ebbc9ec664ddafc841f7631c4a092b10c0d8
Author: Michel Lespinasse 
Date:   Thu Nov 8 20:25:48 2012 -0800

fix mm: use vm_unmapped_area() in hugetlbfs on sparc64 architecture

diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 42e5dba6cb26..d2b59441ebdd 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -60,7 +60,6 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const 
unsigned long addr0,
  const unsigned long pgoff,
  const unsigned long flags)
 {
-   struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;

commit ff4dd9742ce0a5f7c703013e70eeb84b845c8fa2
Author: Michel Lespinasse 
Date:   Thu Nov 8 20:24:53 2012 -0800

mm-use-vm_unmapped_area-on-sparc64-architecture-fix2

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 2a5d61587ca2..a836ee967ecb 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -89,7 +89,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, 
unsigned long addr, unsi
struct mm_struct *mm = current->mm;
struct vm_area_struct * vma;
unsigned long task_size = TASK_SIZE;
-   unsigned long start_addr;
int do_color_align;
struct vm_unmapped_area_info info;
 
@@ -191,7 +190,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const 
unsigned long addr0,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
-   info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+   info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
 

commit ed558f3723a4ac2fb71d6c8fc70116994af6e61d
Author: Michel Lespinasse 
Date:   Thu Nov 8 20:22:36 2012 -0800

mm-use-vm_unmapped_area-on-arm-architecture-fix2

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index f4fec6d43d50..10062ceadd1c 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -57,7 +57,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
-   unsigned long start_addr;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
struct vm_unmapped_area_info info;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: kswapd0: excessive CPU usage

2012-11-08 Thread Seth Jennings

On 11/02/2012 02:45 PM, Jiri Slaby wrote:
> On 11/02/2012 11:53 AM, Jiri Slaby wrote:
>> On 11/02/2012 11:44 AM, Zdenek Kabelac wrote:
> Yes, applying this instead of the revert fixes the issue as well.
>>>
>>> I've applied this patch on 3.7.0-rc3 kernel - and I still see excessive
>>> CPU usage - mainly  after  suspend/resume
>>>
>>> Here is just simple  kswapd backtrace from running kernel:
>>
>> Yup, this is what we were seeing with the former patch only too. Try to
>> apply the other one too:
>> https://patchwork.kernel.org/patch/1673231/
>>
>> For me I would say, it is fixed by the two patches now. I won't be able
>> to report later, since I'm leaving to a conference tomorrow.
> 
> Damn it. It recurred right now, with both patches applied. After I
> started a java program which consumed some more memory. Though there are
> still 2 gigs free, kswap is spinning:
> [] __cond_resched+0x2a/0x40
> [] shrink_slab+0x1c0/0x2d0
> [] kswapd+0x66d/0xb60
> [] kthread+0xc0/0xd0
> [] ret_from_fork+0x7c/0xb0
> [] 0x

I'm also hitting this issue in v3.7-rc4.  It appears that the last
release not affected by this issue was v3.3.  Bisecting the changes
included for v3.4-rc1 showed that this commit introduced the issue:

fe2c2a106663130a5ab45cb0e3414b52df2fff0c is the first bad commit
commit fe2c2a106663130a5ab45cb0e3414b52df2fff0c
Author: Rik van Riel 
Date:   Wed Mar 21 16:33:51 2012 -0700

vmscan: reclaim at order 0 when compaction is enabled
...

This is plausible since the issue seems to be in the kswapd + compaction
realm.  I've yet to figure out exactly what about this commit results in
kswapd spinning.

I would be interested if someone can confirm this finding.

--
Seth

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: build warning after merge of the final tree (akpm tree related)

2012-11-08 Thread Stephen Rothwell

Hi all,

After merging the final tree, today's linux-next build (arm defconfig)
produced this warning:

arch/arm/mm/mmap.c: In function 'arch_get_unmapped_area':
arch/arm/mm/mmap.c:60:16: warning: unused variable 'start_addr' 
[-Wunused-variable]

Introduced by commit "mm: use vm_unmapped_area() on arm architecture".
-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpHqOfJx3wfl.pgp
Description: PGP signature

linux-next: build failure after merge of the final tree (akpm tree related)

2012-11-08 Thread Stephen Rothwell

Hi all,

After merging the final tree, today's linux-next build (sparc32 defconfig)
failed like this:

arch/sparc/kernel/sys_sparc_32.c: In function 'arch_get_unmapped_area':
arch/sparc/kernel/sys_sparc_32.c:41:26: error: unused variable 'vmm' 
[-Werror=unused-variable]

Caused by commit "mm: use vm_unmapped_area() on sparc32 architecture".

I have reverted that commit for today.
-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpWvyviaOp5M.pgp
Description: PGP signature

linux-next: build failure after merge of the final tree (akpm tree related)

2012-11-08 Thread Stephen Rothwell

Hi all,

After merging the final tree, today's linux-next build (sparc64 defconfig)
failed like this:

arch/sparc/kernel/sys_sparc_64.c: In function 'arch_get_unmapped_area':
arch/sparc/kernel/sys_sparc_64.c:92:16: error: unused variable 'start_addr' 
[-Werror=unused-variable]
arch/sparc/kernel/sys_sparc_64.c: In function 'arch_get_unmapped_area_topdown':
arch/sparc/kernel/sys_sparc_64.c:194:20: error: 'do_colour_align' undeclared 
(first use in this function)
arch/sparc/kernel/sys_sparc_64.c:194:20: note: each undeclared identifier is 
reported only once for each function it appears in
arch/sparc/kernel/sys_sparc_64.c:194:51: error: 'shm_align_mask' undeclared 
(first use in this function)

Caused by commit "mm: use vm_unmapped_area() on sparc64 architecture".  I
have reverted that commit for today (and the following fix patch).

arch/sparc/mm/hugetlbpage.c: In function 'hugetlb_get_unmapped_area_topdown':
arch/sparc/mm/hugetlbpage.c:63:25: error: unused variable 'vma' 
[-Werror=unused-variable]

Caused by commit "mm: use vm_unmapped_area() in hugetlbfs on sparc64
architecture".  I have reverted that commit for today.

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgphrNveEmkq6.pgp
Description: PGP signature

Re: Why Cypress does not upstream its trackpad driver?

2012-11-08 Thread Robert Hancock


On 11/07/2012 06:26 PM, David Solda wrote:

Dmitry, all,

To clarify my comment.  Our protocol utilizes 8 bytes which are needed in our 
driver.  In order for the Linux system to accept 8 bytes of data, the Linux 
psmouse system driver is required to be modified.  Without this modification, 
the driver that you are referring to will not work correctly.  The psmouse 
system driver change that would be required is the item that would be rejected.

I appreciate your comments and of course, if the driver could be upstreamed, it 
would (we already have I2C drivers upstreamed for Chrome systems), but there 
is a difference here.

I will again look into the possibility of what you are requesting, however, the 
chances are extremely low if not zero that it will be accepted.


Why? If drivers were kept out of the kernel because the hardware they 
are designed to run requires strange things or was badly designed, there 
would be a lot fewer drivers in the kernel than there are today. 
Firmware and hardware frequently does bizarre or nonsensical things and 
we just have to deal with it.




Dave

-Original Message-
From: Dmitry Torokhov [mailto:dmitry.torok...@gmail.com]
Sent: Wednesday, November 07, 2012 4:16 PM
To: David Solda
Cc: Troy Abercrombia; Kamal Mostafa; Ozan Çağlayan; 
linux-kernel@vger.kernel.org; linux-in...@vger.kernel.org; customercare; 
mario_limoncie...@dell.com
Subject: Re: Why Cypress does not upstream its trackpad driver?

Hi David,

On Wednesday, November 07, 2012 06:30:11 PM David Solda wrote:

Kamal,

My name is Dave Solda and I would be happy to answer any other
questions that you have. Troy's response is correct however as in
order to support the default Linux mouse class, our firmware would
also have to be modified to do so, which cannot be done in system. Our
packet protocol maxes out at an 8 byte packet, which requires a change
to the Linux standard in this case.


I am unable to parse this... I do not believe anyone asks you to change your 
firmware and if your protocol needs 8 bytes to transmit device state - that's 
fine.


Our goal in working with canonical was to provide something on Linux
that would support multi-touch and not only have default single finger
movement supported.



If I am mistaken and the Linux kernel would accept this, then we can
proceed to upstream, however all indications we have is that this
patch would be rejected.  If you (or others on from the locus alias)
have any inputs, I would be happy to receive them.


This really depends on whether the changes to the psmouse framework make sense 
or not. Please start submitting patches for review/discussion and we can go 
from there.

Thanks.

--
Dmitry

This message and any attachments may contain Cypress (or its subsidiaries) 
confidential information. If it has been received in error, please advise the 
sender and immediately delete this message.
--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: build failure after merge of the final tree (akpm tree related)

2012-11-08 Thread Andrew Morton

On Fri, 9 Nov 2012 14:58:32 +1100 Stephen Rothwell  
wrote:

> Hi all,
> 
> After merging the final tree, today's linux-next build (i386 defconfig)
> failed like this:
> 
> arch/x86/mm/hugetlbpage.c: In function 'hugetlb_get_unmapped_area_topdown':
> arch/x86/mm/hugetlbpage.c:299:20: error: 'mm' undeclared (first use in this 
> function)
> arch/x86/mm/hugetlbpage.c:299:20: note: each undeclared identifier is 
> reported only once for each function it appears in
> 
> Caused by commit "mm: use vm_unmapped_area() in hugetlbfs on i386
> architecture".
> 

Oops, I fixed that but forgot to commit the changes, sorry.

s/mm/current->mm/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: SR-IOV problem with Intel 82599EB (not enough MMIO resources for SR-IOV)

2012-11-08 Thread Jason Gao

> The BIOS in your machine doesn't support SR-IOV.  You'll need to ask the 
> manufacturer for a BIOS upgrade, if in fact one is available.  Sometimes 
> they're not.

Thanks very much, Greg. My server is a Dell R710 with the latest BIOS version,
and the SR-IOV option is enabled (SR-IOV Global Enable -> Enabled). I'm
confused: does the R710 provide full support for SR-IOV, or is this a kernel
or ixgbe driver bug? I'm not sure where the problem lies. Does anyone have any
experience with this?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: build failure after merge of the final tree (akpm tree related)

2012-11-08 Thread Stephen Rothwell

Hi all,

After merging the final tree, today's linux-next build (i386 defconfig)
failed like this:

arch/x86/mm/hugetlbpage.c: In function 'hugetlb_get_unmapped_area_topdown':
arch/x86/mm/hugetlbpage.c:299:20: error: 'mm' undeclared (first use in this 
function)
arch/x86/mm/hugetlbpage.c:299:20: note: each undeclared identifier is reported 
only once for each function it appears in

Caused by commit "mm: use vm_unmapped_area() in hugetlbfs on i386
architecture".

I have reverted that commit for today.
-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpil0lg01wCG.pgp
Description: PGP signature

Re: [PATCH 1/4] thermal: Add new thermal trend type to support quick cooling

2012-11-08 Thread Zhang Rui

On Thu, 2012-11-08 at 11:56 +0530, Amit Kachhap wrote:
> On 8 November 2012 11:31, Zhang Rui  wrote:
> > On Thu, 2012-11-08 at 09:56 +0530, Amit Daniel Kachhap wrote:
> >> This modification adds 2 new thermal trend type THERMAL_TREND_RAISE_FULL
> >> and THERMAL_TREND_DROP_FULL. This thermal trend can be used to quickly
> >> jump to the upper or lower cooling level instead of incremental increase
> >> or decrease.
> >
> > IMO, what we need is a new more aggressive cooling governor which always
> > uses upper limit when the temperature is raising and lower limit when
> > the temperature is dropping.
> Yes I agree that a new aggressive governor is the best approach but
> then i thought adding a new trend type is a simple solution to achieve
> this and since most of the governor logic might be same as the
> step-wise governor. I have no objection in doing it through governor.
> >
hmmm,
I think a more proper way is to set the cooling state to upper limit
when it overheats and reduce the cooling state step by step when the
temperature drops.
what do you think?

thanks,
rui

> > I can write such a governor if you do not have time to.
> ok. thanks
> >
> > thanks,
> > rui
> >>  This is needed for temperature sensors which support rising/falling
> >> threshold interrupts and polling can be totally avoided.
> >>
> >
> >
> >> Signed-off-by: Amit Daniel Kachhap 
> >> Signed-off-by: Amit Daniel Kachhap 
> >> ---
> >>  drivers/thermal/step_wise.c |   19 +++
> >>  include/linux/thermal.h |2 ++
> >>  2 files changed, 17 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
> >> index 1242cff..0d2d8d6 100644
> >> --- a/drivers/thermal/step_wise.c
> >> +++ b/drivers/thermal/step_wise.c
> >> @@ -35,6 +35,10 @@
> >>   *   state for this trip point
> >>   *b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
> >>   *   state for this trip point
> >> + *c. if the trend is THERMAL_TREND_RAISE_FULL, use highest cooling
> >> + *   state for this trip point
> >> + *d. if the trend is THERMAL_TREND_DROP_FULL, use lowest cooling
> >> + *   state for this trip point
> >>   */
> >>  static unsigned long get_target_state(struct thermal_instance *instance,
> >>   enum thermal_trend trend)
> >> @@ -50,7 +54,10 @@ static unsigned long get_target_state(struct 
> >> thermal_instance *instance,
> >>   } else if (trend == THERMAL_TREND_DROPPING) {
> >>   cur_state = cur_state > instance->lower ?
> >>   (cur_state - 1) : instance->lower;
> >> - }
> >> + } else if (trend == THERMAL_TREND_RAISE_FULL)
> >> + cur_state = instance->upper;
> >> + else if (trend == THERMAL_TREND_DROP_FULL)
> >> + cur_state = instance->lower;
> >>
> >>   return cur_state;
> >>  }
> >> @@ -87,7 +94,8 @@ static void update_instance_for_throttle(struct 
> >> thermal_zone_device *tz,
> >>  }
> >>
> >>  static void update_instance_for_dethrottle(struct thermal_zone_device *tz,
> >> - int trip, enum thermal_trip_type trip_type)
> >> + int trip, enum thermal_trip_type trip_type,
> >> + enum thermal_trend trend)
> >>  {
> >>   struct thermal_instance *instance;
> >>   struct thermal_cooling_device *cdev;
> >> @@ -101,7 +109,10 @@ static void update_instance_for_dethrottle(struct 
> >> thermal_zone_device *tz,
> >>   cdev = instance->cdev;
> >>   cdev->ops->get_cur_state(cdev, &cur_state);
> >>
> >> - instance->target = cur_state > instance->lower ?
> >> + if (trend == THERMAL_TREND_DROP_FULL)
> >> + instance->target = instance->lower;
> >> + else
> >> + instance->target = cur_state > instance->lower ?
> >>   (cur_state - 1) : THERMAL_NO_TARGET;
> >>
> >>   /* Deactivate a passive thermal instance */
> >> @@ -133,7 +144,7 @@ static void thermal_zone_trip_update(struct 
> >> thermal_zone_device *tz, int trip)
> >>   if (tz->temperature >= trip_temp)
> >>   update_instance_for_throttle(tz, trip, trip_type, trend);
> >>   else
> >> - update_instance_for_dethrottle(tz, trip, trip_type);
> >> + update_instance_for_dethrottle(tz, trip, trip_type, trend);
> >>
> >>   mutex_unlock(&tz->lock);
> >>  }
> >> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> >> index 807f214..8bce3ec 100644
> >> --- a/include/linux/thermal.h
> >> +++ b/include/linux/thermal.h
> >> @@ -68,6 +68,8 @@ enum thermal_trend {
> >>   THERMAL_TREND_STABLE, /* temperature is stable */
> >>   THERMAL_TREND_RAISING, /* temperature is raising */
> >>   THERMAL_TREND_DROPPING, /* temperature is dropping */
> >> + THERMAL_TREND_RAISE_FULL, /* Apply highest cooling action*/
> >> + THERMAL_

Re: macbook pro 9.2 stat/ata bus error

2012-11-08 Thread Robert Hancock


On 11/06/2012 09:41 PM, Azat Khuzhin wrote:

  Anybody?

On Mon, Nov 5, 2012 at 7:28 PM, Azat Khuzhin  wrote:

After installing Linux on a MacBook Pro 9.2 (mid 2012), I see the following
errors in the dmesg log:

[  389.623828] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=600
[  410.038465] NMI watchdog: enabled on all CPUs, permanently consumes
one hw-PMU counter.
[  410.075042] ehci_hcd :00:1a.0: setting latency timer to 64
[  410.483526] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=0
[ 1401.834509] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=1800
[ 1406.467268] NMI watchdog: enabled on all CPUs, permanently consumes
one hw-PMU counter.
[ 1406.506769] ehci_hcd :00:1a.0: setting latency timer to 64
[ 1406.590122] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=0
[ 1407.492260] ata2.00: exception Emask 0x10 SAct 0x0 SErr 0x5
action 0xe frozen
[ 1407.494441] ata2.00: irq_stat 0x0040, PHY RDY changed
[ 1407.495238] ata2: SError: { PHYRdyChg CommWake }
[ 1407.496035] sr 1:0:0:0: CDB:
[ 1407.497333] Get event status notification: 4a 01 00 00 10 00 00 00 08 00
[ 1407.498285] ata2.00: cmd a0/00:00:00:08:00/00:00:00:00:00/a0 tag 0
pio 16392 in
[ 1407.498285]  res 50/00:03:00:00:00/00:00:00:00:00/a0 Emask
0x10 (ATA bus error)
[ 1407.501987] ata2.00: status: { DRDY }
[ 1407.502882] ata2: hard resetting link
[ 1408.230302] ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[ 1408.233279] ata2.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)
filtered out
[ 1408.237467] ata2.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)
filtered out
[ 1408.239084] ata2.00: configured for UDMA/100
[ 1408.262238] ata2: EH complete


Is this after a resume? It could be that for some reason the SATA link 
is a little bit unstable right after the machine powers up again. There 
may not be much the kernel can do about this..



[ 3565.785609] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=1800
[ 3576.921499] NMI watchdog: enabled on all CPUs, permanently consumes
one hw-PMU counter.
[ 3576.958624] ehci_hcd :00:1a.0: setting latency timer to 64
[ 3577.114612] EXT4-fs (sda4): re-mounted. Opts:
errors=remount-ro,data=ordered,commit=0
[ 3577.923688] ata2.00: exception Emask 0x10 SAct 0x0 SErr 0x5
action 0xe frozen
[ 3577.925852] ata2.00: irq_stat 0x0040, PHY RDY changed
[ 3577.926746] ata2: SError: { PHYRdyChg CommWake }
[ 3577.927544] sr 1:0:0:0: CDB:
[ 3577.928345] Get event status notification: 4a 01 00 00 10 00 00 00 08 00
[ 3577.929642] ata2.00: cmd a0/00:00:00:08:00/00:00:00:00:00/a0 tag 0
pio 16392 in
[ 3577.929642]  res 50/00:03:00:00:00/00:00:00:00:00/a0 Emask
0x10 (ATA bus error)
[ 3577.932954] ata2.00: status: { DRDY }
[ 3577.934264] ata2: hard resetting link
[ 3578.662228] ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[ 3578.665211] ata2.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)
filtered out
[ 3578.669355] ata2.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES)
filtered out
[ 3578.670969] ata2.00: configured for UDMA/100
[ 3578.694145] ata2: EH complete

Is it linux driver, or maybe

$ lspci # sata information only
00:1f.2 SATA controller: Intel Corporation 7 Series Chipset Family
6-port SATA Controller [AHCI mode] (rev 04) (prog-if 01 [AHCI 1.0])
 Subsystem: Intel Corporation Device 7270
 Flags: bus master, 66MHz, medium devsel, latency 0, IRQ 20
 I/O ports at 2098 [size=8]
 I/O ports at 20bc [size=4]
 I/O ports at 2090 [size=8]
 I/O ports at 20b8 [size=4]
 I/O ports at 2060 [size=32]
 Memory at a0816000 (32-bit, non-prefetchable) [size=2K]
 Capabilities: [80] MSI: Enable+ Count=1/1 Maskable- 64bit-
 Capabilities: [70] Power Management version 3
 Capabilities: [a8] SATA HBA v1.0
 Capabilities: [b0] PCI Advanced Features
 Kernel driver in use: ahci

$ uname -a
Linux macbook-pro 3.6.5macbook-pro-custom-v0.1 #4 SMP Sun Nov 4
12:39:03 UTC 2012 x86_64 GNU/Linux
$ cat /etc/debian_version
wheezy/sid

In OSX there are no errors with the hard drive.

What else can I do to investigate this situation next?

--
Azat Khuzhin






--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] i2c / ACPI: add ACPI enumeration support

2012-11-08 Thread Mika Westerberg

On Thu, Nov 08, 2012 at 06:58:47PM +, Grant Likely wrote:
> On Sat, Nov 3, 2012 at 7:46 AM, Mika Westerberg
>  wrote:
> > ACPI 5 introduced I2cSerialBus resource that makes it possible to enumerate
> > and configure the I2C slave devices behind the I2C controller. This patch
> > adds helper functions to support I2C slave enumeration.
> >
> > An ACPI enabled I2C controller driver only needs to call 
> > acpi_i2c_register_devices()
> > in order to get its slave devices enumerated, created and bound to the
> > corresponding ACPI handle.
> >
> > Signed-off-by: Mika Westerberg 
> > Acked-by: Rafael J. Wysocki 
> > ---
> >  drivers/acpi/Kconfig |6 ++
> >  drivers/acpi/Makefile|1 +
> >  drivers/acpi/acpi_i2c.c  |  234 
> > ++
> >  drivers/i2c/i2c-core.c   |9 ++
> >  include/linux/acpi_i2c.h |   29 ++
> >  5 files changed, 279 insertions(+)
> >  create mode 100644 drivers/acpi/acpi_i2c.c
> >  create mode 100644 include/linux/acpi_i2c.h
> >
> > diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> > index 119d58d..0300bf6 100644
> > --- a/drivers/acpi/Kconfig
> > +++ b/drivers/acpi/Kconfig
> > @@ -181,6 +181,12 @@ config ACPI_DOCK
> >   This driver supports ACPI-controlled docking stations and 
> > removable
> >   drive bays such as the IBM Ultrabay and the Dell Module Bay.
> >
> > +config ACPI_I2C
> > +   def_tristate I2C
> > +   depends on I2C
> > +   help
> > + ACPI I2C enumeration support.
> > +
> >  config ACPI_PROCESSOR
> > tristate "Processor"
> > select THERMAL
> > diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> > index a7badb5..8573346 100644
> > --- a/drivers/acpi/Makefile
> > +++ b/drivers/acpi/Makefile
> > @@ -69,6 +69,7 @@ obj-$(CONFIG_ACPI_HED)+= hed.o
> >  obj-$(CONFIG_ACPI_EC_DEBUGFS)  += ec_sys.o
> >  obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
> >  obj-$(CONFIG_ACPI_BGRT)+= bgrt.o
> > +obj-$(CONFIG_ACPI_I2C) += acpi_i2c.o
> >
> >  # processor has its own "processor." module_param namespace
> >  processor-y:= processor_driver.o processor_throttling.o
> > diff --git a/drivers/acpi/acpi_i2c.c b/drivers/acpi/acpi_i2c.c
> > new file mode 100644
> > index 000..dc6997e
> > --- /dev/null
> > +++ b/drivers/acpi/acpi_i2c.c
> > @@ -0,0 +1,234 @@
> > +/*
> > + * ACPI I2C enumeration support
> > + *
> > + * Copyright (C) 2012, Intel Corporation
> > + * Author: Mika Westerberg 
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > +struct acpi_i2c {
> > +   acpi_status (*callback)(struct acpi_device *, void *);
> > +   void *data;
> > +};
> > +
> > +static acpi_status acpi_i2c_enumerate_device(acpi_handle handle, u32 level,
> > +void *data, void 
> > **return_value)
> > +{
> > +   struct acpi_i2c *acpi_i2c = data;
> > +   struct acpi_device *adev;
> > +
> > +   if (acpi_bus_get_device(handle, &adev))
> > +   return AE_OK;
> > +   if (acpi_bus_get_status(adev) || !adev->status.present)
> > +   return AE_OK;
> > +
> > +   return acpi_i2c->callback(adev, acpi_i2c->data);
> > +}
> > +
> > +static acpi_status acpi_i2c_enumerate(acpi_handle handle,
> > +   acpi_status (*callback)(struct acpi_device *, void *), void *data)
> > +{
> > +   struct acpi_i2c acpi_i2c;
> > +
> > +   acpi_i2c.callback = callback;
> > +   acpi_i2c.data = data;
> > +
> > +   return acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
> > +  acpi_i2c_enumerate_device, NULL,
> > +  &acpi_i2c, NULL);
> > +}
> 
> Same comment here as for the SPI patch. The two levels of indirection
> is more convoluted than it needs to be. Can acpi_i2c_find_client and
> acpi_i2c_add_device be passed directly to acpi_walk_namespace?

Yes they can, I'll do that in the next version. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] spi / ACPI: add ACPI enumeration support

2012-11-08 Thread Mika Westerberg

On Thu, Nov 08, 2012 at 06:48:05PM +, Grant Likely wrote:
> On Sat, Nov 3, 2012 at 7:46 AM, Mika Westerberg
>  wrote:
> > ACPI 5 introduced SPISerialBus resource that allows us to enumerate and
> > configure the SPI slave devices behind the SPI controller. This patch adds
> > support for this to the SPI core.
> >
> > In addition we bind ACPI nodes to SPI devices. This makes it possible for
> > the slave drivers to get the ACPI handle for further configuration.
> >
> > Signed-off-by: Mika Westerberg 
> > Acked-by: Rafael J. Wysocki 
> > ---
> >  drivers/spi/spi.c |  231 
> > -
> >  1 file changed, 230 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
> > index 84c2861..de22a6e 100644
> > --- a/drivers/spi/spi.c
> > +++ b/drivers/spi/spi.c
> > @@ -35,6 +35,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  static void spidev_release(struct device *dev)
> >  {
> > @@ -93,6 +94,10 @@ static int spi_match_device(struct device *dev, struct 
> > device_driver *drv)
> > if (of_driver_match_device(dev, drv))
> > return 1;
> >
> > +   /* Then try ACPI */
> > +   if (acpi_driver_match_device(dev, drv))
> > +   return 1;
> > +
> > if (sdrv->id_table)
> > return !!spi_match_id(sdrv->id_table, spi);
> >
> > @@ -888,6 +893,227 @@ static void of_register_spi_devices(struct spi_master 
> > *master)
> >  static void of_register_spi_devices(struct spi_master *master) { }
> >  #endif
> >
> > +#ifdef CONFIG_ACPI
> > +struct acpi_spi {
> > +   acpi_status (*callback)(struct acpi_device *, void *);
> > +   void *data;
> > +};
> > +
> > +static acpi_status acpi_spi_enumerate_device(acpi_handle handle, u32 level,
> > +void *data, void 
> > **return_value)
> > +{
> > +   struct acpi_spi *acpi_spi = data;
> > +   struct acpi_device *adev;
> > +
> > +   if (acpi_bus_get_device(handle, &adev))
> > +   return AE_OK;
> > +   if (acpi_bus_get_status(adev) || !adev->status.present)
> > +   return AE_OK;
> > +
> > +   return acpi_spi->callback(adev, acpi_spi->data);
> > +}
> > +
> > +static acpi_status acpi_spi_enumerate(acpi_handle handle,
> > +   acpi_status (*callback)(struct acpi_device *, void *), void *data)
> > +{
> > +   struct acpi_spi acpi_spi;
> > +
> > +   acpi_spi.callback = callback;
> > +   acpi_spi.data = data;
> > +
> > +   return acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
> > +  acpi_spi_enumerate_device, NULL,
> > +  &acpi_spi, NULL);
> > +}
> 
> From my reading of this, the block causes 2 levels of callback
> indirection. First to either acpi_spi_find_child or
> acpi_spi_add_device and second to acpi_spi_enumerate_device. All to
> share about 4 lines of code in acpi_spi_enumerate_device. It took me a
> while to unravel it. I think acpi_spi_find_child and
> acpi_spi_add_device should be passed directly to acpi_walk_namespace.
> Is there anything that prevents that?

No, I'll fix that up in the next version of the series.

> I also agree with the discussion that the actual parsing code for the
> resources should be common,. Retrieving things like IRQs and address
> resources should be function calls into ACPI helpers instead of open
> coding it in the spi core code.

We are working on that and I'm hoping the second version will use the
resources as provided by the ACPI core instead of calling _CRS directly
here.

> Otherwise the patch looks sane to me.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: build warning after merge of the akpm tree

2012-11-08 Thread Stephen Rothwell

Hi Andrew,

After merging the akpm tree, today's linux-next build (powerpc
allnoconfig - among others) produced this warning:

drivers/of/fdt.c: In function 'of_scan_flat_dt':
drivers/of/fdt.c:490:10: warning: assignment discards 'const' qualifier from 
pointer target type [enabled by default]

Introduced by commit "drivers/of/fdt.c: re-use kernel's kbasename()".

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgpFuoOGL6PFo.pgp
Description: PGP signature

linux-next: build failure after merge of the akpm tree

2012-11-08 Thread Stephen Rothwell

Hi Andrew,

After merging the akpm tree, today's linux-next build (powerpc_ppc64_defconfig)
failed like this:

mm/mmap.c: In function 'SYSC_mmap_pgoff':
mm/mmap.c:1271:15: error: 'MAP_HUGE_SHIFT' undeclared (first use in this 
function)
mm/mmap.c:1271:15: note: each undeclared identifier is reported only once for 
each function it appears in
mm/mmap.c:1271:33: error: 'MAP_HUGE_MASK' undeclared (first use in this 
function)

Caused by commit 160b36937ca2 ("mm: support more pagesizes for
MAP_HUGETLB/SHM_HUGETLB").

I have reverted that commit for today (along with the two fix patches).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgp4IxG2Y94ER.pgp
Description: PGP signature

Re: [PATCH v6 28/29] slub: slub-specific propagation changes.

2012-11-08 Thread Sasha Levin

On 11/08/2012 01:51 AM, Glauber Costa wrote:
> On 11/07/2012 04:53 PM, Sasha Levin wrote:
>> On 11/01/2012 08:07 AM, Glauber Costa wrote:
>>> SLUB allows us to tune a particular cache behavior with sysfs-based
>>> tunables.  When creating a new memcg cache copy, we'd like to preserve
>>> any tunables the parent cache already had.
>>>
>>> This can be done by tapping into the store attribute function provided
>>> by the allocator. We of course don't need to mess with read-only
>>> fields. Since the attributes can have multiple types and are stored
>>> internally by sysfs, the best strategy is to issue a ->show() in the
>>> root cache, and then ->store() in the memcg cache.
>>>
>>> The drawback of that, is that sysfs can allocate up to a page in
>>> buffering for show(), that we are likely not to need, but also can't
>>> guarantee. To avoid always allocating a page for that, we can update the
>>> caches at store time with the maximum attribute size ever stored to the
>>> root cache. We will then get a buffer big enough to hold it. The
>>> corollary to this, is that if no stores happened, nothing will be
>>> propagated.
>>>
>>> It can also happen that a root cache has its tunables updated during
>>> normal system operation. In this case, we will propagate the change to
>>> all caches that are already active.
>>>
>>> Signed-off-by: Glauber Costa 
>>> CC: Christoph Lameter 
>>> CC: Pekka Enberg 
>>> CC: Michal Hocko 
>>> CC: Kamezawa Hiroyuki 
>>> CC: Johannes Weiner 
>>> CC: Suleiman Souhlal 
>>> CC: Tejun Heo 
>>> ---
>>
>> Hi guys,
>>
>> This patch is making lockdep angry! *bark bark*
>>
>> [  351.935003] ==
>> [  351.937693] [ INFO: possible circular locking dependency detected ]
>> [  351.939720] 3.7.0-rc4-next-20121106-sasha-8-g353b62f #117 Tainted: G  
>>   W
>> [  351.942444] ---
>> [  351.943528] trinity-child13/6961 is trying to acquire lock:
>> [  351.943528]  (s_active#43){.+}, at: [] 
>> sysfs_addrm_finish+0x31/0x60
>> [  351.943528]
>> [  351.943528] but task is already holding lock:
>> [  351.943528]  (slab_mutex){+.+.+.}, at: [] 
>> kmem_cache_destroy+0x22/0xe0
>> [  351.943528]
>> [  351.943528] which lock already depends on the new lock.
>> [  351.943528]
>> [  351.943528]
>> [  351.943528] the existing dependency chain (in reverse order) is:
>> [  351.943528]
>> -> #1 (slab_mutex){+.+.+.}:
>> [  351.960334][] lock_acquire+0x1aa/0x240
>> [  351.960334][] __mutex_lock_common+0x59/0x5a0
>> [  351.960334][] mutex_lock_nested+0x3f/0x50
>> [  351.960334][] slab_attr_store+0xde/0x110
>> [  351.960334][] sysfs_write_file+0xfa/0x150
>> [  351.960334][] vfs_write+0xb0/0x180
>> [  351.960334][] sys_pwrite64+0x60/0xb0
>> [  351.960334][] tracesys+0xe1/0xe6
>> [  351.960334]
>> -> #0 (s_active#43){.+}:
>> [  351.960334][] __lock_acquire+0x14df/0x1ca0
>> [  351.960334][] lock_acquire+0x1aa/0x240
>> [  351.960334][] sysfs_deactivate+0x122/0x1a0
>> [  351.960334][] sysfs_addrm_finish+0x31/0x60
>> [  351.960334][] sysfs_remove_dir+0x89/0xd0
>> [  351.960334][] kobject_del+0x16/0x40
>> [  351.960334][] __kmem_cache_shutdown+0x40/0x60
>> [  351.960334][] kmem_cache_destroy+0x40/0xe0
>> [  351.960334][] mon_text_release+0x78/0xe0
>> [  351.960334][] __fput+0x122/0x2d0
>> [  351.960334][] fput+0x9/0x10
>> [  351.960334][] task_work_run+0xbe/0x100
>> [  351.960334][] do_exit+0x432/0xbd0
>> [  351.960334][] do_group_exit+0x84/0xd0
>> [  351.960334][] get_signal_to_deliver+0x81d/0x930
>> [  351.960334][] do_signal+0x3a/0x950
>> [  351.960334][] do_notify_resume+0x3e/0x90
>> [  351.960334][] int_signal+0x12/0x17
>> [  351.960334]
>> [  351.960334] other info that might help us debug this:
>> [  351.960334]
>> [  351.960334]  Possible unsafe locking scenario:
>> [  351.960334]
>> [  351.960334]CPU0CPU1
>> [  351.960334]
>> [  351.960334]   lock(slab_mutex);
>> [  351.960334]lock(s_active#43);
>> [  351.960334]lock(slab_mutex);
>> [  351.960334]   lock(s_active#43);
>> [  351.960334]
>> [  351.960334]  *** DEADLOCK ***
>> [  351.960334]
>> [  351.960334] 2 locks held by trinity-child13/6961:
>> [  351.960334]  #0:  (mon_lock){+.+.+.}, at: [] 
>> mon_text_release+0x25/0xe0
>> [  351.960334]  #1:  (slab_mutex){+.+.+.}, at: [] 
>> kmem_cache_destroy+0x22/0xe0
>> [  351.960334]
>> [  351.960334] stack backtrace:
>> [  351.960334] Pid: 6961, comm: trinity-child13 Tainted: GW
>> 3.7.0-rc4-next-20121106-sasha-8-g353b62f #117
>> [  351.960334] Call Trace:
>> [  351.960334]  [] print_circular_bug+0x1fb/0x20c
>> [  351.960334]  [] __lock_acquire+0x14df/0x1ca0
>> [

Re: [PATCH] tcp: Avoid infinite loop on recvmsg bug

2012-11-08 Thread Eric Dumazet

On Wed, 2012-11-07 at 18:25 -0800, Julius Werner wrote:
> > So you probably are fighting a bug we already fixed in upstream kernel.
> >
> > (commit c8628155ece363 "tcp: reduce out_of_order memory use" did not
> > play well with cloned skbs.)
> >
> > This issue was already discussed on netdev in the past.
> 
> Thanks for the hint. Unfortunately, we have not pulled c8628 into our
> tree yet, so that's not it. Is there another point where the cloned
> skb or the faked truesize might make it break? We have been running
> this test with that hardware some 30 times in the last months and only
> seen it once, so it cannot be that common.

Update : Chrome OS current tree is based on 3.4 and really needed the
patch :

https://gerrit.chromium.org/gerrit/#/c/37666/



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] firmware loader: Fix the concurrent request_firmware() race for kref_get/put

2012-11-08 Thread Chuansheng Liu


There is a race when two request_firmware() calls are made with the same
firmware name.

The race scenario is as below:
CPU1                                      CPU2
request_firmware() -->
_request_firmware_load() return err       another request_firmware() is coming -->
_request_firmware_cleanup is called -->   _request_firmware_prepare -->
release_firmware --->                     fw_lookup_and_allocate_buf -->
                                          spin_lock(&fwc->lock)
...                                       __fw_lookup_buf() return true
fw_free_buf() will be called -->          ...
kref_put -->
decrease the refcount to 0
                                          kref_get(&tmp->ref) ==> it will trigger warning
                                          due to refcount == 0
__fw_free_buf() -->
...                                       spin_unlock(&fwc->lock)
spin_lock(&fwc->lock)
list_del(&buf->list)
spin_unlock(&fwc->lock)
kfree(buf)
                                          After that, the freed buf will be used.

The key race is that decreasing the refcount to 0 and the list_del are not
protected together by fwc->lock, so it is possible for another thread to try
to get the buf between refcount==0 and list_del.

Fix it here to protect it together.

Signed-off-by: liu chuansheng 
---
 drivers/base/firmware_class.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b44ed35..7df32cd 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -246,7 +246,6 @@ static void __fw_free_buf(struct kref *ref)
 __func__, buf->fw_id, buf, buf->data,
 (unsigned int)buf->size);
 
-   spin_lock(&fwc->lock);
list_del(&buf->list);
spin_unlock(&fwc->lock);
 
@@ -263,7 +262,10 @@ static void __fw_free_buf(struct kref *ref)
 
 static void fw_free_buf(struct firmware_buf *buf)
 {
-   kref_put(&buf->ref, __fw_free_buf);
+   struct firmware_cache *fwc = buf->fwc;
+   spin_lock(&fwc->lock);
+   if(!kref_put(&buf->ref, __fw_free_buf))
+   spin_unlock(&fwc->lock);
 }
 
 /* direct firmware loading support */
-- 
1.7.0.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RESEND v2 1/1] percpu_rw_semaphore: reimplement to not block the readers unnecessarily

2012-11-08 Thread Paul E. McKenney

On Thu, Nov 08, 2012 at 04:41:36PM -0800, Paul E. McKenney wrote:
> On Thu, Nov 08, 2012 at 06:41:10PM -0500, Mikulas Patocka wrote:
> > 
> > 
> > On Thu, 8 Nov 2012, Paul E. McKenney wrote:
> > 
> > > On Thu, Nov 08, 2012 at 12:07:00PM -0800, Andrew Morton wrote:
> > > > On Thu, 8 Nov 2012 14:48:49 +0100
> > > > Oleg Nesterov  wrote:
> > > > 
> > > > > Currently the writer does msleep() plus synchronize_sched() 3 times
> > > > > to acquire/release the semaphore, and during this time the readers
> > > > > are blocked completely. Even if the "write" section was not actually
> > > > > started or if it was already finished.
> > > > > 
> > > > > With this patch down_write/up_write does synchronize_sched() twice
> > > > > and down_read/up_read are still possible during this time, just they
> > > > > use the slow path.
> > > > > 
> > > > > percpu_down_write() first forces the readers to use rw_semaphore and
> > > > > increment the "slow" counter to take the lock for reading, then it
> > > > > takes that rw_semaphore for writing and blocks the readers.
> > > > > 
> > > > > Also. With this patch the code relies on the documented behaviour of
> > > > > synchronize_sched(), it doesn't try to pair synchronize_sched() with
> > > > > barrier.
> > > > > 
> > > > > ...
> > > > >
> > > > >  include/linux/percpu-rwsem.h |   83 +
> > > > >  lib/Makefile |2 +-
> > > > >  lib/percpu-rwsem.c   |  123 
> > > > > ++
> > > > 
> > > > The patch also uninlines everything.
> > > > 
> > > > And it didn't export the resulting symbols to modules, so it isn't an
> > > > equivalent.  We can export thing later if needed I guess.
> > > > 
> > > > It adds percpu-rwsem.o to lib-y, so the CONFIG_BLOCK=n kernel will
> > > > avoid including the code altogether, methinks?
> > > > 
> > > > >
> > > > > ...
> > > > >
> > > > > --- /dev/null
> > > > > +++ b/lib/percpu-rwsem.c
> > > > > @@ -0,0 +1,123 @@
> > > > 
> > > > That was nice and terse ;)
> > > > 
> > > > > +#include 
> > > > > +#include 
> > > > > +#include 
> > > > 
> > > > This list is nowhere near sufficient to support this file's
> > > > requirements.  atomic.h, percpu.h, rwsem.h, wait.h, errno.h and plenty
> > > > more.  IOW, if it compiles, it was sheer luck.
> > > > 
> > > > > +int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
> > > > > +{
> > > > > + brw->fast_read_ctr = alloc_percpu(int);
> > > > > + if (unlikely(!brw->fast_read_ctr))
> > > > > + return -ENOMEM;
> > > > > +
> > > > > + mutex_init(&brw->writer_mutex);
> > > > > + init_rwsem(&brw->rw_sem);
> > > > > + atomic_set(&brw->slow_read_ctr, 0);
> > > > > + init_waitqueue_head(&brw->write_waitq);
> > > > > + return 0;
> > > > > +}
> > > > > +
> > > > > +void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
> > > > > +{
> > > > > + free_percpu(brw->fast_read_ctr);
> > > > > + brw->fast_read_ctr = NULL; /* catch use after free bugs */
> > > > > +}
> > > > > +
> > > > > +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, 
> > > > > unsigned int val)
> > > > > +{
> > > > > + bool success = false;
> > > > > +
> > > > > + preempt_disable();
> > > > > + if (likely(!mutex_is_locked(&brw->writer_mutex))) {
> > > > > + __this_cpu_add(*brw->fast_read_ctr, val);
> > > > > + success = true;
> > > > > + }
> > > > > + preempt_enable();
> > > > > +
> > > > > + return success;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * Like the normal down_read() this is not recursive, the writer can
> > > > > + * come after the first percpu_down_read() and create the deadlock.
> > > > > + */
> > > > > +void percpu_down_read(struct percpu_rw_semaphore *brw)
> > > > > +{
> > > > > + if (likely(update_fast_ctr(brw, +1)))
> > > > > + return;
> > > > > +
> > > > > + down_read(&brw->rw_sem);
> > > > > + atomic_inc(&brw->slow_read_ctr);
> > > > > + up_read(&brw->rw_sem);
> > > > > +}
> > > > > +
> > > > > +void percpu_up_read(struct percpu_rw_semaphore *brw)
> > > > > +{
> > > > > + if (likely(update_fast_ctr(brw, -1)))
> > > > > + return;
> > > > > +
> > > > > + /* false-positive is possible but harmless */
> > > > > + if (atomic_dec_and_test(&brw->slow_read_ctr))
> > > > > + wake_up_all(&brw->write_waitq);
> > > > > +}
> > > > > +
> > > > > +static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
> > > > > +{
> > > > > + unsigned int sum = 0;
> > > > > + int cpu;
> > > > > +
> > > > > + for_each_possible_cpu(cpu) {
> > > > > + sum += per_cpu(*brw->fast_read_ctr, cpu);
> > > > > + per_cpu(*brw->fast_read_ctr, cpu) = 0;
> > > > > + }
> > > > > +
> > > > > + return sum;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * A writer takes ->writer_mutex to exclude other writers and to 
> > > > > force the
> > > > > + * readers to switch to the

[PATCH] staging/vme: Use dev_ or pr_ printks in devices/vme_user.c

2012-11-08 Thread YAMANE Toshiaki

fixed below checkpatch warnings.
- WARNING: Prefer netdev_err(netdev, ... then dev_err(dev, ... then pr_err(...  to printk(KERN_ERR ...
- WARNING: Prefer netdev_info(netdev, ... then dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
- WARNING: Prefer netdev_dbg(netdev, ... then dev_dbg(dev, ... then pr_debug(...  to printk(KERN_DEBUG ...
- WARNING: Prefer netdev_warn(netdev, ... then dev_warn(dev, ... then pr_warn(...  to printk(KERN_WARNING ...

and add pr_fmt.

Signed-off-by: YAMANE Toshiaki 
---
 drivers/staging/vme/devices/vme_user.c |   73 +++-
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/drivers/staging/vme/devices/vme_user.c 
b/drivers/staging/vme/devices/vme_user.c
index c3f94f3..e3731eb 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -15,6 +15,8 @@
  * option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include 
 #include 
 #include 
@@ -170,7 +172,7 @@ static int vme_user_open(struct inode *inode, struct file 
*file)
mutex_lock(&image[minor].mutex);
/* Allow device to be opened if a resource is needed and allocated. */
if (minor < CONTROL_MINOR && image[minor].resource == NULL) {
-   printk(KERN_ERR "No resources allocated for device\n");
+   pr_err("No resources allocated for device\n");
err = -EINVAL;
goto err_res;
}
@@ -225,13 +227,13 @@ static ssize_t resource_to_user(int minor, char __user 
*buf, size_t count,
(unsigned long)copied);
if (retval != 0) {
copied = (copied - retval);
-   printk(KERN_INFO "User copy failed\n");
+   pr_info("User copy failed\n");
return -EINVAL;
}
 
} else {
/* XXX Need to write this */
-   printk(KERN_INFO "Currently don't support large transfers\n");
+   pr_info("Currently don't support large transfers\n");
/* Map in pages from userspace */
 
/* Call vme_master_read to do the transfer */
@@ -265,7 +267,7 @@ static ssize_t resource_from_user(unsigned int minor, const 
char __user *buf,
image[minor].kern_buf, copied, *ppos);
} else {
/* XXX Need to write this */
-   printk(KERN_INFO "Currently don't support large transfers\n");
+   pr_info("Currently don't support large transfers\n");
/* Map in pages from userspace */
 
/* Call vme_master_write to do the transfer */
@@ -286,7 +288,7 @@ static ssize_t buffer_to_user(unsigned int minor, char 
__user *buf,
retval = __copy_to_user(buf, image_ptr, (unsigned long)count);
if (retval != 0) {
retval = (count - retval);
-   printk(KERN_WARNING "Partial copy to userspace\n");
+   pr_warn("Partial copy to userspace\n");
} else
retval = count;
 
@@ -305,7 +307,7 @@ static ssize_t buffer_from_user(unsigned int minor, const 
char __user *buf,
retval = __copy_from_user(image_ptr, buf, (unsigned long)count);
if (retval != 0) {
retval = (count - retval);
-   printk(KERN_WARNING "Partial copy to userspace\n");
+   pr_warn("Partial copy to userspace\n");
} else
retval = count;
 
@@ -476,7 +478,7 @@ static int vme_user_ioctl(struct inode *inode, struct file 
*file,
copied = copy_from_user(&irq_req, argp,
sizeof(struct vme_irq_id));
if (copied != 0) {
-   printk(KERN_WARNING "Partial copy from 
userspace\n");
+   pr_warn("Partial copy from userspace\n");
return -EFAULT;
}
 
@@ -503,8 +505,7 @@ static int vme_user_ioctl(struct inode *inode, struct file 
*file,
copied = copy_to_user(argp, &master,
sizeof(struct vme_master));
if (copied != 0) {
-   printk(KERN_WARNING "Partial copy to "
-   "userspace\n");
+   pr_warn("Partial copy to userspace\n");
return -EFAULT;
}
 
@@ -515,8 +516,7 @@ static int vme_user_ioctl(struct inode *inode, struct file 
*file,
 
copied = copy_from_user(&master, argp, sizeof(master));
if (copied != 0) {
-   printk(KERN_WARNING "Partial copy from "
-   "userspace\n");
+   pr_warn("Partial copy from userspace\n");

Re: [PATCH 1/3] rtc: rtc-spear: Use devm_*() routines

2012-11-08 Thread Viresh Kumar

On 8 November 2012 20:07, Viresh Kumar  wrote:
> This patch frees rtc-spear driver from tension of freeing resources :)
> devm_* derivatives of multiple routines are used while allocating resources,
> which would be freed automatically by kernel.
>
> Signed-off-by: Viresh Kumar 

Hi Andrew,

Can you please apply this fixup to this patch?

commit ad4a753df699e525d9c91a353f4406fd0e363599
Author: Viresh Kumar 
Date:   Fri Nov 9 08:36:29 2012 +0530

fixup! rtc: rtc-spear: Use devm_*() routines
---
 drivers/rtc/rtc-spear.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index 0c4805c..141fc94 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -363,11 +363,6 @@ static int __devinit spear_rtc_probe(struct
platform_device *pdev)
dev_err(&pdev->dev, "no resource defined\n");
return -EBUSY;
}
-   if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-   pdev->name)) {
-   dev_err(&pdev->dev, "rtc region already claimed\n");
-   return -EBUSY;
-   }

config = devm_kzalloc(&pdev->dev, sizeof(*config), GFP_KERNEL);
if (!config) {
@@ -390,10 +385,9 @@ static int __devinit spear_rtc_probe(struct
platform_device *pdev)
return status;
}

-   config->ioaddr = devm_ioremap(&pdev->dev, res->start,
-   resource_size(res));
+   config->ioaddr = devm_request_and_ioremap(&pdev->dev, res);
if (!config->ioaddr) {
-   dev_err(&pdev->dev, "ioremap fail\n");
+   dev_err(&pdev->dev, "request-ioremap fail\n");
return -ENOMEM;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] input: spear-keyboard: Use devm_*() routines

2012-11-08 Thread Viresh Kumar

On 9 November 2012 08:06, Viresh Kumar  wrote:
> On 8 November 2012 22:08, Dmitry Torokhov  wrote:
>> There is devm_request_and_ioremap() which you can use here.
>
> Should have been done in V1 only.

Hi Dmitry,

Please apply below fixup to original patch:

x-x

From: Viresh Kumar 
Date: Fri, 9 Nov 2012 08:22:28 +0530
Subject: [PATCH] fixup! input: spear-keyboard: Use devm_*() routines

---
 drivers/input/keyboard/spear-keyboard.c | 41 -
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/input/keyboard/spear-keyboard.c
b/drivers/input/keyboard/spear-keyboard.c
index b8784df..25e0a3b 100644
--- a/drivers/input/keyboard/spear-keyboard.c
+++ b/drivers/input/keyboard/spear-keyboard.c
@@ -55,7 +55,6 @@

 struct spear_kbd {
struct input_dev *input;
-   struct resource *res;
void __iomem *io_base;
struct clk *clk;
unsigned int irq;
@@ -205,11 +204,15 @@ static int __devinit spear_kbd_probe(struct
platform_device *pdev)
}

kbd = devm_kzalloc(&pdev->dev, sizeof(*kbd), GFP_KERNEL);
-   input_dev = input_allocate_device();
-   if (!kbd || !input_dev) {
+   if (!kbd) {
+   dev_err(&pdev->dev, "out of memory\n");
+   return -ENOMEM;
+   }
+
+   input_dev = devm_input_allocate_device(&pdev->dev);
+   if (!input_dev) {
dev_err(&pdev->dev, "out of memory\n");
-   error = -ENOMEM;
-   goto err_free_mem;
+   return -ENOMEM;
}

kbd->input = input_dev;
@@ -218,41 +221,29 @@ static int __devinit spear_kbd_probe(struct
platform_device *pdev)
if (!pdata) {
error = spear_kbd_parse_dt(pdev, kbd);
if (error)
-   goto err_free_mem;
+   return error;
} else {
kbd->mode = pdata->mode;
kbd->rep = pdata->rep;
kbd->suspended_rate = pdata->suspended_rate;
}

-   kbd->res = devm_request_mem_region(&pdev->dev, res->start,
-   resource_size(res), pdev->name);
-   if (!kbd->res) {
-   dev_err(&pdev->dev, "keyboard region already claimed\n");
-   error = -EBUSY;
-   goto err_free_mem;
-   }
-
-   kbd->io_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+   kbd->io_base = devm_request_and_ioremap(&pdev->dev, res);
if (!kbd->io_base) {
-   dev_err(&pdev->dev, "ioremap failed for kbd_region\n");
-   error = -ENOMEM;
-   goto err_free_mem;
+   dev_err(&pdev->dev, "request-ioremap failed for kbd_region\n");
+   return -ENOMEM;
}

kbd->clk = devm_clk_get(&pdev->dev, NULL);
-   if (IS_ERR(kbd->clk)) {
-   error = PTR_ERR(kbd->clk);
-   goto err_free_mem;
-   }
+   if (IS_ERR(kbd->clk))
+   return PTR_ERR(kbd->clk);

error = clk_prepare(kbd->clk);
if (error)
-   goto err_free_mem;
+   return error;

input_dev->name = "Spear Keyboard";
input_dev->phys = "keyboard/input0";
-   input_dev->dev.parent = &pdev->dev;
input_dev->id.bustype = BUS_HOST;
input_dev->id.vendor = 0x0001;
input_dev->id.product = 0x0001;
@@ -293,8 +284,6 @@ static int __devinit spear_kbd_probe(struct
platform_device *pdev)

 err_unprepare_clk:
clk_unprepare(kbd->clk);
-err_free_mem:
-   input_free_device(input_dev);

return error;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [BUGFIX] PM: Fix active child counting when disabled and forbidden

2012-11-08 Thread Huang Ying

On Thu, 2012-11-08 at 12:07 -0500, Alan Stern wrote:
> On Thu, 8 Nov 2012, Rafael J. Wysocki wrote:
> 
> > > > > is it a good idea to allow to set device state to SUSPENDED if the 
> > > > > device
> > > > > is disabled?
> > > > 
> > > > No, it is not.  The status should always be ACTIVE as long as 
> > > > usage_count > 0.
> 
> That isn't strictly true, because pm_runtime_get_noresume violates this
> rule.  What the PM core actually does is prevent a transition from the
> ACTIVE state to the SUSPENDING/SUSPENDED state if usage_count > 0,
> _provided_ runtime PM is enabled.  There's no such restriction when it
> is disabled.

Usage count may not be an issue for the end user.  But "on" in the "control"
sysfs file + SUSPENDED can be confusing for the end user.  Maybe we need
to check dev->power.runtime_auto in pm_runtime_set_suspended().

> BTW, do we need to think about what happens in the case where the
> device _does_ have a driver and for some reason the driver has disabled
> the device for runtime PM?  I would just as soon ignore the issue.
> 
> > > > However, in some cases we actually would like to change the status to
> > > > SUSPENDED when usage_count becomes equal to 0, because that means we can
> > > > suspend (I mean really suspend) the parents of the devices in question
> > > > (and we want to notify the parents in those cases).
> > > 
> > > So do you think Alan Stern's suggestion about forbidden and disabled is
> > > the right way to go?
> > 
> > I'm not really sure about that.
> > 
> > My original idea was that the runtime PM status and usage counter would
> > only matter when runtime PM of a device was enabled.  That leads to
> > problems, though, when we enable runtime PM of a device whose usage
> > counter is greater from zero and status is SUSPENDED.
> 
> That doesn't seem to be a problem.  It can arise without disabling
> runtime PM at all -- just call pm_runtime_get_noresume.

I think pm_runtime_get_noresume cannot fix the issue.
pm_runtime_set_active() should be invoked before pm_runtime_enable() if
necessary.  That is, the invoker should be responsible for the
consistency between usage_count and SUSPENDED/ACTIVE status.  And the
API may be a little low level and error-prone for the invoker (mainly bus
code).

Best Regards,
Huang Ying

> >  Also when the
> > device's status is ACTIVE, but its parent's child count is 0.
> 
> __pm_runtime_set_status prevents this situation from arising.  When the 
> device's status is set to ACTIVE, the parent's child count is 
> incremented.  So this isn't a problem either.
> 
> > It's not very easy to fix this at the core level, though, because we
> > depend on the current behavior in some places.  I'm thinking that
> > perhaps pm_runtime_enable() should just WARN() if things are obviously
> > inconsistent (although there still may be problems, for example, if the
> > parent's child count is 2 when we enable runtime PM for its child, but that
> > child is the only one it actually has).
> 
> I think we should continue the original strategy of ignoring the status
> and usage counter when runtime PM is disabled.  This is definitely the
> easiest and most straightforward approach.  Fixing the problem at hand
> (VGA controllers) by changing the PCI subsystem seems like the simplest
> solution.
> 
> Your revised patch does do the job, except for a few problems.  
> Namely, while local_pci_probe() and pci_device_remove() are running,
> the device _does_ have a driver.  This means that local_pci_probe()
> should not call pm_runtime_get_sync(), for example.  Doing so would
> invoke the driver's runtime_resume routine before calling the driver's
> probe routine!
> 
> The USB subsystem solves this problem by carefully keeping track of the 
> state of the device-driver binding:
> 
>   Originally the device is UNBOUND.
> 
>   At the start of the subsystem's probe routine, the state
>   changes to BINDING.
> 
>   If the probe succeeds then it changes to BOUND; otherwise
>   it goes back to UNBOUND.
> 
>   At the start of the subsystem's remove routine, the state
>   changes to UNBINDING.  At the end it goes to UNBOUND.
> 
> When the state is anything other than BOUND, the subsystem's runtime PM 
> routines act as though there is no driver.  This works because the 
> subsystem makes sure that the device is ACTIVE with a nonzero usage 
> count before calling the driver's probe or remove routine, so no 
> runtime PM callbacks can occur at these awkward times.
> 
> If PCI adopted this strategy then your new patch would work okay.  I 
> think -- I haven't checked it thoroughly.
> 
> Alan Stern
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv2] mm: Fix calculation of dirtyable memory

2012-11-08 Thread Fengguang Wu

On Thu, Nov 08, 2012 at 04:52:33PM -0800, Sonny Rao wrote:
> The system uses global_dirtyable_memory() to calculate
> number of dirtyable pages/pages that can be allocated
> to the page cache.  A bug causes an underflow thus making
> the page count look like a big unsigned number.  This in turn
> confuses the dirty writeback throttling to aggressively write
> back pages as they become dirty (usually 1 page at a time).
> 
> Fix is to ensure there is no underflow while doing the math.

Good catch, thanks!

> Signed-off-by: Sonny Rao 
> Signed-off-by: Puneet Kumar 
> ---
>  v2: added apkm's suggestion to make the highmem calculation better
>  mm/page-writeback.c |   17 +++--
>  1 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 830893b..ce62442 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -201,6 +201,18 @@ static unsigned long highmem_dirtyable_memory(unsigned 
> long total)
>zone_reclaimable_pages(z) - z->dirty_balance_reserve;
>   }
>   /*
> +  * Unreclaimable memory (kernel memory or anonymous memory
> +  * without swap) can bring down the dirtyable pages below
> +  * the zone's dirty balance reserve and the above calculation
> +  * will underflow.  However we still want to add in nodes
> +  * which are below threshold (negative values) to get a more
> +  * accurate calculation but make sure that the total never
> +  * underflows.
> +  */
> + if ((long)x < 0)
> + x = 0;
> +
> + /*
>* Make sure that the number of highmem pages is never larger
>* than the number of the total dirtyable memory. This can only
>* occur in very strange VM situations but we want to make sure
> @@ -222,8 +234,9 @@ static unsigned long global_dirtyable_memory(void)
>  {
>   unsigned long x;
>  
> - x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
> - dirty_balance_reserve;
> + x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
> + if (x >= dirty_balance_reserve)
> + x -= dirty_balance_reserve;

That can be converted to "if ((long)x < 0) x = 0;", too.

And I suspect zone_dirtyable_memory() needs similar fix, too.

Thanks,
Fengguang
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] input: spear-keyboard: Use devm_*() routines

2012-11-08 Thread Viresh Kumar

On 8 November 2012 22:08, Dmitry Torokhov  wrote:
> On Thu, Nov 08, 2012 at 07:10:47PM +0530, Viresh Kumar wrote:
> It also breaks the error unwinding/removal of the driver as it frees
> input device while IRQ handler is still active.

I have heard of this argument before, probably from you. :)
Just need clarification again. How will we get an interrupt when the controller
is stopped, unless we have a shared irq?

> I will push the patch which implements devm_input_allocate_device() to
> my 'next' branch in a few, please use it because it will make sure input
> device will stick around long enough.

Will surely use that.

>> + kbd->io_base = devm_ioremap(&pdev->dev, res->start, 
>> resource_size(res));
>
> There is devm_request_and_ioremap() which you can use here.

Should have been done in V1 only.

--
viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] kfifo: round up the fifo size power of 2

2012-11-08 Thread Yuanhan Liu

On Thu, Nov 08, 2012 at 01:37:15PM +0100, Stefani Seibold wrote:
> Am Donnerstag, den 08.11.2012, 20:24 +0800 schrieb Yuanhan Liu:
> > On Tue, Oct 30, 2012 at 11:52:10PM -0700, Andrew Morton wrote:
> > > On Wed, 31 Oct 2012 07:30:33 +0100 Stefani Seibold  
> > > wrote:
> > > 
> > > > > Yes, and I guess the same to give them a 64-element one.
> > > > > 
> > > > > > 
> > > > > > If there's absolutely no prospect that the kfifo code will ever 
> > > > > > support
> > > > > > 100-byte fifos then I guess we should rework the API so that the 
> > > > > > caller
> > > > > > has to pass in log2 of the size, not the size itself.  That way 
> > > > > > there
> > > > > > will be no surprises and no mistakes.
> > > > > > 
> > > > > > That being said, the power-of-2 limitation isn't at all intrinsic 
> > > > > > to a
> > > > > > fifo, so we shouldn't do this.  Ideally, we'd change the kfifo
> > > > > > implementation so it does what the caller asked it to do!
> > > > > 
> > > > > I'm fine with removing the power-of-2 limitation. Stefani, what's your
> > > > > comment on that?
> > > > > 
> > > > 
> > > > You can't remove the power-of-2-limitation, since this would result in a
> > > > performance decrease (bit wise and vs. modulo operation).
> > > 
> > > Probably an insignificant change in performance.
> > > 
> > > It could be made much smaller by just never doing the modulus operation
> > > - instead do
> > > 
> > >   if (++index == max)
> > >   index = 0;
> > > 
> > > this does introduce one problem: it's no longer possible to distinguish
> > > the "full" and "empty" states by comparing the head and tail indices. 
> > > But that is soluble.
> > 
> > Hi Andrew,
> > 
> > Yes, it is soluble. How about the following solution?
> > 
> > Add 2 more fields(in_off and out_off) in __kfifo structure, so that in
> > and out will keep increasing each time, while in_off and out_off will be
> > wrapped to head if goes to the end of fifo buffer.
> > 
> > So, we can use in and out for counting unused space, and distinguish the
> > "full" and "empty" state, and also, of course no need for locking.
> > 
> > Stefani, sorry for quite late reply. I checked all the code used kfifo_alloc
> > and kfifo_init. Firstly, there are a lot of users ;-)
> > 
> > And secondly, I did find some examples used kfifo as it supports
> > none-power-of-2 kfifo. Say, the one at drivers/hid/hid-logitech-dj.c:
> >if (kfifo_alloc(&djrcv_dev->notif_fifo,
> >DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct 
> > dj_report),
> >GFP_KERNEL)) {
> > 
> > which means it wants to allocate a kfifo buffer which can store
> > DJ_MAX_NUMBER_NOTIFICATIONS(8 here) dj_report(each 15 bytes) at once.
> > 
> > And DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report) = 8 * 15.
> > Then current code would allocate a size of rounddown_power_of_2(120) =
> > 64 bytes, which can hold 4 dj_report only once, which is a half of expected.
> > 
> 
> This will go away with a log API.
> 
> > There are few more examples like this.
> > 
> > And, kfifo_init used a pre-allocated buffer, it would be a little strange
> > to ask user to pre-allocate a power of 2 size aligned buffer.
> > 
> > So, I guess it's would be good to support none-power-of-2 kfifo?
> > 
> > I know you care the performance a lot. Well, as Andrew said, it may
> > introduce a little insignificant drop(no modulus, few more add/dec).
> > Thus, do you have some benchmarks for that? I can have a test to check
> > if it is a insignificant change on performance or not :)
> > 
> 
> Dirty, Ugly, Hacky and this will produce a lot of overhead, especially
> for kfifo_put and kfifo_get which are inlined code.

Yes, it is. I will try log API then.

Stefani, I found an issue while reworking the current API. Take the current
code of __kfifo_init:
int __kfifo_init(struct __kfifo *fifo, void *buffer,
unsigned int size, size_t esize)
{
size /= esize;

if (!is_power_of_2(size))
size = rounddown_pow_of_two(size);

}

Even though I changed the API to something like:
int __kfifo_init(struct __kfifo *fifo, void *buffer,
int size_order, size_t esize)
{
unsigned int size = 1 << size_order;

size /= esize;
...
}

See? There is still a division, and we can't be sure that the result will
be a power of 2 after that.

So, I came up with 2 proposals to fix this.

1. refactor the meaning of 'size' argument first.

   'size' currently means the size of the pre-allocated buffer. We can
   change it to mean 'the number of fifo elements', just like in
   __kfifo_alloc, so that we don't need to do the size /= esize step.

2. remove kfifo_init

   We can't make sure that kfifo will do exactly what users asked for (in
   terms of fifo size), so it would be safer to maintain the buffer and
   the buffer size inside kfifo. So, I propose to remove kfifo_init and
   use kfifo_alloc instead.

   git grep 'kfifo_init\>' shows

linux-next: manual merge of the arm-soc tree with the pinctrl tree

2012-11-08 Thread Stephen Rothwell

Hi all,

Today's linux-next merge of the arm-soc tree got a conflict in
drivers/pinctrl/pinctrl-nomadik.c between commit 241e51ebd3b2
("pinctrl/nomadik: make independent of prcmu driver") from the pinctrl
tree and commit 44e47ccf8ab6 ("Merge branch 'next/multiplatform' into
for-next") from the arm-soc tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc drivers/pinctrl/pinctrl-nomadik.c
index 33c614e,3ad23fb..000
--- a/drivers/pinctrl/pinctrl-nomadik.c
+++ b/drivers/pinctrl/pinctrl-nomadik.c
@@@ -30,10 -30,23 +30,9 @@@
  #include 
  /* Since we request GPIOs from ourself */
  #include 
 -/*
 - * For the U8500 archs, use the PRCMU register interface, for the older
 - * Nomadik, provide some stubs. The functions using these will only be
 - * called on the U8500 series.
 - */
 -#ifdef CONFIG_ARCH_U8500
 -#include 
 -#else
 -static inline u32 prcmu_read(unsigned int reg) {
 -  return 0;
 -}
 -static inline void prcmu_write(unsigned int reg, u32 value) {}
 -static inline void prcmu_write_masked(unsigned int reg, u32 mask, u32 value) 
{}
 -#endif
  #include 
- 
  #include 
- 
+ #include 
  #include "pinctrl-nomadik.h"
  
  /*


pgpAPc8PBQxhI.pgp
Description: PGP signature

Re: [RFC] Device Tree Overlays Proposal (Was Re: capebus moving omap_devices to mach-omap2)

2012-11-08 Thread David Gibson

On Mon, Nov 05, 2012 at 08:40:30PM +, Grant Likely wrote:
> Hey folks,
> 
> As promised, here is my early draft to try and capture what device
> tree overlays need to do and how to get there. Comments and
> suggestions greatly appreciated.
> 
> Device Tree Overlay Feature

Hrm.  So, you may yet convince me otherwise, but I'm really getting a
feeling of overengineering from this proposal so far.

> Purpose
> ===
> Sometimes it is not convenient to describe an entire system with a
> single FDT. For example, processor modules that are plugged into one or
> more modules (a la the BeagleBone), or systems with an FPGA peripheral
> that is programmed after the system is booted.
> 
> For these cases it is proposed to implement an overlay feature for the
> so that the initial device tree data can be modified by userspace at
> runtime by loading additional overlay FDTs that amend the original data.
> 
> User Stories
> 

[snip]

The user stories mostly sound reasonable to me, but I don't know
enough about the hardware in question to know what they'll mean in
terms of what needs to be added to the base device tree.

> Summary points:
> - Create an FDT overlay data format and usage model
>   - SHALL reliable resolve or validate of phandles between base and
> overlay trees

So, I'm not at all clear on what this proposed phandle validation
would involve.  I'm also not convinced it's as necessary as you
think, more on that below.

[snip - lots of technical stuff]

So, let me take a stab at this from a more bottom-up approach, and see
if we meet in the middle somewhere.  As I discussed in the other
thread with Daniel Mack, I can see two different operations on the
fdt that might be useful in this context.  I think of them as "graft"
- which takes one fdt and adds it as a new subtree to an existing fdt
- and "overlay" where a new fdt adds or overrides arbitrary properties
in an existing tree.  Overlay is more or less what we do at the source
level in dtc already.

Overlay is obviously more general - you can add, change and possibly
delete any existing node or property.

Graft can only add new nodes and properties, not modify existing ones.
But that restriction comes with some advantages: reversing the
operation is just a matter of deleting the subtree with no extra
tracking info required.  It's simple to see how to have rules or
permissions about where subtrees can be grafted, and if the graft
point is identified by a label or id, rather than full path, it
automatically adapts to at least some changes in the base tree
structure.

I think graft is basically a safer operation, particularly if we're
doing this at runtime with userspace passing in these fdt fragments.
In fact I'd go so far as to say if you really need the full overlay
functionality, then you really ought to be working at the bootloader
or early kernel load level to assemble the correct full device tree.
And as Mitch says, an existing programming language (C, OFW Forth or
whatever as you please) will serve you better for this sort of general
manipulation than a limited template system.

I also think graft will handle most of your use cases, although as I
said I don't fully understand the implications of some of them, so I
could be wrong.  So, the actual insertion of the subtree is pretty
trivial to implement.  phandles are the obvious other matter to be
dealt with.  I haven't found the right thread where what you've
envisaged so far is discussed, so here are things I can see:

1) Avoiding phandle collisions between main tree and subtree (or
   between subtrees).

I'm hopeful that this can be resolved just by establishing some
conventions about the ranges of phandles to be used for various
components.  I'd certainly be happy to add a directive to dtc which
enforces allocation of phandles within a specified range.

2) Resolving phandle references within a subtree

If we can handle (1) by convention, we don't need anything here, the
references are fine as is.

3) Resolving phandle references from the subtree to the main tree.

So, I think this can actually be avoided, at least in cases where what
physical connections are available to the expansion module is well
defined.  The main causes to have external references are interrupts
and gpios.  Interrupts we handle by defining an interrupt specifier
space for the interrupts available on the expansion
socket/connector/bus/whatever.  In the master tree we then have
something like:

...
expansion-socket@ {
expansion-id = "SlotA";
interrupt-map = < /* map expansion irq specs to
 board interrupt controllers */ >;
interrupt-map-mask = < ... >;
ranges = < /* map expansion local addresses to global
  mmio */ >;
};

The subtree for the expansion module gets attached as a subnode of
this one.  It doesn't use explicit interrupt-parents but instead just
uses th

[PATCH v3 9/9] net: batman-adv: use per_cpu_add helper

2012-11-08 Thread Shan Wei

From: Shan Wei 

this_cpu_add is an atomic operation,
and it is faster than the per_cpu_ptr operation.

Signed-off-by: Shan Wei 
Reviewed-by: Christoph Lameter 
---
v3: change commit message.
---
 net/batman-adv/main.h |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 897ba6a..3aef5b2 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -263,9 +263,7 @@ static inline bool batadv_has_timed_out(unsigned long 
timestamp,
 static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
  size_t count)
 {
-   int cpu = get_cpu();
-   per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count;
-   put_cpu();
+   this_cpu_add(bat_priv->bat_counters[idx], count);
 }
 
 #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 8/9] clocksource: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 


Signed-off-by: Shan Wei 
Reviewed-by: Christoph Lameter 
---
no changes vs v2.
---
 drivers/clocksource/arm_generic.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/clocksource/arm_generic.c 
b/drivers/clocksource/arm_generic.c
index c4d9f95..cb445ab 100644
--- a/drivers/clocksource/arm_generic.c
+++ b/drivers/clocksource/arm_generic.c
@@ -224,7 +224,7 @@ int __init arm_generic_timer_init(void)
lpj_fine = arch_timer_rate / HZ;
 
/* Immediately configure the timer on the boot CPU */
-   arch_timer_setup(per_cpu_ptr(&arch_timer_evt, smp_processor_id()));
+   arch_timer_setup(this_cpu_ptr(&arch_timer_evt));
 
register_cpu_notifier(&arch_timer_cpu_nb);
 
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 7/9] trace: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 


Signed-off-by: Shan Wei 
---
v3:
  directly return member address of per-cpu variable.
---
 kernel/trace/blktrace.c |2 +-
 kernel/trace/trace.c|5 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd030..71259e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char 
*fmt, ...)
return;
 
local_irq_save(flags);
-   buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+   buf = this_cpu_ptr(bt->msg_data);
va_start(args, fmt);
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 31e4f55..65cb003 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1495,7 +1495,6 @@ static struct trace_buffer_struct 
*trace_percpu_nmi_buffer;
 static char *get_trace_buf(void)
 {
struct trace_buffer_struct *percpu_buffer;
-   struct trace_buffer_struct *buffer;
 
/*
 * If we have allocated per cpu buffers, then we do not
@@ -1513,9 +1512,7 @@ static char *get_trace_buf(void)
if (!percpu_buffer)
return NULL;
 
-   buffer = per_cpu_ptr(percpu_buffer, smp_processor_id());
-
-   return buffer->buffer;
+   return (char *)this_cpu_ptr(&percpu_buffer->buffer);
 }
 
 static int alloc_percpu_trace_buffer(void)
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 6/9] rcu: use __this_cpu_read helper instead of per_cpu_ptr(p, raw_smp_processor_id())

2012-11-08 Thread Shan Wei

From: Shan Wei 

Signed-off-by: Shan Wei 
---
no changes vs v2.
---
 kernel/rcutree.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 74df86b..441b945 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1960,7 +1960,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
struct rcu_node *rnp_old = NULL;
 
/* Funnel through hierarchy to reduce memory contention. */
-   rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+   rnp = __this_cpu_read(rsp->rda->mynode);
for (; rnp != NULL; rnp = rnp->parent) {
ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
  !raw_spin_trylock(&rnp->fqslock);
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 5/9] kernel: padata : use __this_cpu_read per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 

With bottom halves disabled, __this_cpu_read is the better choice.

Signed-off-by: Shan Wei 
---
v3: use __this_cpu_read instead of this_cpu_read.
---
 kernel/padata.c |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index 89fe3d1..072f4ee 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -171,7 +171,7 @@ static struct padata_priv *padata_get_next(struct 
parallel_data *pd)
 {
int cpu, num_cpus;
unsigned int next_nr, next_index;
-   struct padata_parallel_queue *queue, *next_queue;
+   struct padata_parallel_queue *next_queue;
struct padata_priv *padata;
struct padata_list *reorder;
 
@@ -204,8 +204,7 @@ static struct padata_priv *padata_get_next(struct 
parallel_data *pd)
goto out;
}
 
-   queue = per_cpu_ptr(pd->pqueue, smp_processor_id());
-   if (queue->cpu_index == next_queue->cpu_index) {
+   if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
goto out;
}
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 4/9] net: openvswitch: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 

Just use the faster this_cpu_ptr instead of per_cpu_ptr(p, smp_processor_id()).


Signed-off-by: Shan Wei 
---
no changes vs v2.
---
 net/openvswitch/datapath.c |4 ++--
 net/openvswitch/vport.c|5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4c4b62c..77d16a5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct 
sk_buff *skb)
int error;
int key_len;
 
-   stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+   stats = this_cpu_ptr(dp->stats_percpu);
 
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
@@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
return 0;
 
 err:
-   stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+   stats = this_cpu_ptr(dp->stats_percpu);
 
u64_stats_update_begin(&stats->sync);
stats->n_lost++;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 03779e8..70af0be 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff 
*skb)
 {
struct vport_percpu_stats *stats;
 
-   stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
-
+   stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->sync);
stats->rx_packets++;
stats->rx_bytes += skb->len;
@@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
if (likely(sent)) {
struct vport_percpu_stats *stats;
 
-   stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+   stats = this_cpu_ptr(vport->percpu_stats);
 
u64_stats_update_begin(&stats->sync);
stats->tx_packets++;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/9] net: xfrm: use __this_cpu_read per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 


Signed-off-by: Shan Wei 
---
v3 fix compile warning:
net/xfrm/xfrm_ipcomp.c: In function 'ipcomp_alloc_tfms':
net/xfrm/xfrm_ipcomp.c:285: warning: assignment from incompatible pointer type

---
 net/xfrm/xfrm_ipcomp.c |7 ++-
 1 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index e5246fb..898c899 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -276,18 +276,15 @@ static struct crypto_comp * __percpu 
*ipcomp_alloc_tfms(const char *alg_name)
struct crypto_comp * __percpu *tfms;
int cpu;
 
-   /* This can be any valid CPU ID so we don't need locking. */
-   cpu = raw_smp_processor_id();
 
list_for_each_entry(pos, &ipcomp_tfms_list, list) {
struct crypto_comp *tfm;
 
-   tfms = pos->tfms;
-   tfm = *per_cpu_ptr(tfms, cpu);
+   tfm = (struct crypto_comp *) __this_cpu_read(pos->tfms);
 
if (!strcmp(crypto_comp_name(tfm), alg_name)) {
pos->users++;
-   return tfms;
+   return pos->tfms;
}
}
 
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/9] net: rds: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 

Signed-off-by: Shan Wei 
Reviewed-by: Christoph Lameter 
---
no changes vs v2.
---
 net/rds/ib_recv.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8d19491..a4a5064 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -423,7 +423,7 @@ static void rds_ib_recv_cache_put(struct list_head 
*new_item,
 
local_irq_save(flags);
 
-   chp = per_cpu_ptr(cache->percpu, smp_processor_id());
+   chp = this_cpu_ptr(cache->percpu);
if (!chp->first)
INIT_LIST_HEAD(new_item);
else /* put on front */
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 1/9] net: core: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

From: Shan Wei 

flush_tasklet is a struct, not a pointer, in the percpu variable,
so use this_cpu_ptr to get a pointer to the member.

Signed-off-by: Shan Wei 
---
 net/core/flow.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/core/flow.c b/net/core/flow.c
index e318c7e..b0901ee 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data)
 static void flow_cache_flush_per_cpu(void *data)
 {
struct flow_flush_info *info = data;
-   int cpu;
struct tasklet_struct *tasklet;
 
-   cpu = smp_processor_id();
-   tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+   tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
 }
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/9 v3] use efficient this_cpu_* helper

2012-11-08 Thread Shan Wei

this_cpu_ptr/this_cpu_read is faster than per_cpu_ptr(p, smp_processor_id())
and can reduce memory accesses.
The latter helper needs to find the offset for the current cpu,
and needs more assembler instructions, as the objdump output below shows.

this_cpu_ptr relocates an address. this_cpu_read() relocates the address
and performs the fetch. If you want to operate on rda(defined as per_cpu) 
then you can only use this_cpu_ptr. this_cpu_read() saves you more instructions
since it can do the relocation and the fetch in one instruction.

per_cpu_ptr(p, smp_processor_id())：
  1e:   65 8b 04 25 00 00 00 00 mov%gs:0x0,%eax
  26:   48 98   cltq
  28:   31 f6   xor%esi,%esi
  2a:   48 c7 c7 00 00 00 00mov$0x0,%rdi
  31:   48 8b 04 c5 00 00 00 00 mov0x0(,%rax,8),%rax
  39:   c7 44 10 04 14 00 00 00 movl   $0x14,0x4(%rax,%rdx,1)

this_cpu_ptr(p)
  1e:   65 48 03 14 25 00 00 00 00  add%gs:0x0,%rdx
  27:   31 f6   xor%esi,%esi
  29:   c7 42 04 14 00 00 00movl   $0x14,0x4(%rdx)
  30:   48 c7 c7 00 00 00 00mov$0x0,%rdi


Changelog V3:
1. Use this_cpu_read to directly read a member of a per-cpu variable,
   thereby dropping the this_cpu_ptr operation.
2. For the preemption-off and bottom-halves-off cases,
   use __this_cpu_read instead of this_cpu_read.

Changelog V2:
1. Use this_cpu_read directly instead of ref to field of per-cpu variable.
2. Patch5 about ftrace is dropped from this series.
3. Add new patch9 to replace get_cpu;per_cpu_ptr;put_cpu with this_cpu_add opt.
4. For preemption disable case, use __this_cpu_read instead.
  

$ git diff --stat 7da716aee2532399e213a14f656d304098f67a11..
 drivers/clocksource/arm_generic.c |2 +-
 kernel/padata.c   |5 ++---
 kernel/rcutree.c  |2 +-
 kernel/trace/blktrace.c   |2 +-
 kernel/trace/trace.c  |5 +
 net/batman-adv/main.h |4 +---
 net/core/flow.c   |4 +---
 net/openvswitch/datapath.c|4 ++--
 net/openvswitch/vport.c   |5 ++---
 net/rds/ib_recv.c |2 +-
 net/xfrm/xfrm_ipcomp.c|7 ++-
 11 files changed, 15 insertions(+), 27 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

How to debug system freeze (not detected by kernel debug options)

2012-11-08 Thread Jongman Heo


Dear kernel hackers,

I have a problem in an SMP environment on an x86 platform (an Intel Atom based
embedded system).

In UP, there is no issue, but in SMP, the system freezes within tens of minutes
(or less) if I perform an IO test with flash memory and HDD simultaneously
(using dd).

I enabled relevant kernel debug options, like LOCKDEP, DETECT_SOFTLOCKUP, 
DETECT_HUNG_TASK, along with "nmi_watchdog=1".
(Yeah, this is somewhat old kernel, 2.6.35.14).

But no debug message is shown. (I checked that the NMI interrupt count
increases correctly.)

Do you have any thoughts on what can cause a system freeze without being
detected by LOCKDEP, the watchdog, and the other options?

Thanks in advance,
Jongman.

[PATCH 3/3] ipgre: capture inner headers during encapsulation

2012-11-08 Thread Joseph Gasparakis

Populating the inner header pointers of skb for ipgre
This patch has been compile-tested only.

Signed-off-by: Joseph Gasparakis 
Signed-off-by: Peter P Waskiewicz Jr 
---
 net/ipv4/ip_gre.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7240f8e..ec3ebb1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -766,6 +766,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, 
struct net_device *dev
intgre_hlen;
__be32 dst;
intmtu;
+   unsigned int offset;
 
if (skb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(skb))
@@ -902,6 +903,17 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, 
struct net_device *dev
tunnel->err_count = 0;
}
 
+   offset = skb->data - skb->head;
+
+   skb_reset_inner_mac_header(skb);
+
+   if (skb->network_header)
+   skb_set_inner_network_header(skb, skb->network_header - offset);
+
+   if (skb->transport_header)
+   skb_set_inner_transport_header(skb, skb->transport_header -
+  offset);
+
max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] net: Add support for hardware-offloaded encapsulation

2012-11-08 Thread Joseph Gasparakis

This patch adds kernel support for offloading Tx and Rx checksumming of
encapsulated packets (such as VXLAN and IP GRE) to the NIC.

Signed-off-by: Joseph Gasparakis 
Signed-off-by: Peter P Waskiewicz Jr 
---
 Documentation/networking/netdev-features.txt |  10 +++
 include/linux/if_ether.h |   5 ++
 include/linux/ip.h   |   5 ++
 include/linux/netdev_features.h  |   3 +
 include/linux/skbuff.h   | 114 +++
 include/linux/udp.h  |   5 ++
 net/core/ethtool.c   |   2 +
 net/core/skbuff.c|  17 
 8 files changed, 161 insertions(+)

diff --git a/Documentation/networking/netdev-features.txt 
b/Documentation/networking/netdev-features.txt
index 4164f5c..82695c0 100644
--- a/Documentation/networking/netdev-features.txt
+++ b/Documentation/networking/netdev-features.txt
@@ -165,3 +165,13 @@ This requests that the NIC receive all possible frames, 
including errored
 frames (such as bad FCS, etc).  This can be helpful when sniffing a link with
 bad packets on it.  Some NICs may receive more packets if also put into normal
 PROMISC mdoe.
+
+*  tx-enc-checksum-offload
+
+This feature implies that the NIC will be able to calculate the Tx checksums
+for both inner and outer packets in the case of vxlan and ipgre encapsulation.
+
+*  rx-enc-checksum-offload
+
+This feature implies that the NIC will be able to verify the Rx checksums
+for both inner and outer packets in the case of vxlan and ipgre encapsulation.
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 12b4d55..195376b 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -28,6 +28,11 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff 
*skb)
return (struct ethhdr *)skb_mac_header(skb);
 }
 
+static inline struct ethhdr *eth_inner_hdr(const struct sk_buff *skb)
+{
+   return (struct ethhdr *)skb_inner_mac_header(skb);
+}
+
 int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
 
 int mac_pton(const char *s, u8 *mac);
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 58b82a2..e084de7 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -25,6 +25,11 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
return (struct iphdr *)skb_network_header(skb);
 }
 
+static inline struct iphdr *ip_inner_hdr(const struct sk_buff *skb)
+{
+   return (struct iphdr *)skb_inner_network_header(skb);
+}
+
 static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
return (struct iphdr *)skb_transport_header(skb);
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 5ac3212..6dd59a5 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -19,6 +19,7 @@ enum {
NETIF_F_IP_CSUM_BIT,/* Can checksum TCP/UDP over IPv4. */
__UNUSED_NETIF_F_1,
NETIF_F_HW_CSUM_BIT,/* Can checksum all the packets. */
+   NETIF_F_HW_CSUM_ENC_BIT,/* Can checksum all inner headers */
NETIF_F_IPV6_CSUM_BIT,  /* Can checksum TCP/UDP over IPV6 */
NETIF_F_HIGHDMA_BIT,/* Can DMA to high memory. */
NETIF_F_FRAGLIST_BIT,   /* Scatter/gather IO. */
@@ -52,6 +53,8 @@ enum {
NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */
NETIF_F_RXHASH_BIT, /* Receive hashing offload */
NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */
+   NETIF_F_RXCSUM_ENC_BIT, /* Receive checksuming offload */
+   /* for encapsulation */
NETIF_F_NOCACHE_COPY_BIT,   /* Use no-cache copyfromuser */
NETIF_F_LOOPBACK_BIT,   /* Enable loopback */
NETIF_F_RXFCS_BIT,  /* Append FCS to skb pkt data */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f2af494..4b9b50b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -379,6 +379,9 @@ typedef unsigned char *sk_buff_data_t;
  * @transport_header: Transport layer header
  * @network_header: Network layer header
  * @mac_header: Link layer header
+ * @inner_transport_header: Inner transport layer header (encapsulation)
+ * @inner_network_header: Network layer header (encapsulation)
+ * @inner_mac_header: Link layer header (encapsulation)
  * @tail: Tail pointer
  * @end: End pointer
  * @head: Head of buffer
@@ -489,6 +492,9 @@ struct sk_buff {
sk_buff_data_t  transport_header;
sk_buff_data_t  network_header;
sk_buff_data_t  mac_header;
+   sk_buff_data_t  inner_transport_header;
+   sk_buff_data_t  inner_network_header;
+   sk_buff_data_t  inner_mac_header;
/* These elements must be at the end, see alloc_skb() f

Re: [PATCH 2/8] aoe: provide ATA identify device content to user on request

2012-11-08 Thread Ed Cashin

On Nov 8, 2012, at 7:40 PM, Jeff Garzik wrote:

> On 11/08/2012 11:32 AM, Ed Cashin wrote:
>> This patch makes the aoe driver follow expected behavior when
>> the user uses ioctl to get the ATA device identify information.
>> 
>> Signed-off-by: Ed Cashin 
>> ---
>>  drivers/block/aoe/aoe.h|1 +
>>  drivers/block/aoe/aoeblk.c |   30 ++
>>  drivers/block/aoe/aoecmd.c |   16 
>>  3 files changed, 47 insertions(+), 0 deletions(-)
>> 
>> diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
>> index 536942b..f6e0c03 100644
>> --- a/drivers/block/aoe/aoe.h
>> +++ b/drivers/block/aoe/aoe.h
>> @@ -169,6 +169,7 @@ struct aoedev {
>>  struct aoetgt *htgt;/* target needing rexmit assistance */
>>  ulong ntargets;
>>  ulong kicked;
>> +char ident[512];
>>  };
>> 
>>  /* kthread tracking */
>> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
>> index 56736cd..7ba0fcf 100644
>> --- a/drivers/block/aoe/aoeblk.c
>> +++ b/drivers/block/aoe/aoeblk.c
>> @@ -17,6 +17,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include "aoe.h"
>> 
>>  static DEFINE_MUTEX(aoeblk_mutex);
>> @@ -212,9 +213,38 @@ aoeblk_getgeo(struct block_device *bdev, struct 
>> hd_geometry *geo)
>>  return 0;
>>  }
>> 
>> +static int
>> +aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
>> +{
>> +struct aoedev *d;
>> +
>> +if (!arg)
>> +return -EINVAL;
>> +
>> +d = bdev->bd_disk->private_data;
>> +if ((d->flags & DEVFL_UP) == 0) {
>> +pr_err("aoe: disk not up\n");
>> +return -ENODEV;
>> +}
>> +
>> +if (cmd == HDIO_GET_IDENTITY) {
>> +if (!copy_to_user((void __user *) arg, &d->ident,
>> +sizeof(d->ident)))
>> +return 0;
>> +return -EFAULT;
>> +}
>> +
>> +/* udev calls scsi_id, which uses SG_IO, resulting in noise */
>> +if (cmd != SG_IO)
>> +pr_info("aoe: unknown ioctl 0x%x\n", cmd);
>> +
>> +return -ENOTTY;
>> +}
>> +
>>  static const struct block_device_operations aoe_bdops = {
>>  .open = aoeblk_open,
>>  .release = aoeblk_release,
>> +.ioctl = aoeblk_ioctl,
>>  .getgeo = aoeblk_getgeo,
>>  .owner = THIS_MODULE,
>>  };
>> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
>> index 3ce01f6..c4ff70b 100644
>> --- a/drivers/block/aoe/aoecmd.c
>> +++ b/drivers/block/aoe/aoecmd.c
>> @@ -799,6 +799,17 @@ aoecmd_sleepwork(struct work_struct *work)
>>  }
>> 
>>  static void
>> +ata_ident_fixstring(u16 *id, int ns)
>> +{
>> +u16 s;
>> +
>> +while (ns-- > 0) {
>> +s = *id;
>> +*id++ = s >> 8 | s << 8;
>> +}
>> +}
>> +
>> +static void
>>  ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
>>  {
>>  u64 ssize;
>> @@ -833,6 +844,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, 
>> unsigned char *id)
>>  d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
>>  }
>> 
>> +ata_ident_fixstring((u16 *) &id[10<<1], 10);/* serial */
>> +ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */
>> +ata_ident_fixstring((u16 *) &id[27<<1], 20);/* model */
> 
> This duplicates ata_id_string() and/or ata_id_c_string(), does it not?


They're similar, yes, but aoecmd.c:ata_ident_fixstring performs the byte 
swapping 
in place, avoiding the need for any on-stack or other memory allocation.  The 
code
is pretty readable now as a simple in-place byte swap, and I'm concerned that 
the
extra mechanics of buffer allocation, any allocation failure handling, and 
memmov-ing 
the results back into the id array wouldn't simplify the code if aoecmd.c tried 
to use 
ata_id_string as it is.

-- 
  Ed Cashin
  ecas...@coraid.com


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] vxlan: capture inner headers during encapsulation

2012-11-08 Thread Joseph Gasparakis

Populating the inner header pointers of skb for vxlan

Signed-off-by: Joseph Gasparakis 
Signed-off-by: Peter P Waskiewicz Jr 
---
 drivers/net/vxlan.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 030559d..14e6c8f 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -694,11 +694,23 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct 
net_device *dev)
__be16 df = 0;
__u8 tos, ttl;
int err;
+   unsigned int offset;
 
dst = vxlan_find_dst(vxlan, skb);
if (!dst)
goto drop;
 
+   offset = skb->data - skb->head;
+
+   skb_reset_inner_mac_header(skb);
+
+   if (skb->network_header)
+   skb_set_inner_network_header(skb, skb->network_header - offset);
+
+   if (skb->transport_header)
+   skb_set_inner_transport_header(skb, skb->transport_header -
+ offset);
+
/* Need space for new headers (invalidates iph ptr) */
if (skb_cow_head(skb, VXLAN_HEADROOM))
goto drop;
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net-next 0/3 ] tunneling: Add support for hardware-offloaded encapsulation

2012-11-08 Thread Joseph Gasparakis

The series contains updates to add in the NIC Rx and Tx checksumming support
for encapsulated packets.

The sk_buff needs to somehow carry information about the inner packet, and
adding three fields for the inner mac, network and transport headers was the
preferred approach.

Not adding these fields would mean that the drivers would need to parse the
sk_buff data in the hot path, which would have a negative impact on performance.

Adding to sk_buff a pointer to the skbuff of the inner packet made sense, but
would be a complicated change, as assumptions needed to be made with regard to
helper functions such as skb_clone() and skb_copy(). Also, code for the existing
encapsulation protocols (such as VXLAN and IP GRE) had to be reworked, so the
decision was to take the simple approach of adding these three fields.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: scsi target, likely GPL violation

2012-11-08 Thread Nicholas A. Bellinger

On Thu, 2012-11-08 at 13:22 -0800, Andy Grover wrote:
> On 11/08/2012 12:05 PM, Nicholas A. Bellinger wrote:
> > Accusing us of violating GPL is a serious legal claim.  
> > 
> > In fact, we are not violating GPL.  In short, this is because we wrote
> > the code you are referring to (the SCSI target core in our commercial
> > RTS OS product), we have exclusive copyright ownership of it, and this
> > code contains no GPL code from the community.  GPL obligations only
> > apply downstream to licensees, and not to the author of the code.  Those
> > who use the code under GPL are subject to its conditions; we are not.
> 
> Hi Nick, thanks for finally responding.
> 
> I believe your argument is wrong for two reasons.
> 
> First, LIO is a derivative work of the Linux kernel. It uses kernel APIs
> and headers. You ship Linux as part of RTS OS. Even if you had not asked
> for LIO to be included in mainline, this would still be true and would
> require you to publish your changes under the GPLv2.
> 
> Second, you claim you hold exclusive copyright for the code. Not true.
> One example: on http://www.risingtidesystems.com/storage.html you claim
> support for FCoE. You didn't build tcm_fc, Intel did. Under the GPLv2.
> Furthermore, SRP support came from SCST, iirc. None of these
> contributors gave RTS any right to use their copyrighted code except
> under the conditions of the GPLv2.
> 

Andy,

Support for certified VAAI is part of our commercial target core. The
target core constitutes a stand-alone kernel subsystem of which we are
the sole copyright owners. In addition, our target contains a number of
backend drivers, of which we are also the sole copyright owners, and a
number of fabric modules, of which some we are the sole copyright
owners, and of which others we are not, as you pointed out. A
substantial fraction of the code of which we own the sole copyright was
certified by BlackDuck Software as early as in 2007. 

We contributed our target to the Linux kernel in 2010, at which point we
forked it into the upstream version and our commercial version. These
target versions have been diverging over time, as we keep maintaining
either one of them independently.

For our commercial target core, we only use Linux kernel symbols that
are not marked as GPL. In addition, we define the API between the target
core and its backend drivers and between the target core and its fabric
modules, we define the ABI between the target core and user space, and
we have done so years before our code went upstream into the Linux
kernel.

We have been contributing substantially to the upstream target version
to keep improving Linux. We have also been improving our commercial
target version to afford the considerable effort and expense involved in
our ongoing Linux contributions, and to compensate other top Linux
kernel developers for their contributions to the upstream target
version.

RTS OS is based on a stock Linux enterprise kernel. This Linux kernel
has naturally the ability to load either one of our standalone
self-contained target module versions without any modifications.

Again, we’re very disappointed by these untrue and highly damaging
accusations from Red Hat. We have generously contributed to Linux, and
we have generously supported the Linux community for their contributions
to our upstream target and other Linux kernel parts. You have mostly
just incorporated our work into Red Hat’s products.

So yes, Andy, please start behaving properly, so that at least we can
get back to making Linux better.

--nab

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Is this a kernel bug?

2012-11-08 Thread Andrew Morton

On Fri, 9 Nov 2012 08:53:49 +0800 Cyberman Wu  wrote:

> A lot of these message on many CPU:
> 
> ...
>
> Starting stack dump of tid 906, pid 906 (kworker/16:1) on cpu 16 at
> cycle 416925426066163
>   frame 0: 0xfff700375f58 kthread_data+0x18/0x20 (sp 0xfe00f9fbf430)
>   frame 1: 0xfff700357fe8 wq_worker_sleeping+0x28/0xf8 (sp
> 0xfe00f9fbf430)
>   frame 2: 0xfff700021ab8 schedule+0xd00/0x1538 (sp 0xfe00f9fbf448)
>   frame 3: 0xfff70041f950 do_exit+0x510/0x658 (sp 0xfe00f9fbf790)
>   frame 4: 0xfff7000ade50 do_group_exit+0xc0/0x220 (sp 0xfe00f9fbf840)
>   frame 5: 0xfff7001137a0 jit_bundle_gen+0xf20/0x27d8 (sp
> 0xfe00f9fbf878)

I don't recognize jit_bundle_gen.  Has this kernel been modified?

>   frame 6: 0xfff70034e830 do_unaligned+0xe0/0x5b0 (sp 0xfe00f9fbfac8)
>   frame 7: 0xfff700139af8 handle_interrupt+0x270/0x278 (sp
> 0xfe00f9fbfc00)
>   
>   frame 8: 0xfff7002fc488 worker_enter_idle+0x1c8/0x2e8 (sp
> 0xfe00f9fbfe78)
>   frame 9: 0xfff7002750c8 worker_thread+0x4c8/0x898 (sp 
> 0xfe00f9fbfea0)
>   frame 10: 0xfff7000f0530 kthread+0xe0/0xe8 (sp 0xfe00f9fbff80)
>   frame 11: 0xfff7000bab38 start_kernel_thread+0x18/0x20 (sp

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 06/11] ARM: pxa: convert timer suspend/resume to clock_event_device

2012-11-08 Thread Eric Miao

On Fri, Nov 9, 2012 at 5:01 AM, Stephen Warren  wrote:
> From: Stephen Warren 
>
> Move PXA's timer suspend/resume functions from struct sys_timer
> pxa_timer into struct clock_event_device ckevt_pxa_osmr0. This
> will allow the sys_timer suspend/resume fields to be removed, and
> eventually lead to a complete removal of struct sys_timer.
>
> Cc: Eric Miao 
> Cc: Russell King 
> Cc: Haojian Zhuang 
> Signed-off-by: Stephen Warren 

Acked-by: Eric Miao 

> ---
>  arch/arm/mach-pxa/time.c |   76 
> +++---
>  1 files changed, 38 insertions(+), 38 deletions(-)
>
> diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
> index 4bc47d6..ce58bc9 100644
> --- a/arch/arm/mach-pxa/time.c
> +++ b/arch/arm/mach-pxa/time.c
> @@ -89,12 +89,50 @@ pxa_osmr0_set_mode(enum clock_event_mode mode, struct 
> clock_event_device *dev)
> }
>  }
>
> +#ifdef CONFIG_PM
> +static unsigned long osmr[4], oier, oscr;
> +
> +static void pxa_timer_suspend(struct clock_event_device *cedev)
> +{
> +   osmr[0] = readl_relaxed(OSMR0);
> +   osmr[1] = readl_relaxed(OSMR1);
> +   osmr[2] = readl_relaxed(OSMR2);
> +   osmr[3] = readl_relaxed(OSMR3);
> +   oier = readl_relaxed(OIER);
> +   oscr = readl_relaxed(OSCR);
> +}
> +
> +static void pxa_timer_resume(struct clock_event_device *cedev)
> +{
> +   /*
> +* Ensure that we have at least MIN_OSCR_DELTA between match
> +* register 0 and the OSCR, to guarantee that we will receive
> +* the one-shot timer interrupt.  We adjust OSMR0 in preference
> +* to OSCR to guarantee that OSCR is monotonically incrementing.
> +*/
> +   if (osmr[0] - oscr < MIN_OSCR_DELTA)
> +   osmr[0] += MIN_OSCR_DELTA;
> +
> +   writel_relaxed(osmr[0], OSMR0);
> +   writel_relaxed(osmr[1], OSMR1);
> +   writel_relaxed(osmr[2], OSMR2);
> +   writel_relaxed(osmr[3], OSMR3);
> +   writel_relaxed(oier, OIER);
> +   writel_relaxed(oscr, OSCR);
> +}
> +#else
> +#define pxa_timer_suspend NULL
> +#define pxa_timer_resume NULL
> +#endif
> +
>  static struct clock_event_device ckevt_pxa_osmr0 = {
> .name   = "osmr0",
> .features   = CLOCK_EVT_FEAT_ONESHOT,
> .rating = 200,
> .set_next_event = pxa_osmr0_set_next_event,
> .set_mode   = pxa_osmr0_set_mode,
> +   .suspend= pxa_timer_suspend,
> +   .resume = pxa_timer_resume,
>  };
>
>  static struct irqaction pxa_ost0_irq = {
> @@ -127,44 +165,6 @@ static void __init pxa_timer_init(void)
> clockevents_register_device(&ckevt_pxa_osmr0);
>  }
>
> -#ifdef CONFIG_PM
> -static unsigned long osmr[4], oier, oscr;
> -
> -static void pxa_timer_suspend(void)
> -{
> -   osmr[0] = readl_relaxed(OSMR0);
> -   osmr[1] = readl_relaxed(OSMR1);
> -   osmr[2] = readl_relaxed(OSMR2);
> -   osmr[3] = readl_relaxed(OSMR3);
> -   oier = readl_relaxed(OIER);
> -   oscr = readl_relaxed(OSCR);
> -}
> -
> -static void pxa_timer_resume(void)
> -{
> -   /*
> -* Ensure that we have at least MIN_OSCR_DELTA between match
> -* register 0 and the OSCR, to guarantee that we will receive
> -* the one-shot timer interrupt.  We adjust OSMR0 in preference
> -* to OSCR to guarantee that OSCR is monotonically incrementing.
> -*/
> -   if (osmr[0] - oscr < MIN_OSCR_DELTA)
> -   osmr[0] += MIN_OSCR_DELTA;
> -
> -   writel_relaxed(osmr[0], OSMR0);
> -   writel_relaxed(osmr[1], OSMR1);
> -   writel_relaxed(osmr[2], OSMR2);
> -   writel_relaxed(osmr[3], OSMR3);
> -   writel_relaxed(oier, OIER);
> -   writel_relaxed(oscr, OSCR);
> -}
> -#else
> -#define pxa_timer_suspend NULL
> -#define pxa_timer_resume NULL
> -#endif
> -
>  struct sys_timer pxa_timer = {
> .init   = pxa_timer_init,
> -   .suspend= pxa_timer_suspend,
> -   .resume = pxa_timer_resume,
>  };
> --
> 1.7.0.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2] sched: add a tuning knob to allow changing RR timeslice

2012-11-08 Thread Clark Williams


This version stores the user-input value in a separate location from
the jiffies values used by the scheduler, to prevent a race condition.

Subject: [PATCH v2] sched: add a tuning knob to allow changing RR
timeslice

User wanted a facility similar to the ability on Solaris to adjust
the SCHED_RR timeslice value. Add a /proc/sys/kernel scheduler knob
named sched_rr_timeslice_ms which allows global changing of the SCHED_RR
timeslice value. The user-visible value is in milliseconds but is stored as
jiffies.  Setting to 0 (zero) resets to the default (currently 100ms).

Patch against tip/master, currently 3.7-rc3.

Signed-off-by: Clark Williams 
---
 include/linux/sched.h |  7 ++-
 kernel/sched/core.c   | 30 ++
 kernel/sched/rt.c |  4 ++--
 kernel/sysctl.c   |  7 +++
 4 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03be150..1e2f38a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2071,11 +2071,16 @@ static inline unsigned int
get_sysctl_timer_migration(void) #endif /* CONFIG_SCHED_DEBUG */
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
-
+extern int sysctl_sched_rr_timeslice_ms;
+extern int sched_rr_timeslice;
 int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
 
+int sched_rr_handler(struct ctl_table *table, int write,
+   void __user *buffer, size_t *lenp,
+   loff_t *ppos);
+
 #ifdef CONFIG_SCHED_AUTOGROUP
 extern unsigned int sysctl_sched_autogroup_enabled;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c2e077c..318f617 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -288,6 +288,17 @@ __read_mostly int scheduler_running;
 int sysctl_sched_rt_runtime = 95;
 
 
+/*
+ * SCHED_RR timeslice in jiffies
+ *
+ */
+int sched_rr_timeslice = RR_TIMESLICE;
+int sysctl_sched_rr_timeslice_ms;
+
+static inline void init_rr_timeslice()
+{
+   sysctl_sched_rr_timeslice_ms = msecs_to_jiffies(RR_TIMESLICE);
+}
 
 /*
  * __task_rq_lock - lock the rq @p resides on.
@@ -6863,6 +6874,8 @@ void __init sched_init(void)
init_rt_bandwidth(&def_rt_bandwidth,
global_rt_period(), global_rt_runtime());
 
+   init_rr_timeslice();
+
 #ifdef CONFIG_RT_GROUP_SCHED
init_rt_bandwidth(&root_task_group.rt_bandwidth,
global_rt_period(), global_rt_runtime());
@@ -7543,6 +7556,23 @@ int sched_rt_handler(struct ctl_table *table,
int write, return ret;
 }
 
+int sched_rr_handler(struct ctl_table *table, int write,
+   void __user *buffer, size_t *lenp,
+   loff_t *ppos)
+{
+   int ret;
+   static DEFINE_MUTEX(mutex);
+
+   mutex_lock(&mutex);
+   ret = proc_dointvec(table, write, buffer, lenp, ppos);
+   /* make sure we maintain jiffies internally */
+   if (!ret && write)
+   sched_rr_timeslice = (sysctl_sched_rr_timeslice_ms <=
0) ?
+   RR_TIMESLICE :
msecs_to_jiffies(sysctl_sched_rr_timeslice_ms);
+   mutex_unlock(&mutex);
+   return ret;
+}
+
 #ifdef CONFIG_CGROUP_SCHED
 
 /* return corresponding task_group object of a cgroup */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 418feb0..71aa6d0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2010,7 +2010,7 @@ static void task_tick_rt(struct rq *rq, struct
task_struct *p, int queued) if (--p->rt.time_slice)
return;
 
-   p->rt.time_slice = RR_TIMESLICE;
+   p->rt.time_slice = sched_rr_timeslice;
 
/*
 * Requeue to the end of queue if we (and all of our
ancestors) are the @@ -2041,7 +2041,7 @@ static unsigned int
get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 * Time slice is 0 for SCHED_FIFO tasks
 */
if (task->policy == SCHED_RR)
-   return RR_TIMESLICE;
+   return sched_rr_timeslice;
else
return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b769d25..9fa0885 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -403,6 +403,13 @@ static struct ctl_table kern_table[] = {
.mode   = 0644,
.proc_handler   = sched_rt_handler,
},
+   {
+   .procname   = "sched_rr_timeslice_ms",
+   .data   = &sysctl_sched_rr_timeslice_ms,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = sched_rr_handler,
+   },
 #ifdef CONFIG_SCHED_AUTOGROUP
{
.procname   = "sched_autogroup_enabled",
-- 
1.7.11.7



signature.asc
Description: PGP signature

Re: [PATCH] smack: SMACK_MAGIC to include/uapi/linux/magic.h

2012-11-08 Thread Casey Schaufler

On 11/8/2012 10:08 AM, Jarkko Sakkinen wrote:
> SMACK_MAGIC moved to a proper place for easy user space access
> (i.e. libsmack).
>
> Signed-off-by: Jarkko Sakkinen 

I will apply once James updates the next branch of his security tree.

> ---
>  include/uapi/linux/magic.h |1 +
>  security/smack/smack.h |6 +-
>  security/smack/smack_lsm.c |1 -
>  3 files changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index e15192c..12735ad 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -11,6 +11,7 @@
>  #define DEBUGFS_MAGIC  0x64626720
>  #define SECURITYFS_MAGIC 0x73636673
>  #define SELINUX_MAGIC0xf97cff8c
> +#define SMACK_MAGIC  0x43415d53  /* "SMAC" */
>  #define RAMFS_MAGIC  0x858458f6  /* some random number */
>  #define TMPFS_MAGIC  0x01021994
>  #define HUGETLBFS_MAGIC  0x958458f6  /* some random number */
> diff --git a/security/smack/smack.h b/security/smack/smack.h
> index 99b3612..e3e7a4f 100644
> --- a/security/smack/smack.h
> +++ b/security/smack/smack.h
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /*
>   * Smack labels were limited to 23 characters for a long time.
> @@ -149,11 +150,6 @@ struct smack_known {
>  #define SMACK_CIPSO_SOCKET   1
>  
>  /*
> - * smackfs magic number
> - */
> -#define SMACK_MAGIC  0x43415d53 /* "SMAC" */
> -
> -/*
>   * CIPSO defaults.
>   */
>  #define SMACK_CIPSO_DOI_DEFAULT  3   /* Historical */
> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index 38be92c..6e8fa99 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -32,7 +32,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 4/9] net: openvswitch: use this_cpu_ptr per-cpu helper

2012-11-08 Thread Shan Wei

Christoph Lameter said, at 2012/11/9 1:18:
> On Thu, 8 Nov 2012, Shan Wei wrote:
> 
>> Christoph Lameter said, at 2012/11/3 1:46:
u64_stats_update_begin(&stats->sync);
stats->tx_packets++;
>>>
>>> Use this_cpu_inc(vport->percpu_stats->packets) here?
>>
>> Lots of network drivers use u64_stats_sync infrastructure for statistics
> 
> So they would all have an advantage from the patch.

I will try to do that optimization later, in a separate patchset that is not
part of this series.

I will submit v3 version of this series after testing today.

Thanks

 
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V2 3/5] checkpatch: Remove reference to feature-removal-schedule.txt.

2012-11-08 Thread Tao Ma

From: Tao Ma 

In 9c0ece069, Linus removed feature-removal-schedule.txt from Documentation,
but there are still some references to this file. So remove them.

Cc: Andrew Morton 
Cc: Andy Whitcroft 
Signed-off-by: Tao Ma 
---
 scripts/checkpatch.pl |   37 -
 1 files changed, 0 insertions(+), 37 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 21a9f5d..b56c5e0 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -352,27 +352,6 @@ sub deparenthesize {
 
 $chk_signoff = 0 if ($file);
 
-my @dep_includes = ();
-my @dep_functions = ();
-my $removal = "Documentation/feature-removal-schedule.txt";
-if ($tree && -f "$root/$removal") {
-   open(my $REMOVE, '<', "$root/$removal") ||
-   die "$P: $removal: open failed - $!\n";
-   while (<$REMOVE>) {
-   if (/^Check:\s+(.*\S)/) {
-   for my $entry (split(/[, ]+/, $1)) {
-   if ($entry =~ m@include/(.*)@) {
-   push(@dep_includes, $1);
-
-   } elsif ($entry !~ m@/@) {
-   push(@dep_functions, $entry);
-   }
-   }
-   }
-   }
-   close($REMOVE);
-}
-
 my @rawlines = ();
 my @lines = ();
 my $vname;
@@ -3181,22 +3160,6 @@ sub process {
}
}
 
-# don't include deprecated include files (uses RAW line)
-   for my $inc (@dep_includes) {
-   if ($rawline =~ m@^.\s*\#\s*include\s*\<$inc>@) {
-   ERROR("DEPRECATED_INCLUDE",
- "Don't use <$inc>: see 
Documentation/feature-removal-schedule.txt\n" . $herecurr);
-   }
-   }
-
-# don't use deprecated functions
-   for my $func (@dep_functions) {
-   if ($line =~ /\b$func\b/) {
-   ERROR("DEPRECATED_FUNCTION",
- "Don't use $func(): see 
Documentation/feature-removal-schedule.txt\n" . $herecurr);
-   }
-   }
-
 # no volatiles please
my $asm_volatile = 
qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) {
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 82571EB: Detected Hardware Unit Hang

2012-11-08 Thread Joe Jin

On 11/09/12 04:35, Dave, Tushar N wrote:
> Are you sure this is not similar issue as before that you reported.
> i.e. 

Tushar,

Thanks for your quick response. I'll check with the customer whether they can
modify the Max payload size from the BIOS; this time the issue hit on an HP
server.

Thanks again,
Joe

> On Mon, 2012-07-09 at 16:51 +0800, Joe Jin wrote:
>> > I'm seeing a Unit Hang even with the latest e1000e driver 2.0.0 when 
>> > doing scp test. this issue is easy do reproduced on SUN FIRE X2270 M2, 
>> > just copy a big file (>500M) from another server will hit it at once.
> All devices in path from root complex to 82571, should have *same* max 
> payload size otherwise it can cause hang. 
> Can you double check this?
> 


-- 
Oracle 
Joe Jin | Software Development Senior Manager | +8610.6106.5624
ORACLE | Linux and Virtualization
No. 24 Zhongguancun Software Park, Haidian District | 100193 Beijing 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Is this a kernel bug?

2012-11-08 Thread Tejun Heo

Hello,

On Fri, Nov 09, 2012 at 08:53:49AM +0800, Cyberman Wu wrote:
> A lot of these message on many CPU:

What I'm really curious about is the *first* exception.

Is the following the first one?  Some lines (why the stackdump is
happening) are missing at the top.

>  Pid: 906, comm: kworker/16:1, CPU: 16
...
>  pc : 0xfff7002fc488 ex1: 1 faultnum: 17
> 
> Starting stack dump of tid 906, pid 906 (kworker/16:1) on cpu 16 at
> cycle 416925425702833
>   frame 0: 0xfff7002fc488 worker_enter_idle+0x1c8/0x2e8 (sp
> 0xfe00f9fbfe78)
>   frame 1: 0xfff7002750c8 worker_thread+0x4c8/0x898 (sp 
> 0xfe00f9fbfea0)
>   frame 2: 0xfff7000f0530 kthread+0xe0/0xe8 (sp 0xfe00f9fbff80)
>   frame 3: 0xfff7000bab38 start_kernel_thread+0x18/0x20 (sp

Is it triggering one of BUG_ON() in worker_enter_idle()?  Can you map
the pc to the source line number using addr2line?

> The first exception is platform specific and should be a hardware error:
> fff7002fc480:   180906cfc0128d82{ addi r2, sp, 40 ;
> addi r31, sp, 32 }
> fff7002fc488:   87b886ca04218d95{ addi r21, sp, 24 ;
> addi r20, sp, 16 ; ld lr, r2 }
> While 'ld lr, r2' executed, r2 should be sp+40, but it value is 2.
> I've analysis the execute
> snap shot and:
> 1. r2 should be 2 before 'addi r2, sp, 40' executed.
> 2. r0's value is sp+40 when exception ocurred, but it shouldn't be
> that value following
> executing flow in that function.
> So it seems while 'addi r2, sp 40' be executed, what it really
> executed is 'addi r0, sp, 40',
> maybe the instruction was load with a bit reverted for memory error,
> or cache error or
> problem of CPU? I'm not sure since it never occurred again.

So, the first exception wasn't a software bug?

> What I thought maybe a kernel bug is that second exception. I've
> simulated it try to
> generate a exception in kworker, and it occurred again. Then I checked
> the code and

After a fatal exception in kernel space, nothing is guaranteed to
work.  It's usually in the realm of "if it limps along, great;
otherwise, too bad", so it isn't really a bug.  There are only so many
things you can do after a program segfaults after all.  That said, it
might be a good idea to clear PF_WQ_WORKER from do_exit() so that at
least we can avoid oops from irq context after a work item messes up.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] perf annotate: Merge same lines in summary view

2012-11-08 Thread Namhyung Kim

From: Namhyung Kim 

The --print-line option of perf annotate command shows summary for
each source line.  But it didn't merge identical lines, so the same
source line could appear multiple times.

* before:

Sorted summary for file /home/namhyung/bin/mcol
--

   24.40 /home/namhyung/tmp/mcol.c:26
   21.58 /home/namhyung/tmp/mcol.c:25
   10.14 /home/namhyung/tmp/mcol.c:24
8.59 /home/namhyung/tmp/mcol.c:25
8.57 /home/namhyung/tmp/mcol.c:25
8.42 /home/namhyung/tmp/mcol.c:26
8.31 /home/namhyung/tmp/mcol.c:26
8.30 /home/namhyung/tmp/mcol.c:25
0.80 /home/namhyung/tmp/mcol.c:26

* after:

Sorted summary for file /home/namhyung/bin/mcol
--

   41.93 /home/namhyung/tmp/mcol.c:26
   10.14 /home/namhyung/tmp/mcol.c:24
   47.04 /home/namhyung/tmp/mcol.c:25

To do that, introduce percent_sum field so that the normal
line-by-line output doesn't get changed.

Signed-off-by: Namhyung Kim 
---
 tools/perf/util/annotate.c | 53 --
 tools/perf/util/annotate.h |  1 +
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 83b1078260e3..d604a90c60ef 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -887,6 +887,35 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
struct source_line *iter;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
+   int ret;
+
+   while (*p != NULL) {
+   parent = *p;
+   iter = rb_entry(parent, struct source_line, node);
+
+   ret = strcmp(iter->path, src_line->path);
+   if (ret == 0) {
+   iter->percent_sum += src_line->percent;
+   return;
+   }
+
+   if (ret < 0)
+   p = &(*p)->rb_left;
+   else
+   p = &(*p)->rb_right;
+   }
+
+   src_line->percent_sum = src_line->percent;
+
+   rb_link_node(&src_line->node, parent, p);
+   rb_insert_color(&src_line->node, root);
+}
+
+static void __resort_source_line(struct rb_root *root, struct source_line 
*src_line)
+{
+   struct source_line *iter;
+   struct rb_node **p = &root->rb_node;
+   struct rb_node *parent = NULL;
 
while (*p != NULL) {
parent = *p;
@@ -902,6 +931,24 @@ static void insert_source_line(struct rb_root *root, 
struct source_line *src_lin
rb_insert_color(&src_line->node, root);
 }
 
+static void resort_source_line(struct rb_root *dest_root, struct rb_root 
*src_root)
+{
+   struct source_line *src_line;
+   struct rb_node *node;
+
+   node = rb_first(src_root);
+   while (node) {
+   struct rb_node *next;
+
+   src_line = rb_entry(node, struct source_line, node);
+   next = rb_next(node);
+   rb_erase(node, src_root);
+
+   __resort_source_line(dest_root, src_line);
+   node = next;
+   }
+}
+
 static void symbol__free_source_line(struct symbol *sym, int len)
 {
struct annotation *notes = symbol__annotation(sym);
@@ -926,6 +973,7 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
struct source_line *src_line;
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evidx);
+   struct rb_root tmp_root = RB_ROOT;
 
if (!h->sum)
return 0;
@@ -960,12 +1008,13 @@ static int symbol__get_source_line(struct symbol *sym, 
struct map *map,
goto next;
 
strcpy(src_line[i].path, path);
-   insert_source_line(root, &src_line[i]);
+   insert_source_line(&tmp_root, &src_line[i]);
 
next:
pclose(fp);
}
 
+   resort_source_line(root, &tmp_root);
return 0;
 }
 
@@ -989,7 +1038,7 @@ static void print_summary(struct rb_root *root, const char 
*filename)
char *path;
 
src_line = rb_entry(node, struct source_line, node);
-   percent = src_line->percent;
+   percent = src_line->percent_sum;
color = get_percent_color(percent);
path = src_line->path;
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index c6272011625a..8eec94358a4a 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -76,6 +76,7 @@ struct sym_hist {
 struct source_line {
struct rb_node  node;
double  percent;
+   double  percent_sum;
char*path;
 };
 
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] perf annotate: Parse --asm-raw output properly

2012-11-08 Thread Namhyung Kim

From: Namhyung Kim 

If --asm-raw option was given, objdump output will contain hex numbers
of the instruction before the symbolic name.  However current parser
code doesn't handle it properly.  Fix it.

Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/annotate.c |  2 ++
 tools/perf/util/annotate.c| 29 +
 2 files changed, 31 insertions(+)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 3eff17f703f3..aec11f34d394 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -117,6 +117,8 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int
ab->addr_width, " ");
slsmg_write_nstring(bf, printed);
slsmg_write_nstring(dl->line, width - printed - 6);
+   } else if (!dl->name) {
+   slsmg_write_nstring(" ", width - 7);
} else {
u64 addr = dl->offset;
int color = -1;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b14d4df9f149..83b1078260e3 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -492,6 +492,9 @@ int symbol__inc_addr_samples(struct symbol *sym, struct map 
*map,
 
 static void disasm_line__init_ins(struct disasm_line *dl)
 {
+   if (dl->name == NULL)
+   return;
+
dl->ins = ins__find(dl->name);
 
if (dl->ins == NULL)
@@ -514,6 +517,32 @@ static int disasm_line__parse(char *line, char **namep, 
char **rawp)
if (name[0] == '\0')
return -1;
 
+   if (symbol_conf.annotate_asm_raw) {
+   /*
+* If --asm-raw option was given, objdump output will contain
+* hex numbers of the instructions before the symbolic name.
+* They are separated by at least two space characters:
+*
+*   400540: 48 c7 45 f8 00 00 00movq   $0x0,-0x8(%rbp)
+*   400547: 00
+*
+* It sometimes was broken to multiple lines due to a limited
+* width.  In this case following condition will be met:
+*
+*   dl->offset != -1 && dl->name == NULL.
+*/
+   name = strstr(name, "  ");
+   if (name == NULL)
+   return 0;
+
+   while (isspace(name[0]))
+   ++name;
+
+   if (name[0] == '\0')
+   return -1;
+
+   }
+
*rawp = name + 1;
 
while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] perf annotate: Don't try to follow jump target on PLT symbols

2012-11-08 Thread Namhyung Kim

From: Namhyung Kim 

The perf annotate browser on TUI can identify a jump target for a
selected instruction.  It assumes that the jump target is within the
function, but that is not the case for PLT symbols, whose jump targets
are offsets outside of the function.  Since this caused a segmentation
fault, do not try to follow jump targets on PLT symbols.

Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/annotate.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index aec11f34d394..bb393dd26ba2 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -190,6 +190,12 @@ static void annotate_browser__draw_current_jump(struct 
ui_browser *browser)
struct disasm_line *cursor = ab->selection, *target;
struct browser_disasm_line *btarget, *bcursor;
unsigned int from, to;
+   struct map_symbol *ms = ab->b.priv;
+   struct symbol *sym = ms->sym;
+
+   /* PLT symbols contain external offsets */
+   if (strstr(sym->name, "@plt"))
+   return;
 
if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) ||
!disasm_line__has_offset(cursor))
@@ -773,6 +779,12 @@ static void annotate_browser__mark_jump_targets(struct 
annotate_browser *browser
size_t size)
 {
u64 offset;
+   struct map_symbol *ms = browser->b.priv;
+   struct symbol *sym = ms->sym;
+
+   /* PLT symbols contain external offsets */
+   if (strstr(sym->name, "@plt"))
+   return;
 
for (offset = 0; offset < size; ++offset) {
struct disasm_line *dl = browser->offsets[offset], *dlt;
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 net-next] bonding: extend bond_arp_send_all to bridge devices

2012-11-08 Thread Jay Vosburgh

Chris J Arges  wrote:

>ARP monitoring does not work when we have a network in the
>following configuration:
>
>eth0+ +bond0.100br0-100---{+virtual machines
>  | |
>  +bond0+br0---(fixed IP)->--{LAN arp_ip_target}
>  | |
>eth1+ +bond0.200br0-200---{+virtual machines
>
>This patch extends bond_arp_send_all to check if a device
>is also in a bridge.

I did some testing with this.  The required network
configuration to show the problem is much simpler than this.  I tested
with something of the form:

eth0 -> bond0 -> {bond0.123, br0} br0 is 10.0.9.1/16, arp_ip_target is
10.0.1.1

br0 has the only assigned IP address, and the arp_ip_target is
something on that same subnet.  In this case, merely having 8021q loaded
would induce the problem, as that would add a VLAN (VID 0) to the bond
even without explicitly adding a VLAN to the bond.

The patch does resolve the immediate problem that with the VLAN,
the bond would not send the ARP queries out via eth0.

However, it misses one corner case, because the new test does
not validate that the bond is actually a port of the bridge that's found
via the route.  In this case, a configuration like:

eth0 -> bond0 -> bond0.123 IP address 10.99.0.1/16, arp_ip_target 10.0.1.1
eth1 -> br0 IP address 10.0.9.1/16

would cause the bond to issue the ARP probe request for 10.0.1.1
on eth0, even though it really shouldn't (and currently wouldn't).  If,
say, eth0 and eth1 are parallel subnets, it's possible that the host
with 10.0.1.1 (multihomed to both subnets) may answer on the "wrong"
subnet even if regular traffic routed via the correct subnet can't get
through for some reason.

I don't see an easy way to find out if device X is a port of
bridge Y, either, although we can easily check if the bond is a bridge
port (priv_flags & IFF_BRIDGE_PORT) before doing the new check.  That
doesn't completely fix the problem, but does require that the bond be a
port of a different bridge to cause a misbehavior.

-J

>This is related to the following issues:
>http://launchpad.net/bugs/736226
>http://bugzilla.kernel.org/show_bug.cgi?id=31822
>
>Thanks to help from Andy Gospodarek .
>
>Signed-off-by: Chris J Arges 
>---
> drivers/net/bonding/bond_main.c |8 
> 1 file changed, 8 insertions(+)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index b2530b0..62931b0 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2708,6 +2708,14 @@ static void bond_arp_send_all(struct bonding *bond, 
>struct slave *slave)
>   continue;
>   }
>
>+  /* Check if the target is part of a bridge.
>+   */
>+  if (rt->dst.dev->priv_flags & IFF_EBRIDGE) {
>+  addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
>+  bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 
>addr, 0);
>+  continue;
>+  }
>+
>   if (net_ratelimit()) {
>   pr_warning("%s: no path to arp_ip_target %pI4 via 
> rt.dev %s\n",
>  bond->dev->name, &targets[i],
>-- 
>1.7.9.5
>

---
-Jay Vosburgh, IBM Linux Technology Center, fu...@us.ibm.com

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND] perf test: fix a build error on builtin-test

2012-11-08 Thread Vinson Lee

From: Zheng Liu 

Recently I built perf and got a build error in builtin-test.c. The error is as
follows:

$ make
CC perf.o
CC builtin-test.o
cc1: warnings being treated as errors
builtin-test.c: In function ‘sched__get_first_possible_cpu’:
builtin-test.c:977: warning: implicit declaration of function ‘CPU_ALLOC’
builtin-test.c:977: warning: nested extern declaration of ‘CPU_ALLOC’
builtin-test.c:977: warning: assignment makes pointer from integer without a 
cast
builtin-test.c:978: warning: implicit declaration of function ‘CPU_ALLOC_SIZE’
builtin-test.c:978: warning: nested extern declaration of ‘CPU_ALLOC_SIZE’
builtin-test.c:979: warning: implicit declaration of function ‘CPU_ZERO_S’
builtin-test.c:979: warning: nested extern declaration of ‘CPU_ZERO_S’
builtin-test.c:982: warning: implicit declaration of function ‘CPU_FREE’
builtin-test.c:982: warning: nested extern declaration of ‘CPU_FREE’
builtin-test.c:992: warning: implicit declaration of function ‘CPU_ISSET_S’
builtin-test.c:992: warning: nested extern declaration of ‘CPU_ISSET_S’
builtin-test.c:998: warning: implicit declaration of function ‘CPU_CLR_S’
builtin-test.c:998: warning: nested extern declaration of ‘CPU_CLR_S’
make: *** [builtin-test.o] Error 1

This problem was introduced in 3e7c439a. CPU_ALLOC and related macros are
not defined when building the sched__get_first_possible_cpu function. In
54489c18, the committer mentioned that CPU_ALLOC has been removed. So the
CPU_ALLOC calls in this function are removed to let perf build.

Cc: Arnaldo Carvalho de Melo 
Cc: Ingo Molnar 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Mike Galbraith 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Stephane Eranian 
Signed-off-by: Zheng Liu 
Cc: sta...@vger.kernel.org
Signed-off-by: Vinson Lee 
---
 tools/perf/builtin-test.c |   38 --
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 484f26c..29af87f 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -604,19 +604,13 @@ out_free_threads:
 #undef nsyscalls
 }
 
-static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp,
-size_t *sizep)
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
 {
-   cpu_set_t *mask;
-   size_t size;
int i, cpu = -1, nrcpus = 1024;
 realloc:
-   mask = CPU_ALLOC(nrcpus);
-   size = CPU_ALLOC_SIZE(nrcpus);
-   CPU_ZERO_S(size, mask);
+   CPU_ZERO(maskp);
 
-   if (sched_getaffinity(pid, size, mask) == -1) {
-   CPU_FREE(mask);
+   if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
if (errno == EINVAL && nrcpus < (1024 << 8)) {
nrcpus = nrcpus << 2;
goto realloc;
@@ -626,19 +620,14 @@ realloc:
}
 
for (i = 0; i < nrcpus; i++) {
-   if (CPU_ISSET_S(i, size, mask)) {
-   if (cpu == -1) {
+   if (CPU_ISSET(i, maskp)) {
+   if (cpu == -1)
cpu = i;
-   *maskp = mask;
-   *sizep = size;
-   } else
-   CPU_CLR_S(i, size, mask);
+   else
+   CPU_CLR(i, maskp);
}
}
 
-   if (cpu == -1)
-   CPU_FREE(mask);
-
return cpu;
 }
 
@@ -653,8 +642,8 @@ static int test__PERF_RECORD(void)
.freq   = 10,
.mmap_pages = 256,
};
-   cpu_set_t *cpu_mask = NULL;
-   size_t cpu_mask_size = 0;
+   cpu_set_t cpu_mask;
+   size_t cpu_mask_size = sizeof(cpu_mask);
struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
struct perf_evsel *evsel;
struct perf_sample sample;
@@ -718,8 +707,7 @@ static int test__PERF_RECORD(void)
evsel->attr.sample_type |= PERF_SAMPLE_TIME;
perf_evlist__config_attrs(evlist, &opts);
 
-   err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask,
-   &cpu_mask_size);
+   err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
if (err < 0) {
pr_debug("sched__get_first_possible_cpu: %s\n", 
strerror(errno));
goto out_delete_evlist;
@@ -730,9 +718,9 @@ static int test__PERF_RECORD(void)
/*
 * So that we can check perf_sample.cpu on all the samples.
 */
-   if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 
0) {
+   if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 
0) {
pr_debug("sched_setaffinity: %s\n", strerror(errno));
-   goto out_free_cpu_mask;
+   goto out_delete_evlist;
}
 
/*
@@ -916,8 +904,6 @@ found_exit:
}
 out_err:

Re: Is this a kernel bug?

2012-11-08 Thread Cyberman Wu

A lot of these messages appear on many CPUs:


 Pid: 906, comm: kworker/16:1, CPU: 16
 r0 : 0xfe00f9fbfea0 r1 : 0x0010 r2 : 0x0002
 r3 : 0xfff5001017e4 r4 : 0xfe00 r5 : 0xfea4
 r6 : 0xfe00 r7 : 0x0002 r8 : 0x
 r9 : 0xfff5001017e0 r10: 0xfff5001017dc r11: 0xfff5001017c8
 r12: 0x0001 r13: 0xfe40fc690090 r14: 0x
 r15: 0x r16: 0xfe40fc690088 r17: 0xfe00f841be80
 r18: 0xfe00f841be80 r19: 0xfff500101790 r20: 0x0001
 r21: 0xfe40fe710ce8 r22: 0xfeb5 r23: 0xfff5001017d8
 r24: 0xfe8e3c80 r25: 0x01f4ff82 r26: 0xfea40080
 r27: 0xfe8e r28: 0x0010 r29: 0xfea4
 r30: 0x r31: 0xfe00f9fbfe98 r32: 0xfe00
 r33: 0xfff5001017c8 r34: 0xfe8e3c80 r35: 0xfe40fc6900a0
 r36: 0xfe40fc6900a0 r37: 0xfff5001017dc r38: 0xfeb5ad00
 r39: 0xfea4 r40: 0xfeb5ad04 r41: 0xfe8e0040
 r42: 0xfff5001017c8 r43: 0xfe9aa9a0 r44: 0xfe8e3c80
 r45: 0xfe40fc6900b0 r46: 0xfff5001017d8 r47: 0xfeb5ad05
 r48: 0xfe8e3c80 r49: 0xfe40fc6900b8 r50: 0xfff5001017e4
 r51: 0xfff5001017c0 r52: 0xfe8e3c80 tp : 0x01f4ff82
 sp : 0xfe00f9fbfe78 lr : 0x0002
 pc : 0xfff7002fc488 ex1: 1 faultnum: 17

Starting stack dump of tid 906, pid 906 (kworker/16:1) on cpu 16 at
cycle 416925425702833
  frame 0: 0xfff7002fc488 worker_enter_idle+0x1c8/0x2e8 (sp
0xfe00f9fbfe78)
  frame 1: 0xfff7002750c8 worker_thread+0x4c8/0x898 (sp 0xfe00f9fbfea0)
  frame 2: 0xfff7000f0530 kthread+0xe0/0xe8 (sp 0xfe00f9fbff80)
  frame 3: 0xfff7000bab38 start_kernel_thread+0x18/0x20 (sp
0xfe00f9fbffe8)
Stack dump complete
Unable to handle kernel paging request
 at virtual address 0xfff8, pc 0xfff700375f58

 Pid: 906, comm: kworker/16:1, CPU: 16
 r0 : 0xfff8 r1 : 0x r2 : 0xfe00f841c1b8
 r3 : 0x3459 r4 : 0x0001 r5 : 0x
 r6 : 0xfe00f9fb0028 r7 : 0x01f4ff82 r8 : 0xfe00f9fb
 r9 : 0x r10: 0x0081 r11: 0xfe00f841be9c
 r12: 0xfff500103c68 r13: 0xfe00f9fbf488 r14: 0xfe00f9fbf4c8
 r15: 0xfe00f9fbf490 r16: 0xfe00f9fbf498 r17: 0xfe00f9fbf4a0
 r18: 0xfe00f841c5b0 r19: 0xfe00f9fbf4a8 r20: 0xfe00f841c0e8
 r21: 0x8420806c r22: 0x0020 r23: 0xfea7b988
 r24: 0xfe00f841be94 r25: 0xfe00 r26: 0xfea7
 r27: 0xfe00f9fbf440 r28: 0xfe00f9fbf438 r29: 0xfe00f9fbf448
 r30: 0x0010 r31: 0xfe00f841be80 r32: 0x001a1174
 r33: 0x001a1173 r34: 0xfe00f9fbf610 r35: 0x0001f9fbf398
 r36: 0xfe401d9008c0 r37: 0xfe401d9008c0 r38: 0xfe401d9008c8
 r39: 0xfea9c770 r40: 0xfea9c750 r41: 0x0001
 r42: 0xfe401d900990 r43: 0xfff7003dd1b0 r44: 0xfe00f9fbf350
 r45: 0xfeb5865b r46: 0x0002 r47: 0xfeb58a50
 r48: 0xfff7003dfbe8 r49: 0xfe00f9fbf400 r50: 0x6c102009
 r51: 0x663966626538 r52: 0xfe00f9fbf790 tp : 0x01f4ff82
 sp : 0xfe00f9fbf430 lr : 0xfff700357fe8
 pc : 0xfff700375f58 ex1: 1 faultnum: 18

Starting stack dump of tid 906, pid 906 (kworker/16:1) on cpu 16 at
cycle 416925426066163
  frame 0: 0xfff700375f58 kthread_data+0x18/0x20 (sp 0xfe00f9fbf430)
  frame 1: 0xfff700357fe8 wq_worker_sleeping+0x28/0xf8 (sp
0xfe00f9fbf430)
  frame 2: 0xfff700021ab8 schedule+0xd00/0x1538 (sp 0xfe00f9fbf448)
  frame 3: 0xfff70041f950 do_exit+0x510/0x658 (sp 0xfe00f9fbf790)
  frame 4: 0xfff7000ade50 do_group_exit+0xc0/0x220 (sp 0xfe00f9fbf840)
  frame 5: 0xfff7001137a0 jit_bundle_gen+0xf20/0x27d8 (sp
0xfe00f9fbf878)
  frame 6: 0xfff70034e830 do_unaligned+0xe0/0x5b0 (sp 0xfe00f9fbfac8)
  frame 7: 0xfff700139af8 handle_interrupt+0x270/0x278 (sp
0xfe00f9fbfc00)
  
  frame 8: 0xfff7002fc488 worker_enter_idle+0x1c8/0x2e8 (sp
0xfe00f9fbfe78)
  frame 9: 0xfff7002750c8 worker_thread+0x4c8/0x898 (sp 0xfe00f9fbfea0)
  frame 10: 0xfff7000f0530 kthread+0xe0/0xe8 (sp 0xfe00f9fbff80)
  frame 11: 0xfff7000bab38 start_kernel_thread+0x18/0x20 (sp
0xfe00f9fbffe8)
Stack dump complete
Fixing recursive fault but reboot is needed!

The first exception is platform specific and should be a hardware error:
fff7002fc480:   180906cfc0128d82{ addi r2, sp, 40 ;
addi r31, sp, 32 }
fff7002fc488:   87b886ca04218d95{ addi r21, sp, 24 ;
addi r20, sp, 16 ; ld lr, r2 }
When 'ld lr, r2' executed, r2 should have been sp+40, but its value was 2.
I've analyzed the execution
snapshot and:
1. r2 should be 2 before 'ad

[PATCHv2] mm: Fix calculation of dirtyable memory

2012-11-08 Thread Sonny Rao

The system uses global_dirtyable_memory() to calculate
the number of dirtyable pages (pages that can be allocated
to the page cache).  A bug causes an underflow, making
the page count look like a big unsigned number.  This in turn
confuses the dirty writeback throttling into aggressively writing
back pages as they become dirty (usually 1 page at a time).

Fix is to ensure there is no underflow while doing the math.

Signed-off-by: Sonny Rao 
Signed-off-by: Puneet Kumar 
---
 v2: added akpm's suggestion to make the highmem calculation better
 mm/page-writeback.c |   17 +++--
 1 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 830893b..ce62442 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -201,6 +201,18 @@ static unsigned long highmem_dirtyable_memory(unsigned 
long total)
 zone_reclaimable_pages(z) - z->dirty_balance_reserve;
}
/*
+* Unreclaimable memory (kernel memory or anonymous memory
+* without swap) can bring down the dirtyable pages below
+* the zone's dirty balance reserve and the above calculation
+* will underflow.  However we still want to add in nodes
+* which are below threshold (negative values) to get a more
+* accurate calculation but make sure that the total never
+* underflows.
+*/
+   if ((long)x < 0)
+   x = 0;
+
+   /*
 * Make sure that the number of highmem pages is never larger
 * than the number of the total dirtyable memory. This can only
 * occur in very strange VM situations but we want to make sure
@@ -222,8 +234,9 @@ static unsigned long global_dirtyable_memory(void)
 {
unsigned long x;
 
-   x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
-   dirty_balance_reserve;
+   x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+   if (x >= dirty_balance_reserve)
+   x -= dirty_balance_reserve;
 
if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
-- 
1.7.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv2] mm: Fix calculation of dirtyable memory

2012-11-08 Thread Sonny Rao

On Thu, Nov 8, 2012 at 4:42 PM, Sonny Rao  wrote:
> add apkm's suggestion
>

Oops, sorry, will add akpm's suggestion and re-post
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv2] mm: Fix calculation of dirtyable memory

2012-11-08 Thread Sonny Rao

add apkm's suggestion

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RESEND v2 1/1] percpu_rw_semaphore: reimplement to not block the readers unnecessarily

2012-11-08 Thread Paul E. McKenney

On Thu, Nov 08, 2012 at 06:41:10PM -0500, Mikulas Patocka wrote:
> 
> 
> On Thu, 8 Nov 2012, Paul E. McKenney wrote:
> 
> > On Thu, Nov 08, 2012 at 12:07:00PM -0800, Andrew Morton wrote:
> > > On Thu, 8 Nov 2012 14:48:49 +0100
> > > Oleg Nesterov  wrote:
> > > 
> > > > Currently the writer does msleep() plus synchronize_sched() 3 times
> > > > to acquire/release the semaphore, and during this time the readers
> > > > are blocked completely. Even if the "write" section was not actually
> > > > started or if it was already finished.
> > > > 
> > > > With this patch down_write/up_write does synchronize_sched() twice
> > > > and down_read/up_read are still possible during this time, just they
> > > > use the slow path.
> > > > 
> > > > percpu_down_write() first forces the readers to use rw_semaphore and
> > > > increment the "slow" counter to take the lock for reading, then it
> > > > takes that rw_semaphore for writing and blocks the readers.
> > > > 
> > > > Also. With this patch the code relies on the documented behaviour of
> > > > synchronize_sched(), it doesn't try to pair synchronize_sched() with
> > > > barrier.
> > > > 
> > > > ...
> > > >
> > > >  include/linux/percpu-rwsem.h |   83 +
> > > >  lib/Makefile |2 +-
> > > >  lib/percpu-rwsem.c   |  123 
> > > > ++
> > > 
> > > The patch also uninlines everything.
> > > 
> > > And it didn't export the resulting symbols to modules, so it isn't an
> > > > equivalent.  We can export things later if needed I guess.
> > > 
> > > It adds percpu-rwsem.o to lib-y, so the CONFIG_BLOCK=n kernel will
> > > avoid including the code altogether, methinks?
> > > 
> > > >
> > > > ...
> > > >
> > > > --- /dev/null
> > > > +++ b/lib/percpu-rwsem.c
> > > > @@ -0,0 +1,123 @@
> > > 
> > > That was nice and terse ;)
> > > 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > 
> > > This list is nowhere near sufficient to support this file's
> > > requirements.  atomic.h, percpu.h, rwsem.h, wait.h, errno.h and plenty
> > > more.  IOW, if it compiles, it was sheer luck.
> > > 
> > > > +int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
> > > > +{
> > > > +   brw->fast_read_ctr = alloc_percpu(int);
> > > > +   if (unlikely(!brw->fast_read_ctr))
> > > > +   return -ENOMEM;
> > > > +
> > > > +   mutex_init(&brw->writer_mutex);
> > > > +   init_rwsem(&brw->rw_sem);
> > > > +   atomic_set(&brw->slow_read_ctr, 0);
> > > > +   init_waitqueue_head(&brw->write_waitq);
> > > > +   return 0;
> > > > +}
> > > > +
> > > > +void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
> > > > +{
> > > > +   free_percpu(brw->fast_read_ctr);
> > > > +   brw->fast_read_ctr = NULL; /* catch use after free bugs */
> > > > +}
> > > > +
> > > > +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned 
> > > > int val)
> > > > +{
> > > > +   bool success = false;
> > > > +
> > > > +   preempt_disable();
> > > > +   if (likely(!mutex_is_locked(&brw->writer_mutex))) {
> > > > +   __this_cpu_add(*brw->fast_read_ctr, val);
> > > > +   success = true;
> > > > +   }
> > > > +   preempt_enable();
> > > > +
> > > > +   return success;
> > > > +}
> > > > +
> > > > +/*
> > > > + * Like the normal down_read() this is not recursive, the writer can
> > > > + * come after the first percpu_down_read() and create the deadlock.
> > > > + */
> > > > +void percpu_down_read(struct percpu_rw_semaphore *brw)
> > > > +{
> > > > +   if (likely(update_fast_ctr(brw, +1)))
> > > > +   return;
> > > > +
> > > > +   down_read(&brw->rw_sem);
> > > > +   atomic_inc(&brw->slow_read_ctr);
> > > > +   up_read(&brw->rw_sem);
> > > > +}
> > > > +
> > > > +void percpu_up_read(struct percpu_rw_semaphore *brw)
> > > > +{
> > > > +   if (likely(update_fast_ctr(brw, -1)))
> > > > +   return;
> > > > +
> > > > +   /* false-positive is possible but harmless */
> > > > +   if (atomic_dec_and_test(&brw->slow_read_ctr))
> > > > +   wake_up_all(&brw->write_waitq);
> > > > +}
> > > > +
> > > > +static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
> > > > +{
> > > > +   unsigned int sum = 0;
> > > > +   int cpu;
> > > > +
> > > > +   for_each_possible_cpu(cpu) {
> > > > +   sum += per_cpu(*brw->fast_read_ctr, cpu);
> > > > +   per_cpu(*brw->fast_read_ctr, cpu) = 0;
> > > > +   }
> > > > +
> > > > +   return sum;
> > > > +}
> > > > +
> > > > +/*
> > > > + * A writer takes ->writer_mutex to exclude other writers and to force 
> > > > the
> > > > + * readers to switch to the slow mode, note the mutex_is_locked() 
> > > > check in
> > > > + * update_fast_ctr().
> > > > + *
> > > > + * After that the readers can only inc/dec the slow ->slow_read_ctr 
> > > > counter,
> > > > + * ->fast_read_ctr is stable. Once the writer moves its sum

1 2 3 4 5 6 >

1 - 100 of 558 matches

Mail list logo