from:"Zoran Markovic"

[RFC PATCH] smack: fix access permissions for keyring

2018-10-17 Thread Zoran Markovic

Function smack_key_permission() only issues smack requests for the
following operations:
 - KEY_NEED_READ (issues MAY_READ)
 - KEY_NEED_WRITE (issues MAY_WRITE)
 - KEY_NEED_LINK (issues MAY_WRITE)
 - KEY_NEED_SETATTR (issues MAY_WRITE)
A blank smack request is issued in all other cases, resulting in
smack access being granted if there is any rule defined between
subject and object, or denied with -EACCES otherwise.

Request MAY_READ access for KEY_NEED_SEARCH and KEY_NEED_VIEW.
Fix the logic in the unlikely case when both MAY_READ and
MAY_WRITE are needed. Validate access permission field for valid
contents.

Signed-off-by: Zoran Markovic 
Cc: Casey Schaufler 
Cc: James Morris 
Cc: "Serge E. Hallyn" 
---
 security/smack/smack_lsm.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 340fc30..77e405f 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4326,6 +4326,12 @@ static int smack_key_permission(key_ref_t key_ref,
int request = 0;
int rc;
 
+   /*
+* Validate requested permissions
+*/
+   if (perm & ~KEY_NEED_ALL)
+   return -EINVAL;
+
keyp = key_ref_to_ptr(key_ref);
if (keyp == NULL)
return -EINVAL;
@@ -4349,10 +4355,10 @@ static int smack_key_permission(key_ref_t key_ref,
ad.a.u.key_struct.key = keyp->serial;
ad.a.u.key_struct.key_desc = keyp->description;
 #endif
-   if (perm & KEY_NEED_READ)
-   request = MAY_READ;
+   if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW))
+   request |= MAY_READ;
if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR))
-   request = MAY_WRITE;
+   request |= MAY_WRITE;
rc = smk_access(tkp, keyp->security, request, &ad);
rc = smk_bu_note("key access", tkp, keyp->security, request, rc);
return rc;
-- 
2.7.4

[RFC PATCHv2 2/4] clk: mdm9615: Add EBI2 clock

2016-12-22 Thread Zoran Markovic

Add definition of EBI2 clock used by MDM9615 NAND controller.

Cc: Andy Gross 
Cc: David Brown 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Neil Armstrong 
Cc: linux-arm-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicet...@vger.kernel.org
Signed-off-by: Zoran Markovic 
---
 drivers/clk/qcom/gcc-mdm9615.c   |   30 ++
 include/dt-bindings/clock/qcom,gcc-mdm9615.h |3 +++
 2 files changed, 33 insertions(+)

diff --git a/drivers/clk/qcom/gcc-mdm9615.c b/drivers/clk/qcom/gcc-mdm9615.c
index 581a17f..e9e98b1 100644
--- a/drivers/clk/qcom/gcc-mdm9615.c
+++ b/drivers/clk/qcom/gcc-mdm9615.c
@@ -1563,6 +1563,34 @@ enum {
},
 };
 
+static struct clk_branch ebi2_clk = {
+   .hwcg_reg = 0x2664,
+   .hwcg_bit = 6,
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(6) | BIT(4),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
+static struct clk_branch ebi2_aon_clk = {
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(8),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_aon_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
 static struct clk_hw *gcc_mdm9615_hws[] = {
&cxo.hw,
 };
@@ -1637,6 +1665,8 @@ enum {
[PMIC_ARB1_H_CLK] = &pmic_arb1_h_clk.clkr,
[PMIC_SSBI2_CLK] = &pmic_ssbi2_clk.clkr,
[RPM_MSG_RAM_H_CLK] = &rpm_msg_ram_h_clk.clkr,
+   [EBI2_CLK] = &ebi2_clk.clkr,
+   [EBI2_AON_CLK] = &ebi2_aon_clk.clkr,
 };
 
 static const struct qcom_reset_map gcc_mdm9615_resets[] = {
diff --git a/include/dt-bindings/clock/qcom,gcc-mdm9615.h 
b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
index 9ab2c40..57cdca6 100644
--- a/include/dt-bindings/clock/qcom,gcc-mdm9615.h
+++ b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
@@ -323,5 +323,8 @@
 #define CE3_H_CLK  305
 #define USB_HS1_SYSTEM_CLK_SRC 306
 #define USB_HS1_SYSTEM_CLK 307
+#define EBI2_CLK   308
+#define EBI2_AON_CLK   309
+
 
 #endif
-- 
1.7.9.5

[RFC PATCH 0/4] Enable NAND on Sierra Wireless WP8548 board

2016-12-22 Thread Zoran Markovic

Enable NAND flash on Sierra Wireless's WP8548 module used on MangOH
Green board. The patch set consists of device tree descriptions for
ADM DMA engine, NAND controller and NAND flash partitioned for
Sierra Wireless Legato framework, as well as definition of EBI2
clock used by NAND controller.

This patch set depends on Andy Gross's driver for ADM DMA engine:
https://lwn.net/Articles/636881/

Zoran Markovic (4):
  dt-bindings: mdm9615: Add ADM DMA engine
  clk: mdm9615: Add EBI2 clock
  dt-bindings: mdm9615: Add NAND controller
  dt-bindings: wp8548: Add on-board NAND flash

 arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi   |   50 ++
 arch/arm/boot/dts/qcom-mdm9615.dtsi  |   35 +-
 drivers/clk/qcom/gcc-mdm9615.c   |   30 
 include/dt-bindings/clock/qcom,gcc-mdm9615.h |3 ++
 4 files changed, 117 insertions(+), 1 deletion(-)

-- 
1.7.9.5

[RFC PATCH 3/4] dt-bindings: mdm9615: Add NAND controller

2016-12-22 Thread Zoran Markovic

Add dt description of NAND controller on MDM9615.

Signed-off-by: Zoran Markovic 
---
 arch/arm/boot/dts/qcom-mdm9615.dtsi |   16 
 1 file changed, 16 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615.dtsi
index fbc7d68..6d42ff3 100644
--- a/arch/arm/boot/dts/qcom-mdm9615.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi
@@ -373,6 +373,22 @@
qcom,ee = <0>;
};
 
+   nand0: nand@1b40 {
+   compatible = "qcom,ipq806x-nand";
+   reg = <0x1b40 0x800>;
+   clocks = <&gcc EBI2_CLK>,
+<&gcc EBI2_AON_CLK>;
+   clock-names = "core", "aon";
+
+   dmas = <&adm_dma 3>;
+   dma-names = "rxtx";
+   qcom,cmd-crci = <15>;
+   qcom,data-crci = <3>;
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   };
+
amba {
compatible = "arm,amba-bus";
#address-cells = <1>;
-- 
1.7.9.5

[RFC PATCH 1/4] dt-bindings: mdm9615: Add ADM DMA engine

2016-12-22 Thread Zoran Markovic

Add configuration for ADM DMA engine on MDM9615, used by the EBI2
NAND controller. This commit requires the ADM DMA patches from
Andy Gross:
https://lwn.net/Articles/636881/

Signed-off-by: Zoran Markovic 

---
 arch/arm/boot/dts/qcom-mdm9615.dtsi |   19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615.dtsi
index 5ae4ec5..fbc7d68 100644
--- a/arch/arm/boot/dts/qcom-mdm9615.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi
@@ -336,7 +336,24 @@
};
};
 
-   sdcc1bam: dma@12182000{
+   adm_dma: dma@1830 {
+   compatible = "qcom,adm";
+   reg = <0x1830 0x10>;
+   interrupts = <0 170 0>;
+   #dma-cells = <1>;
+
+   clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>;
+   clock-names = "core", "iface";
+
+   resets = <&gcc ADM0_RESET>,
+<&gcc ADM0_C0_RESET>,
+<&gcc ADM0_C1_RESET>,
+<&gcc ADM0_C2_RESET>;
+   reset-names = "clk", "c0", "c1", "c2";
+   qcom,ee = <0>;
+   };
+
+   sdcc1bam:dma@12182000{
compatible = "qcom,bam-v1.3.0";
reg = <0x12182000 0x8000>;
interrupts = ;
-- 
1.7.9.5

[RFC PATCH 3/4] dt-bindings: mdm9615: Add NAND controller

2016-12-22 Thread Zoran Markovic

Add dt description of NAND controller on MDM9615.

Cc: Andy Gross 
Cc: David Brown 
Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Russell King 
Cc: linux-arm-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicet...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Signed-off-by: Zoran Markovic 
---
 arch/arm/boot/dts/qcom-mdm9615.dtsi |   16 
 1 file changed, 16 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615.dtsi
index fbc7d68..6d42ff3 100644
--- a/arch/arm/boot/dts/qcom-mdm9615.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi
@@ -373,6 +373,22 @@
qcom,ee = <0>;
};
 
+   nand0: nand@1b40 {
+   compatible = "qcom,ipq806x-nand";
+   reg = <0x1b40 0x800>;
+   clocks = <&gcc EBI2_CLK>,
+<&gcc EBI2_AON_CLK>;
+   clock-names = "core", "aon";
+
+   dmas = <&adm_dma 3>;
+   dma-names = "rxtx";
+   qcom,cmd-crci = <15>;
+   qcom,data-crci = <3>;
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   };
+
amba {
compatible = "arm,amba-bus";
#address-cells = <1>;
-- 
1.7.9.5

[RFC PATCH 2/4] clk: mdm9615: Add EBI2 clock

2016-12-22 Thread Zoran Markovic

Add definition of EBI2 clock used by MDM9615 NAND controller.

Cc: Andy Gross 
Cc: David Brown 
Cc: Michael Turquette 
Cc: Stephen Boyd 
Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Neil Armstrong 
Cc: linux-arm-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicet...@vger.kernel.org
Signed-off-by: Zoran Markovic 
---
 drivers/clk/qcom/gcc-mdm9615.c   |   30 ++
 include/dt-bindings/clock/qcom,gcc-mdm9615.h |3 +++
 2 files changed, 33 insertions(+)

diff --git a/drivers/clk/qcom/gcc-mdm9615.c b/drivers/clk/qcom/gcc-mdm9615.c
index 581a17f..e9e98b1 100644
--- a/drivers/clk/qcom/gcc-mdm9615.c
+++ b/drivers/clk/qcom/gcc-mdm9615.c
@@ -1563,6 +1563,34 @@ enum {
},
 };
 
+static struct clk_branch ebi2_clk = {
+   .hwcg_reg = 0x2664,
+   .hwcg_bit = 6,
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(6)|BIT(4),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
+static struct clk_branch ebi2_aon_clk = {
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(8),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_always_on_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
 static struct clk_hw *gcc_mdm9615_hws[] = {
&cxo.hw,
 };
@@ -1637,6 +1665,8 @@ enum {
[PMIC_ARB1_H_CLK] = &pmic_arb1_h_clk.clkr,
[PMIC_SSBI2_CLK] = &pmic_ssbi2_clk.clkr,
[RPM_MSG_RAM_H_CLK] = &rpm_msg_ram_h_clk.clkr,
+   [EBI2_CLK] = &ebi2_clk.clkr,
+   [EBI2_AON_CLK] = &ebi2_aon_clk.clkr,
 };
 
 static const struct qcom_reset_map gcc_mdm9615_resets[] = {
diff --git a/include/dt-bindings/clock/qcom,gcc-mdm9615.h 
b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
index 9ab2c40..57cdca6 100644
--- a/include/dt-bindings/clock/qcom,gcc-mdm9615.h
+++ b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
@@ -323,5 +323,8 @@
 #define CE3_H_CLK  305
 #define USB_HS1_SYSTEM_CLK_SRC 306
 #define USB_HS1_SYSTEM_CLK 307
+#define EBI2_CLK   309
+#define EBI2_AON_CLK   310
+
 
 #endif
-- 
1.7.9.5

[RFC PATCH 4/4] dt-bindings: wp8548: Add on-board NAND flash

2016-12-22 Thread Zoran Markovic

Add description of NAND flash on Sierra Wireless WP8548 module
(and MangOH board).

Signed-off-by: Zoran Markovic 
---
 arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi |   50 
 1 file changed, 50 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
index 7869898..a4d1158 100644
--- a/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
@@ -54,6 +54,56 @@
};
 };
 
+&nand0 {
+   nandcs@0 {
+   compatible = "qcom,nandcs";
+   reg = <0>;
+
+   linux,mtd-name = "micron,mt29f4g08";
+   #address-cells = <1>;
+   #size-cells = <0>;
+   nand-ecc-strength = <4>;
+   nand-ecc-step-size = <512>;
+
+   partitions {
+   compatible = "fixed-partitions";
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   bootloader@0x051c {
+   reg = <0x51c 0x10>;
+   read-only;
+   };
+
+   kernel@0x052c {
+   reg = <0x52c 0x140>;
+   read-only;
+   };
+
+   rootfs@0x066c {
+   reg = <0x66c 0x314>;
+   read-only;
+   };
+
+   user0@0x0980 {
+   reg = <0x980 0x278>;
+   };
+
+   user1@0x0bf8 {
+   reg = <0xbf8 0x8B8>;
+   };
+
+   user2@0x14b0 {
+   reg = <0x14b0 0x50>;
+   };
+
+   user3@0x1500 {
+   reg = <0x1500 0x20>;
+   };
+   };
+   };
+};
+
 &msmgpio {
pinctrl-0 = <&reset_out_pins>;
pinctrl-names = "default";
-- 
1.7.9.5

[RFC PATCH 0/4] Enable NAND on Sierra Wireless WP8548 board

2016-12-22 Thread Zoran Markovic

Enable NAND flash on Sierra Wireless's WP8548 module used on MangOH
Green board. The patch set consists of device tree descriptions for
ADM DMA engine, NAND controller and NAND flash partitioned for
Sierra Wireless Legato framework, as well as definition of EBI2
clock used by NAND controller.

This patch set depends on Andy Gross's driver for ADM DMA engine:
https://lwn.net/Articles/636881/

Zoran Markovic (4):
  dt-bindings: mdm9615: Add ADM DMA engine
  clk: mdm9615: Add EBI2 clock
  dt-bindings: mdm9615: Add NAND controller
  dt-bindings: wp8548: Add on-board NAND flash

 arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi   |   50 ++
 arch/arm/boot/dts/qcom-mdm9615.dtsi  |   35 +-
 drivers/clk/qcom/gcc-mdm9615.c   |   30 
 include/dt-bindings/clock/qcom,gcc-mdm9615.h |3 ++
 4 files changed, 117 insertions(+), 1 deletion(-)

-- 
1.7.9.5

[RFC PATCH 2/4] clk: mdm9615: Add EBI2 clock

2016-12-22 Thread Zoran Markovic

Add definition of EBI2 clock used by MDM9615 NAND controller.

Signed-off-by: Zoran Markovic 
---
 drivers/clk/qcom/gcc-mdm9615.c   |   30 ++
 include/dt-bindings/clock/qcom,gcc-mdm9615.h |3 +++
 2 files changed, 33 insertions(+)

diff --git a/drivers/clk/qcom/gcc-mdm9615.c b/drivers/clk/qcom/gcc-mdm9615.c
index 581a17f..e9e98b1 100644
--- a/drivers/clk/qcom/gcc-mdm9615.c
+++ b/drivers/clk/qcom/gcc-mdm9615.c
@@ -1563,6 +1563,34 @@ enum {
},
 };
 
+static struct clk_branch ebi2_clk = {
+   .hwcg_reg = 0x2664,
+   .hwcg_bit = 6,
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(6)|BIT(4),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
+static struct clk_branch ebi2_aon_clk = {
+   .halt_reg = 0x2fcc,
+   .halt_bit = 23,
+   .clkr = {
+   .enable_reg = 0x2664,
+   .enable_mask = BIT(8),
+   .hw.init = &(struct clk_init_data){
+   .name = "ebi2_always_on_clk",
+   .ops = &clk_branch_ops,
+   },
+   },
+};
+
 static struct clk_hw *gcc_mdm9615_hws[] = {
&cxo.hw,
 };
@@ -1637,6 +1665,8 @@ enum {
[PMIC_ARB1_H_CLK] = &pmic_arb1_h_clk.clkr,
[PMIC_SSBI2_CLK] = &pmic_ssbi2_clk.clkr,
[RPM_MSG_RAM_H_CLK] = &rpm_msg_ram_h_clk.clkr,
+   [EBI2_CLK] = &ebi2_clk.clkr,
+   [EBI2_AON_CLK] = &ebi2_aon_clk.clkr,
 };
 
 static const struct qcom_reset_map gcc_mdm9615_resets[] = {
diff --git a/include/dt-bindings/clock/qcom,gcc-mdm9615.h 
b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
index 9ab2c40..57cdca6 100644
--- a/include/dt-bindings/clock/qcom,gcc-mdm9615.h
+++ b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
@@ -323,5 +323,8 @@
 #define CE3_H_CLK  305
 #define USB_HS1_SYSTEM_CLK_SRC 306
 #define USB_HS1_SYSTEM_CLK 307
+#define EBI2_CLK   309
+#define EBI2_AON_CLK   310
+
 
 #endif
-- 
1.7.9.5

[RFC PATCH 1/4] dt-bindings: mdm9615: Add ADM DMA engine

2016-12-22 Thread Zoran Markovic

Add configuration for ADM DMA engine on MDM9615, used by the EBI2
NAND controller. This commit requires the ADM DMA patches from
Andy Gross:
https://lkml.org/lkml/2015/3/17/19

Cc: Andy Gross 
Cc: David Brown 
Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Russell King 
Cc: linux-arm-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicet...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Signed-off-by: Zoran Markovic 
---
 arch/arm/boot/dts/qcom-mdm9615.dtsi |   19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615.dtsi
index 5ae4ec5..fbc7d68 100644
--- a/arch/arm/boot/dts/qcom-mdm9615.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi
@@ -336,7 +336,24 @@
};
};
 
-   sdcc1bam: dma@12182000{
+   adm_dma: dma@1830 {
+   compatible = "qcom,adm";
+   reg = <0x1830 0x10>;
+   interrupts = <0 170 0>;
+   #dma-cells = <1>;
+
+   clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>;
+   clock-names = "core", "iface";
+
+   resets = <&gcc ADM0_RESET>,
+<&gcc ADM0_C0_RESET>,
+<&gcc ADM0_C1_RESET>,
+<&gcc ADM0_C2_RESET>;
+   reset-names = "clk", "c0", "c1", "c2";
+   qcom,ee = <0>;
+   };
+
+   sdcc1bam:dma@12182000{
compatible = "qcom,bam-v1.3.0";
reg = <0x12182000 0x8000>;
interrupts = ;
-- 
1.7.9.5

[RFC PATCH 4/4] dt-bindings: wp8548: Add on-board NAND flash

2016-12-22 Thread Zoran Markovic

Add description of NAND flash on Sierra Wireless WP8548 module
(and MangOH board).

Cc: Andy Gross 
Cc: David Brown 
Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Russell King 
Cc: linux-arm-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: devicet...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Signed-off-by: Zoran Markovic 
---
 arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi |   50 
 1 file changed, 50 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi 
b/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
index 7869898..a4d1158 100644
--- a/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
+++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi
@@ -54,6 +54,56 @@
};
 };
 
+&nand0 {
+   nandcs@0 {
+   compatible = "qcom,nandcs";
+   reg = <0>;
+
+   linux,mtd-name = "micron,mt29f4g08";
+   #address-cells = <1>;
+   #size-cells = <0>;
+   nand-ecc-strength = <4>;
+   nand-ecc-step-size = <512>;
+
+   partitions {
+   compatible = "fixed-partitions";
+   #address-cells = <1>;
+   #size-cells = <1>;
+
+   bootloader@0x051c {
+   reg = <0x51c 0x10>;
+   read-only;
+   };
+
+   kernel@0x052c {
+   reg = <0x52c 0x140>;
+   read-only;
+   };
+
+   rootfs@0x066c {
+   reg = <0x66c 0x314>;
+   read-only;
+   };
+
+   user0@0x0980 {
+   reg = <0x980 0x278>;
+   };
+
+   user1@0x0bf8 {
+   reg = <0xbf8 0x8B8>;
+   };
+
+   user2@0x14b0 {
+   reg = <0x14b0 0x50>;
+   };
+
+   user3@0x1500 {
+   reg = <0x1500 0x20>;
+   };
+   };
+   };
+};
+
 &msmgpio {
pinctrl-0 = <&reset_out_pins>;
pinctrl-names = "default";
-- 
1.7.9.5

[RFC PATCH 2/2] sched: Add documentation for idlestat scheduler benchmarking tool

2014-03-24 Thread Zoran Markovic

This patch documents the proposed functionality of idlestat tool and
states its intended use for scheduler benchmarking. The documentation
file describes the design of the tool, what kernel functionality it
relies upon, and what information is contained in the output report.
It also contains a simple linear model for estimating CPU power
consumption during idlestat run.

Idlestat focuses itself on CPU and cluster power states in precise
intervals in time. This is of particular use when the benchmarked
process is a load synthesis tool: idlestat could focus its acquisition
period to a particular sub-period in the load sequence. Output results
from idlestat can be applied to a power model in order to estimate the
power consumption of CPUs and clusters during the benchmark interval.
Initial measurements on ARM Versatile Express TC2 platform show a model
error of ~2.6% for the linear power model described in the documentation.

Cc: Rob Landley 
Cc: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Cc: Daniel Lezcano 
Signed-off-by: Zoran Markovic 
---
 Documentation/scheduler/idlestat.txt |   79 ++
 1 file changed, 79 insertions(+)
 create mode 100644 Documentation/scheduler/idlestat.txt

diff --git a/Documentation/scheduler/idlestat.txt 
b/Documentation/scheduler/idlestat.txt
new file mode 100644
index 000..8e6b695
--- /dev/null
+++ b/Documentation/scheduler/idlestat.txt
@@ -0,0 +1,79 @@
+This document captures the desired operation of the idlestat tool.
+
+With the advent of battery-powered Linux devices, it became important to add
+a power-aware component to the existing CFS scheduler solution. Future
+developments in this field need to be benchmarked using a simple tool that
+monitors power parameters during system runs and provides sufficient info for
+developers to assess how changes to scheduler code affected CPU power
+consumption. The idlestat tool attempts to capture this.
+
+Idlestat uses kernel's FTRACE function to monitor and capture C-state and
+P-state transitions of CPUs over a time interval. It extracts the following
+information from trace file:
+   - Times when CPUs entered and exited a certain C-state
+   - Times when CPUs entered and exited a certain P-state
+   - Raised IRQs
+
+Following a successful run, idlestat calculates and reports the following
+information:
+   - Total, average, minimum and maximum time spent in each C-state,
+ per-CPU.
+   - Total, average, minimum and maximum time spent in each P-state,
+ per-CPU.
+   - Total, average, minimum and maximum time during which all CPUs in
+ a cluster were in the same C-state, per-cluster.
+   - Number of times a certain IRQ caused a CPU to exit idle state,
+ per-CPU and per-IRQ.
+
+The tool parses sysfs entries to determine the CPU/cluster topology, as well
+as supported C-states and P-states per CPU. It is unaware of CPU/cluster power
+consumption in each C-state and P-state, but if these parameters are
+externally known, a ballpark estimate of the energy consumed during idlestat
+run can be calculated as follows:
+
+energy = sum_per_cpu(PCi*(TCi-TCCi)) + sum_per_cluster(PCCi*TCCi) +
+sum_per_cpu(PPi*TPi)
+
+where:
+PCi- is the power consumption of CPU in Ci power state
+TCi- is the total time the CPU has spent in Ci power state
+PCCi   - is the power consumption of cluster in Ci power state
+TCCi   - is the total time the cluster has spent in Ci power state
+PPi- is the power consumption of CPU in Pi power state
+TPi- is the total time the CPU has spent in Pi power state
+
+Below is an example report of one idlestat run on a dual-core system:
+clusterA@state  hits  total(us) avg(us) min(us) max(us)
+   C1   108215879554.00  543.35 0.0023163.00
+   C2   0  0.000.00 0.000.00
+   C3   78   2929290.0037555.00 0.00101441.00
+  cpu0@statehits  total(us) avg(us) min(us) max(us)
+   C1   6744 6407808.00  950.15 0.0023194.00
+   C2   3   8819.00 2939.67 549.00  5310.00
+   C3   75   2960110.0039468.13 213.00  101441.00
+   350  1047  204490.00  195.31 0.004578.00
+   700  5628  396247.00   70.41 0.001465.00
+   920  0  0.000.00 0.000.00
+  cpu0 wakeups  namecount
+   irq109   ehci_hcd:usb1   1727
+   irq029   twd 4524
+   irq069   gp_timer60
+   irq115   mmc07
+   irq044   DMA 3
+  cpu1@statehits  total(us) avg(us) min(us) max(us)
+   C1   6544 6398931.00  977.83 0.0036255.00
+   C2   1   1129.00 1129.00 1129.00 1129.00
+   C3   77   29552

[RFC PATCH 0/2] sched: proposal for idlestat scheduler benchmarking tool

2014-03-24 Thread Zoran Markovic

Conclusions from Energy Aware Scheduling sessions at the latest Kernel Summit
identified a need for tools that would assess power consumption of the system.
These tools would be used to prove efficiency of scheduler patches by
comparing power consumption before and after they were applied.

Attached is the proposal for the idlestat tool. The purpose of this patch
is to solicit feedback on tool's features, possible enhancements, etc.

Source code and sample idlestat report are provided for reference.

Please review and provide comments in anticipation of further development.

Regards, Zoran

Zoran Markovic (2):
  power: Add idlestat tool for benchmarking energy-aware scheduler
  sched: Add documentation for idlestat scheduler benchmarking tool

 Documentation/scheduler/idlestat.txt |   79 +++
 tools/power/idlestat/.gitignore  |   50 ++
 tools/power/idlestat/Makefile|   34 +
 tools/power/idlestat/idlestat.c  | 1229 ++
 tools/power/idlestat/idlestat.h  |  106 +++
 tools/power/idlestat/list.h  |  588 
 tools/power/idlestat/topology.c  |  503 ++
 tools/power/idlestat/topology.h  |   77 +++
 tools/power/idlestat/trace.c |   87 +++
 tools/power/idlestat/trace.h |   43 ++
 tools/power/idlestat/utils.c |  115 
 tools/power/idlestat/utils.h |   35 +
 12 files changed, 2946 insertions(+)
 create mode 100644 Documentation/scheduler/idlestat.txt
 create mode 100644 tools/power/idlestat/.gitignore
 create mode 100644 tools/power/idlestat/Makefile
 create mode 100644 tools/power/idlestat/idlestat.c
 create mode 100644 tools/power/idlestat/idlestat.h
 create mode 100644 tools/power/idlestat/list.h
 create mode 100644 tools/power/idlestat/topology.c
 create mode 100644 tools/power/idlestat/topology.h
 create mode 100644 tools/power/idlestat/trace.c
 create mode 100644 tools/power/idlestat/trace.h
 create mode 100644 tools/power/idlestat/utils.c
 create mode 100644 tools/power/idlestat/utils.h

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv2] usb: move hub init and LED blink work to power efficient workqueue

2014-02-07 Thread Zoran Markovic

I believe there may still be use cases where you want to wake up the
same CPU that scheduled the work.

Thanks for the Ack. Can you please queue this for 3.14?

Regards, Zoran

On 2 February 2014 08:10, Alan Stern  wrote:
> On Sat, 1 Feb 2014, Zoran Markovic wrote:
>
>> From: Shaibal Dutta 
>>
>> Allow the scheduler to select the best CPU to handle hub initialization
>> and LED blinking work. This extends idle residency times on idle CPUs
>> and conserves power.
>>
>> This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.
>>
>> Cc: Greg Kroah-Hartman 
>> Cc: Alan Stern 
>> Cc: Sarah Sharp 
>> Cc: Xenia Ragiadakou 
>> Cc: Julius Werner 
>> Cc: Krzysztof Mazur 
>> Cc: Matthias Beyer 
>> Cc: Dan Williams 
>> Cc: Mathias Nyman 
>> Cc: Thomas Pugliese 
>> Signed-off-by: Shaibal Dutta 
>> [zoran.marko...@linaro.org: Rebased to latest kernel. Added commit message.
>> Changed reference from system to power efficient workqueue for LEDs in
>> check_highspeed() and hub_port_connect_change().]
>> Signed-off-by: Zoran Markovic 
>
> Acked-off-by: Alan Stern 
>
> Is there some reason why schedule_delayed_work() doesn't use the
> power-efficient work queue by default?
>
> Alan Stern
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv2] usb: move hub init and LED blink work to power efficient workqueue

2014-02-01 Thread Zoran Markovic

From: Shaibal Dutta 

Allow the scheduler to select the best CPU to handle hub initialization
and LED blinking work. This extends idle residency times on idle CPUs
and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Greg Kroah-Hartman 
Cc: Alan Stern 
Cc: Sarah Sharp 
Cc: Xenia Ragiadakou 
Cc: Julius Werner 
Cc: Krzysztof Mazur 
Cc: Matthias Beyer 
Cc: Dan Williams 
Cc: Mathias Nyman 
Cc: Thomas Pugliese 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel. Added commit message.
Changed reference from system to power efficient workqueue for LEDs in
check_highspeed() and hub_port_connect_change().]
Signed-off-by: Zoran Markovic 
---
 drivers/usb/core/hub.c |   19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index babba88..e11a7e9 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -504,7 +504,8 @@ static void led_work (struct work_struct *work)
changed++;
}
if (changed)
-   schedule_delayed_work(&hub->leds, LED_CYCLE_PERIOD);
+   queue_delayed_work(system_power_efficient_wq,
+   &hub->leds, LED_CYCLE_PERIOD);
 }
 
 /* use a short timeout for hub/port status fetches */
@@ -1046,7 +1047,8 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
if (type == HUB_INIT) {
delay = hub_power_on(hub, false);
PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func2);
-   schedule_delayed_work(&hub->init_work,
+   queue_delayed_work(system_power_efficient_wq,
+   &hub->init_work,
msecs_to_jiffies(delay));
 
/* Suppress autosuspend until init is done */
@@ -1200,7 +1202,8 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
/* Don't do a long sleep inside a workqueue routine */
if (type == HUB_INIT2) {
PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3);
-   schedule_delayed_work(&hub->init_work,
+   queue_delayed_work(system_power_efficient_wq,
+   &hub->init_work,
msecs_to_jiffies(delay));
return; /* Continues at init3: below */
} else {
@@ -1214,7 +1217,8 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
if (status < 0)
dev_err(hub->intfdev, "activate --> %d\n", status);
if (hub->has_indicators && blinkenlights)
-   schedule_delayed_work(&hub->leds, LED_CYCLE_PERIOD);
+   queue_delayed_work(system_power_efficient_wq,
+   &hub->leds, LED_CYCLE_PERIOD);
 
/* Scan all ports that need attention */
kick_khubd(hub);
@@ -4316,7 +4320,8 @@ check_highspeed (struct usb_hub *hub, struct usb_device 
*udev, int port1)
/* hub LEDs are probably harder to miss than syslog */
if (hub->has_indicators) {
hub->indicator[port1-1] = INDICATOR_GREEN_BLINK;
-   schedule_delayed_work (&hub->leds, 0);
+   queue_delayed_work(system_power_efficient_wq,
+   &hub->leds, 0);
}
}
kfree(qual);
@@ -4545,7 +4550,9 @@ static void hub_port_connect_change(struct usb_hub *hub, 
int port1,
if (hub->has_indicators) {
hub->indicator[port1-1] =
INDICATOR_AMBER_BLINK;
-   schedule_delayed_work (&hub->leds, 0);
+   queue_delayed_work(
+   system_power_efficient_wq,
+   &hub->leds, 0);
}
status = -ENOTCONN; /* Don't retry */
goto loop_disable;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] thermal: add generic cpu hotplug cooling device

2014-01-31 Thread Zoran Markovic

Hi Eduardo,
The merge window for 3.14 is now open and I'm wondering if you had a
chance to look at these numbers?
Thanks,
Zoran

On 30 December 2013 12:48, Zoran Markovic  wrote:
> Eduardo,
>
>>> What is the workload you're running besides the proprietary heater code?
> I re-did experiments from Linaro's site pointed by Amit while
> profiling _cpu_down() and _cpu_up() times:
>>> [1] https://wiki.linaro.org/WorkingGroups/PowerManagement/Archives/Hotplug
>
> I am attaching a spreadsheet with some results and graphs:
>
> Sheet 1 (thermal_ramp) has three plots. Topmost is an unbound thermal
> ramp that levels off at ~48C. Middle plot is a thermal ramp with cpu
> hotplug kicking in as a cooling device at 38C. Bottom plot is a
> thermal ramp with cpu hotplug kicking in at 38C and cpufreq kicking in
> at 40C. One interesting thing to note is that the middle plot slowly
> drifts towards 40C even though cooling is set to 38C. I attribute this
> to the logic of step-wise governor combined with polling mode: if
> temperature is dropping above trip point, cooling is reduced. Adding
> another cooling device at 40C as a back-stop seems to keep temperature
> in check. In all cases running code was ARM's max_power test that
> maximizes CPU usage, as evidenced by results of 'top':
>   PID USER  PR  NI  VIRT  RES  SHR S  %CPU %MEMTIME+  COMMAND
>33 root  20   0 000 R 100.0  0.0  45:46.43 thread1
>32 root  20   0 000 R  91.4  0.0  44:48.14 thread0
>  1344 root  20   0 000 R   8.6  0.0   0:03.64 kworker/u4:1
>  1380 root  20   0  2476  996  712 R   0.3  0.1   0:00.07 top
>
> Sheet 2 (idle) has two plots. Top one represents latency of
> _cpu_down() while gradually adding instances of cyclictest process,
> from 0 to 10; 20 samples were captured in each case. Bottom one
> represents latency of _cpu_up() in the same test. Other than running
> cyclictest, the system was mostly idle.
>
> Sheet 3 (max_power) repeated the same test as in sheet 2, but it was
> running ARM's max_power test in the background.
>
> A quick look at the latency graphs shows that loading the system
> causes a stochastic - but not deterministic - component added to
> latencies. Minimum latency times appear unchanged.
>
>> - Homogeneous dual core Cortex-A9 environment.
>> - They go up to 48C when fully loaded. Can you explain where is your
>> sensor location? Gradient to hotspot, etc? 48C at A9s or board temperature?
> Thermal sensor is located at L2 cache, with gradient to sensor likely
> smaller than sensor inaccuracy.
>
>> - This code looks promising on embedded dual core system. However, it
>> does not necessarily mean it works fine on, say server side. How about a
>> system with 8/16/32 cores? How about a more heterogeneous workload? Not
>> to talk about heterogeneous cores. I think in more complicated scenarios
>> the data you provided above might even change. The difference between
>> your minimum and maximum shutdown/startup times are quite considerable,
>> so I am assuming your variance is not negligible, imagine if we scale
>> this up, what happens?
> Agreed that this is difficult to characterize across all platform
> types. Maybe other list members could comment the behaviour on their
> platforms? Passing in a cpu mask defines CPUs that contribute to
> cooling of a single zone, so there is some flexibility in defining
> cooling strategy. Hopefully this is good enough for a start...
>
>>
>> - The other point is that this type of cooling device must be taken in
>> very sensible way. Shutting down circuitry may not be the best strategy
>> for thermal. In fact, if you think about it, given you have a workload
>> well balanced between, say, two cores, as same of your environment,
>> turning one off it means you need to deal the very same load in only one
>> CPU. In other words, turning off circuitry means, from thermal standpoint
>> that you are increasing your heat/area ratio. Sometimes, you actually
>> want to increase this ratio in order to properly cool down your system.
> In this particular test case since both CPUs are fully loaded,
> temperature is reduced at the expense of parallelism (i.e. execution
> time), so overall heat/area is still reduced. If particular areas are
> heat-sensitive, then it makes sense to define a separate thermal zone
> (and sensor) for each of them. Just a thought.
>
> Looking forward to further discussion.
>
> Regards,
> Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] firmware: use power efficient workqueue for unloading and aborting fw load

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

Allow the scheduler to select the most appropriate CPU for running the
firmware load timeout routine and delayed routine for firmware unload.
This extends idle residency times and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Ming Lei 
Cc: Greg Kroah-Hartman 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel, added commit message.
Fixed code alignment.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/firmware_class.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8a97ddf..ae34219 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -900,7 +900,8 @@ static int _request_firmware_load(struct firmware_priv 
*fw_priv,
dev_set_uevent_suppress(f_dev, false);
dev_dbg(f_dev, "firmware: requesting %s\n", buf->fw_id);
if (timeout != MAX_SCHEDULE_TIMEOUT)
-   schedule_delayed_work(&fw_priv->timeout_work, timeout);
+   queue_delayed_work(system_power_efficient_wq,
+  &fw_priv->timeout_work, timeout);
 
kobject_uevent(&fw_priv->dev.kobj, KOBJ_ADD);
}
@@ -1570,8 +1571,8 @@ static void device_uncache_fw_images_work(struct 
work_struct *work)
  */
 static void device_uncache_fw_images_delay(unsigned long delay)
 {
-   schedule_delayed_work(&fw_cache.work,
-   msecs_to_jiffies(delay));
+   queue_delayed_work(system_power_efficient_wq, &fw_cache.work,
+  msecs_to_jiffies(delay));
 }
 
 static int fw_pm_notify(struct notifier_block *notify_block,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] mmc: move clock gating work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

Instead of binding the clock gating work to the CPU that scheduled it,
allow the scheduler to select the best CPU to handle it. This extends
idle residency times and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Chris Ball 
Cc: Guennadi Liakhovetski 
Cc: Ulf Hansson 
Cc: H Hartley Sweeten 
Cc: Andrew Morton 
Cc: Simon Baatz 
Cc: Laurent Pinchart 
Cc: Tejun Heo 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel. Added commit message.
Fixed code alignment.]
Signed-off-by: Zoran Markovic 
---
 drivers/mmc/core/host.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 49bc403..a787f1b 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -207,8 +207,9 @@ void mmc_host_clk_release(struct mmc_host *host)
host->clk_requests--;
if (mmc_host_may_gate_card(host->card) &&
!host->clk_requests)
-   schedule_delayed_work(&host->clk_gate_work,
- msecs_to_jiffies(host->clkgate_delay));
+   queue_delayed_work(system_power_efficient_wq,
+  &host->clk_gate_work,
+  msecs_to_jiffies(host->clkgate_delay));
spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] usb: move hub init and LED blink work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

Allow the scheduler to select the best CPU to handle hub initialization
and LED blinking work. This extends idle residency times on idle CPUs
and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Greg Kroah-Hartman 
Cc: Alan Stern 
Cc: Sarah Sharp 
Cc: Xenia Ragiadakou 
Cc: Julius Werner 
Cc: Krzysztof Mazur 
Cc: Matthias Beyer 
Cc: Dan Williams 
Cc: Mathias Nyman 
Cc: Thomas Pugliese 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel. Added commit message.
Changed reference from system to power efficient workqueue for LEDs in
check_highspeed() and hub_port_connect_change().]
Signed-off-by: Zoran Markovic 
---
 drivers/usb/core/hub.c |   23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index babba88..ae07ffe 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -504,7 +504,8 @@ static void led_work (struct work_struct *work)
changed++;
}
if (changed)
-   schedule_delayed_work(&hub->leds, LED_CYCLE_PERIOD);
+   queue_delayed_work(system_power_efficient_wq,
+  &hub->leds, LED_CYCLE_PERIOD);
 }
 
 /* use a short timeout for hub/port status fetches */
@@ -1046,8 +1047,9 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
if (type == HUB_INIT) {
delay = hub_power_on(hub, false);
PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func2);
-   schedule_delayed_work(&hub->init_work,
-   msecs_to_jiffies(delay));
+   queue_delayed_work(system_power_efficient_wq,
+  &hub->init_work,
+  msecs_to_jiffies(delay));
 
/* Suppress autosuspend until init is done */
usb_autopm_get_interface_no_resume(
@@ -1200,8 +1202,9 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
/* Don't do a long sleep inside a workqueue routine */
if (type == HUB_INIT2) {
PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3);
-   schedule_delayed_work(&hub->init_work,
-   msecs_to_jiffies(delay));
+   queue_delayed_work(system_power_efficient_wq,
+  &hub->init_work,
+  msecs_to_jiffies(delay));
return; /* Continues at init3: below */
} else {
msleep(delay);
@@ -1214,7 +1217,8 @@ static void hub_activate(struct usb_hub *hub, enum 
hub_activation_type type)
if (status < 0)
dev_err(hub->intfdev, "activate --> %d\n", status);
if (hub->has_indicators && blinkenlights)
-   schedule_delayed_work(&hub->leds, LED_CYCLE_PERIOD);
+   queue_delayed_work(system_power_efficient_wq,
+  &hub->leds, LED_CYCLE_PERIOD);
 
/* Scan all ports that need attention */
kick_khubd(hub);
@@ -4316,7 +4320,8 @@ check_highspeed (struct usb_hub *hub, struct usb_device 
*udev, int port1)
/* hub LEDs are probably harder to miss than syslog */
if (hub->has_indicators) {
hub->indicator[port1-1] = INDICATOR_GREEN_BLINK;
-   schedule_delayed_work (&hub->leds, 0);
+   queue_delayed_work(system_power_efficient_wq,
+  &hub->leds, 0);
}
}
kfree(qual);
@@ -4545,7 +4550,9 @@ static void hub_port_connect_change(struct usb_hub *hub, 
int port1,
if (hub->has_indicators) {
hub->indicator[port1-1] =
INDICATOR_AMBER_BLINK;
-   schedule_delayed_work (&hub->leds, 0);
+   queue_delayed_work(
+   system_power_efficient_wq,
+   &hub->leds, 0);
}
status = -ENOTCONN; /* Don't retry */
goto loop_disable;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] power: move pm_qos update timeout handler to power-efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

To avoid waking up idle CPUs, allow the scheduler to select the best CPU
to handle pm_qos update timeouts. This extends idle residency times and
conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Pavel Machek 
Cc: "Rafael J. Wysocki" 
Cc: Len Brown 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel. Fixed code alignment.
Added commit message.]
Signed-off-by: Zoran Markovic 
---
 kernel/power/qos.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 8dff9b4..5e35a3a 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -405,7 +405,8 @@ void pm_qos_update_request_timeout(struct pm_qos_request 
*req, s32 new_value,
pm_qos_array[req->pm_qos_class]->constraints,
&req->node, PM_QOS_UPDATE_REQ, new_value);
 
-   schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
+   queue_delayed_work(system_power_efficient_wq,
+  &req->work, usecs_to_jiffies(timeout_us));
 }
 
 /**
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] rcu: move SRCU grace period work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

Signed-off-by: Zoran Markovic 

On 31 January 2014 11:53, Zoran Markovic  wrote:
> From: Shaibal Dutta 
>
> For better use of CPU idle time, allow the scheduler to select the CPU
> on which the SRCU grace period work would be scheduled. This improves
> idle residency time and conserves power.
>
> This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.
>
> Cc: Lai Jiangshan 
> Cc: "Paul E. McKenney" 
> Cc: Dipankar Sarma 
> Signed-off-by: Shaibal Dutta 
> [zoran.marko...@linaro.org: Rebased to latest kernel version. Added commit
> message. Fixed code alignment.]
> ---
>  kernel/rcu/srcu.c |5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
> index 3318d82..a1ebe6d 100644
> --- a/kernel/rcu/srcu.c
> +++ b/kernel/rcu/srcu.c
> @@ -398,7 +398,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head 
> *head,
> rcu_batch_queue(&sp->batch_queue, head);
> if (!sp->running) {
> sp->running = true;
> -   schedule_delayed_work(&sp->work, 0);
> +   queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
> }
> spin_unlock_irqrestore(&sp->queue_lock, flags);
>  }
> @@ -674,7 +674,8 @@ static void srcu_reschedule(struct srcu_struct *sp)
> }
>
> if (pending)
> -   schedule_delayed_work(&sp->work, SRCU_INTERVAL);
> +   queue_delayed_work(system_power_efficient_wq,
> +  &sp->work, SRCU_INTERVAL);
>  }
>
>  /*
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] rcu: move SRCU grace period work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

For better use of CPU idle time, allow the scheduler to select the CPU
on which the SRCU grace period work would be scheduled. This improves
idle residency time and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Lai Jiangshan 
Cc: "Paul E. McKenney" 
Cc: Dipankar Sarma 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel version. Added commit
message. Fixed code alignment.]
---
 kernel/rcu/srcu.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 3318d82..a1ebe6d 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -398,7 +398,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head 
*head,
rcu_batch_queue(&sp->batch_queue, head);
if (!sp->running) {
sp->running = true;
-   schedule_delayed_work(&sp->work, 0);
+   queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
}
spin_unlock_irqrestore(&sp->queue_lock, flags);
 }
@@ -674,7 +674,8 @@ static void srcu_reschedule(struct srcu_struct *sp)
}
 
if (pending)
-   schedule_delayed_work(&sp->work, SRCU_INTERVAL);
+   queue_delayed_work(system_power_efficient_wq,
+  &sp->work, SRCU_INTERVAL);
 }
 
 /*
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] timekeeping: move clock sync work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

For better use of CPU idle time, allow the scheduler to select the CPU
on which the CMOS clock sync work would be scheduled. This improves
idle residency time and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Added commit message. Aligned code.]
Signed-off-by: Zoran Markovic 
---
 kernel/time/ntp.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index af8d1d4..419a52c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -514,12 +514,13 @@ static void sync_cmos_clock(struct work_struct *work)
next.tv_sec++;
next.tv_nsec -= NSEC_PER_SEC;
}
-   schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
+   queue_delayed_work(system_power_efficient_wq,
+  &sync_cmos_work, timespec_to_jiffies(&next));
 }
 
 void ntp_notify_cmos_timer(void)
 {
-   schedule_delayed_work(&sync_cmos_work, 0);
+   queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
 }
 
 #else
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv2] net: core: move core networking work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

This patch moves the following work to the power efficient workqueue:
  - Transmit work of netpoll
  - Destination cache garbage collector work
  - Link watch event handler work

In general, assignment of CPUs to pending work could be deferred to
the scheduler in order to extend idle residency time and improve
power efficiency. I would value community's opinion on the migration
of this work to the power efficient workqueue, with an emphasis on
migration of netpoll's transmit work.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" 
Cc: Jiri Pirko 
Cc: YOSHIFUJI Hideaki 
Cc: Eric Dumazet 
Cc: Julian Anastasov 
Cc: Flavio Leitner 
Cc: Neil Horman 
Cc: Patrick McHardy 
Cc: John Fastabend 
Cc: Amerigo Wang 
Cc: Joe Perches 
Cc: Jason Wang 
Cc: Antonio Quartulli 
Cc: Simon Horman 
Cc: Nikolay Aleksandrov 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel version. Edited
calls to mod_delayed_work to reference power efficient workqueue.
Added commit message. Fixed code alignment.]
Signed-off-by: Zoran Markovic 
---
 net/core/dst.c|5 +++--
 net/core/link_watch.c |5 +++--
 net/core/netpoll.c|6 --
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..57fba10 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -135,7 +135,8 @@ loop:
 */
if (expires > 4*HZ)
expires = round_jiffies_relative(expires);
-   schedule_delayed_work(&dst_gc_work, expires);
+   queue_delayed_work(system_power_efficient_wq,
+  &dst_gc_work, expires);
}
 
spin_unlock_bh(&dst_garbage.lock);
@@ -223,7 +224,7 @@ void __dst_free(struct dst_entry *dst)
if (dst_garbage.timer_inc > DST_GC_INC) {
dst_garbage.timer_inc = DST_GC_INC;
dst_garbage.timer_expires = DST_GC_MIN;
-   mod_delayed_work(system_wq, &dst_gc_work,
+   mod_delayed_work(system_power_efficient_wq, &dst_gc_work,
 dst_garbage.timer_expires);
}
spin_unlock_bh(&dst_garbage.lock);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839..6899935 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -135,9 +135,10 @@ static void linkwatch_schedule_work(int urgent)
 * override the existing timer.
 */
if (test_bit(LW_URGENT, &linkwatch_flags))
-   mod_delayed_work(system_wq, &linkwatch_work, 0);
+   mod_delayed_work(system_power_efficient_wq, &linkwatch_work, 0);
else
-   schedule_delayed_work(&linkwatch_work, delay);
+   queue_delayed_work(system_power_efficient_wq,
+  &linkwatch_work, delay);
 }
 
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3de..6685938 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -101,7 +101,8 @@ static void queue_process(struct work_struct *work)
__netif_tx_unlock(txq);
local_irq_restore(flags);
 
-   schedule_delayed_work(&npinfo->tx_work, HZ/10);
+   queue_delayed_work(system_power_efficient_wq,
+  &npinfo->tx_work, HZ/10);
return;
}
__netif_tx_unlock(txq);
@@ -423,7 +424,8 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct 
sk_buff *skb,
 
if (status != NETDEV_TX_OK) {
skb_queue_tail(&npinfo->txq, skb);
-   schedule_delayed_work(&npinfo->tx_work,0);
+   queue_delayed_work(system_power_efficient_wq,
+  &npinfo->tx_work, 0);
}
 }
 EXPORT_SYMBOL(netpoll_send_skb_on_dev);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv2] net: ipv4: move inetpeer garbage collector work to power efficient workqueue

2014-01-31 Thread Zoran Markovic

From: Shaibal Dutta 

Garbage collector work does not have to be bound to the CPU that scheduled
it. By moving work to the power-efficient workqueue, the selection of
CPU executing the work is left to the scheduler. This extends idle
residency times and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" 
Cc: Alexey Kuznetsov 
Cc: James Morris 
Cc: Hideaki YOSHIFUJI 
Cc: Patrick McHardy 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel version. Added
commit message. Fixed code alignment.]
Signed-off-by: Zoran Markovic 
---
 net/ipv4/inetpeer.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f4244..7e3da6c6 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -161,7 +161,8 @@ static void inetpeer_gc_worker(struct work_struct *work)
list_splice(&list, &gc_list);
spin_unlock_bh(&gc_lock);
 
-   schedule_delayed_work(&gc_work, gc_delay);
+   queue_delayed_work(system_power_efficient_wq,
+  &gc_work, gc_delay);
 }
 
 /* Called from ip_output.c:ip_init  */
@@ -576,7 +577,8 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
list_add_tail(&p->gc_list, &gc_list);
spin_unlock_bh(&gc_lock);
 
-   schedule_delayed_work(&gc_work, gc_delay);
+   queue_delayed_work(system_power_efficient_wq,
+  &gc_work, gc_delay);
 }
 
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] net: core: move core networking work to power efficient workqueue

2014-01-30 Thread Zoran Markovic

From: Shaibal Dutta 

This patch moves the following work to the power efficient workqueue:
  - Transmit work of netpoll
  - Destination cache garbage collector work
  - Link watch event handler work

In general, assignment of CPUs to pending work could be deferred to
the scheduler in order to extend idle residency time and improve
power efficiency. I would value community's opinion on the migration
of this work to the power efficient workqueue, with an emphasis on
migration of netpoll's transmit work.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" 
Cc: Jiri Pirko 
Cc: YOSHIFUJI Hideaki 
Cc: Eric Dumazet 
Cc: Julian Anastasov 
Cc: Flavio Leitner 
Cc: Neil Horman 
Cc: Patrick McHardy 
Cc: John Fastabend 
Cc: Amerigo Wang 
Cc: Joe Perches 
Cc: Jason Wang 
Cc: Antonio Quartulli 
Cc: Simon Horman 
Cc: Nikolay Aleksandrov 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel version. Edited
calls to mod_delayed_work to reference power efficient workqueue.
Added commit message.]
Signed-off-by: Zoran Markovic 
---
 net/core/dst.c|5 +++--
 net/core/link_watch.c |5 +++--
 net/core/netpoll.c|6 --
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..cc28352 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -135,7 +135,8 @@ loop:
 */
if (expires > 4*HZ)
expires = round_jiffies_relative(expires);
-   schedule_delayed_work(&dst_gc_work, expires);
+   queue_delayed_work(system_power_efficient_wq,
+   &dst_gc_work, expires);
}
 
spin_unlock_bh(&dst_garbage.lock);
@@ -223,7 +224,7 @@ void __dst_free(struct dst_entry *dst)
if (dst_garbage.timer_inc > DST_GC_INC) {
dst_garbage.timer_inc = DST_GC_INC;
dst_garbage.timer_expires = DST_GC_MIN;
-   mod_delayed_work(system_wq, &dst_gc_work,
+   mod_delayed_work(system_power_efficient_wq, &dst_gc_work,
 dst_garbage.timer_expires);
}
spin_unlock_bh(&dst_garbage.lock);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839..0ae3994 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -135,9 +135,10 @@ static void linkwatch_schedule_work(int urgent)
 * override the existing timer.
 */
if (test_bit(LW_URGENT, &linkwatch_flags))
-   mod_delayed_work(system_wq, &linkwatch_work, 0);
+   mod_delayed_work(system_power_efficient_wq, &linkwatch_work, 0);
else
-   schedule_delayed_work(&linkwatch_work, delay);
+   queue_delayed_work(system_power_efficient_wq,
+   &linkwatch_work, delay);
 }
 
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3de..2c8f839 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -101,7 +101,8 @@ static void queue_process(struct work_struct *work)
__netif_tx_unlock(txq);
local_irq_restore(flags);
 
-   schedule_delayed_work(&npinfo->tx_work, HZ/10);
+   queue_delayed_work(system_power_efficient_wq,
+   &npinfo->tx_work, HZ/10);
return;
}
__netif_tx_unlock(txq);
@@ -423,7 +424,8 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct 
sk_buff *skb,
 
if (status != NETDEV_TX_OK) {
skb_queue_tail(&npinfo->txq, skb);
-   schedule_delayed_work(&npinfo->tx_work,0);
+   queue_delayed_work(system_power_efficient_wq,
+   &npinfo->tx_work, 0);
}
 }
 EXPORT_SYMBOL(netpoll_send_skb_on_dev);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] net: ipv4: move inetpeer garbage collector work to power efficient workqueue

2014-01-30 Thread Zoran Markovic

From: Shaibal Dutta 

Garbage collector work does not have to be bound to the CPU that scheduled
it. By moving work to the power-efficient workqueue, the selection of
CPU executing the work is left to the scheduler. This extends idle
residency times and conserves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: "David S. Miller" 
Cc: Alexey Kuznetsov 
Cc: James Morris 
Cc: Hideaki YOSHIFUJI 
Cc: Patrick McHardy 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel version. Added
commit message.]
Signed-off-by: Zoran Markovic 
---
 net/ipv4/inetpeer.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f4244..87155aa 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -161,7 +161,8 @@ static void inetpeer_gc_worker(struct work_struct *work)
list_splice(&list, &gc_list);
spin_unlock_bh(&gc_lock);
 
-   schedule_delayed_work(&gc_work, gc_delay);
+   queue_delayed_work(system_power_efficient_wq,
+   &gc_work, gc_delay);
 }
 
 /* Called from ip_output.c:ip_init  */
@@ -576,7 +577,8 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
list_add_tail(&p->gc_list, &gc_list);
spin_unlock_bh(&gc_lock);
 
-   schedule_delayed_work(&gc_work, gc_delay);
+   queue_delayed_work(system_power_efficient_wq,
+   &gc_work, gc_delay);
 }
 
 void inetpeer_invalidate_tree(struct inet_peer_base *base)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] net: wireless: move regulatory timeout work to power efficient workqueue

2014-01-30 Thread Zoran Markovic

From: Shaibal Dutta 

For better use of CPU idle time, allow the scheduler to select the CPU
on which the timeout work of regulatory settings would be executed.
This extends CPU idle residency time and saves power.

This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Johannes Berg 
Cc: "John W. Linville" 
Cc: "David S. Miller" 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel. Added commit message.]
Signed-off-by: Zoran Markovic 
---
 net/wireless/reg.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 9b897fc..6e21011 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1703,7 +1703,8 @@ static void reg_process_hint(struct regulatory_request 
*reg_request)
if (treatment == REG_REQ_OK ||
treatment == REG_REQ_ALREADY_SET)
return;
-   schedule_delayed_work(&reg_timeout, msecs_to_jiffies(3142));
+   queue_delayed_work(system_power_efficient_wq,
+  &reg_timeout, msecs_to_jiffies(3142));
return;
case NL80211_REGDOM_SET_BY_DRIVER:
treatment = reg_process_hint_driver(wiphy, reg_request);
@@ -2294,7 +2295,8 @@ static int reg_set_rd_driver(const struct 
ieee80211_regdomain *rd,
 
request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx);
if (!request_wiphy) {
-   schedule_delayed_work(&reg_timeout, 0);
+   queue_delayed_work(system_power_efficient_wq,
+  &reg_timeout, 0);
return -ENODEV;
}
 
@@ -2354,7 +2356,8 @@ static int reg_set_rd_country_ie(const struct 
ieee80211_regdomain *rd,
 
request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx);
if (!request_wiphy) {
-   schedule_delayed_work(&reg_timeout, 0);
+   queue_delayed_work(system_power_efficient_wq,
+  &reg_timeout, 0);
return -ENODEV;
}
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] net: rfkill: move poll work to power efficient workqueue

2014-01-30 Thread Zoran Markovic

From: Shaibal Dutta 

This patch moves the rfkill poll_work to the power efficient workqueue.
This work does not have to be bound to the CPU that scheduled it, hence
the selection of CPU that executes it would be left to the scheduler.
Net result is that CPU idle times would be extended, resulting in power
savings.

This behaviour is enabled when CONFIG_WQ_POWER_EFFICIENT is selected.

Cc: Johannes Berg 
Cc: "John W. Linville" 
Cc: "David S. Miller" 
Signed-off-by: Shaibal Dutta 
[zoran.marko...@linaro.org: Rebased to latest kernel, added commit message.
Fixed workqueue selection after suspend/resume cycle.]
Signed-off-by: Zoran Markovic 
---
 net/rfkill/core.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index ed7e0b4..b3b16c0 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -789,7 +789,8 @@ void rfkill_resume_polling(struct rfkill *rfkill)
if (!rfkill->ops->poll)
return;
 
-   schedule_work(&rfkill->poll_work.work);
+   queue_delayed_work(system_power_efficient_wq,
+  &rfkill->poll_work, 0);
 }
 EXPORT_SYMBOL(rfkill_resume_polling);
 
@@ -894,7 +895,8 @@ static void rfkill_poll(struct work_struct *work)
 */
rfkill->ops->poll(rfkill, rfkill->data);
 
-   schedule_delayed_work(&rfkill->poll_work,
+   queue_delayed_work(system_power_efficient_wq,
+   &rfkill->poll_work,
round_jiffies_relative(POLL_INTERVAL));
 }
 
@@ -958,7 +960,8 @@ int __must_check rfkill_register(struct rfkill *rfkill)
INIT_WORK(&rfkill->sync_work, rfkill_sync_work);
 
if (rfkill->ops->poll)
-   schedule_delayed_work(&rfkill->poll_work,
+   queue_delayed_work(system_power_efficient_wq,
+   &rfkill->poll_work,
round_jiffies_relative(POLL_INTERVAL));
 
if (!rfkill->persistent || rfkill_epo_lock_active) {
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] thermal: add generic cpu hotplug cooling device

2013-12-12 Thread Zoran Markovic

Hi Eduardo,

> Yeah, I would like to see it. But what I was more interested in seeing
> is how long does it take to offline a CPU?
>
I profiled this over 70 shutdown/startup cycles of CPU1 on Capri-AP
(Cortex-A9x2) board and I get:
shutdown:  1445usec (average), 3159usec (maximum), 834usec (minimum)
startup:  707usec (average), 3159usec (maximum), 327usec (minimum)

It's using a 32KHz clock so time resolution is ~30usec.

Regards, Zoran

>>>>> On 20 September 2013 15:15, Zoran Markovic  
>>>>> wrote:
>>>>>> This patch implements a generic CPU hotplug cooling device. The
>>>>>> implementation scales down the number of running CPUs when temperature
>>>>>> increases through a thermal trip point and prevents booting CPUs
>>>>>> until thermal conditions are restored. Upon restoration, the action
>>>>>> of starting up a CPU is left to another entity (e.g. CPU offline
>>>>>> governor, for which a patch is in the works).
>>>>>>
>>>>>> In the past two years, ARM considerably reduced the time required for
>>>>>> CPUs to boot and shutdown; this time is now measured in microseconds.
>>>>>> This patch is predominantly intended for ARM big.LITTLE architectures
>>>>>> where big cores are expected to have a much bigger impact on thermal
>>>>>> budget than little cores, resulting in fast temperature ramps to a trip
>>>>>> point, i.e. thermal runaways. Switching off the big core(s) may be one
>>>>>> of the recovery mechanisms to restore system temperature, but the actual
>>>>>> strategy is left to the thermal governor.
>>>>>>
>>>>>> The assumption is that CPU shutdown/startup is a rare event, so no
>>>>>> attempt was made to make the code atomic, i.e. the code evidently races
>>>>>> with CPU hotplug driver. The set_cur_state() function offlines CPUs
>>>>>> iteratively one at a time, checking the cooling state before each CPU
>>>>>> shutdown. A hotplug notifier callback validates any CPU boot requests
>>>>>> against current cooling state and approves/denies accordingly. This
>>>>>> mechanism guarantees that the desired cooling state could be reached in a
>>>>>> maximum of d-c iterations, where d and c are the "desired" and "current"
>>>>>> cooling states expressed in the number of offline CPUs.
>>>>>>
>>>>>> Credits to Amit Daniel Kachhap for initial attempt to upstream this 
>>>>>> feature.
>>>>>>
>>>>>> Cc: Zhang Rui 
>>>>>> Cc: Eduardo Valentin 
>>>>>> Cc: Rob Landley 
>>>>>> Cc: Amit Daniel Kachhap 
>>>>>> Cc: Andrew Morton 
>>>>>> Cc: Durgadoss R 
>>>>>> Cc: Christian Daudt 
>>>>>> Cc: James King 
>>>>>> Signed-off-by: Zoran Markovic 
>>>>>> ---
>>>>>>  Documentation/thermal/cpu-cooling-api.txt |   17 ++
>>>>>>  drivers/thermal/Kconfig   |   10 +
>>>>>>  drivers/thermal/Makefile  |1 +
>>>>>>  drivers/thermal/cpu_hotplug.c |  362 
>>>>>> +
>>>>>>  include/linux/cpuhp_cooling.h |   57 +
>>>>>>  5 files changed, 447 insertions(+)
>>>>>>  create mode 100644 drivers/thermal/cpu_hotplug.c
>>>>>>  create mode 100644 include/linux/cpuhp_cooling.h
>>>>>>
>>>>>> diff --git a/Documentation/thermal/cpu-cooling-api.txt 
>>>>>> b/Documentation/thermal/cpu-cooling-api.txt
>>>>>> index fca24c9..2f94f68 100644
>>>>>> --- a/Documentation/thermal/cpu-cooling-api.txt
>>>>>> +++ b/Documentation/thermal/cpu-cooling-api.txt
>>>>>> @@ -30,3 +30,20 @@ the user. The registration APIs returns the cooling 
>>>>>> device pointer.
>>>>>>  This interface function unregisters the "thermal-cpufreq-%x" 
>>>>>> cooling device.
>>>>>>
>>>>>>  cdev: Cooling device pointer which has to be unregistered.
>>>>>> +
>>>>>> +1.2 cpu hotplug registration/unregistration APIs
>>>>>> +1.2.1 struct thermal_cooling_device *cpuhp_cooling_register(
>>>>>> +   st

Re: [RFC PATCH] thermal: add generic cpu hotplug cooling device

2013-12-02 Thread Zoran Markovic

Hi Eduardo,
I have some graphs created for Broadcom's Capri (Cortex-A9x2) device.
I do a full temperature ramp using ARM-proprietary test, which heats
it up to ~48C. By hot-unplugging CPU1 I can cool it down to ~40C
within seconds. Let me know if you'd like to see the graphs.
Regards, Zoran

On 29 November 2013 06:08, Eduardo Valentin  wrote:
> Hello Zoran,
>
> On 27-11-2013 17:56, Zoran Markovic wrote:
>> Pinging again... Does anyone have any opinion on this feature?
>
> Sorry for not answering you. Yes there is interest in such work.
> Besides, your patch is not the very first attempt to do so. If I
> remember correctly, when Amit D. K was originally sending the current
> cpu cooling device, it included a hotplug part. That is why it was named
> cpucooling and not cpufreqcooling. Anyway, the major concern at that
> time was the latency of offlining a CPU, mainly due to task and
> structure migration.
>
> Thus the question is, have you measured the behavior of your system when
> using this cooling device? Does it present any cooling effectiveness
> during high system load scenarios for instance? In case you have data,
> would you be able to share them?
>
>
>> Thanks,
>> Zoran
>>
>> On 4 October 2013 15:52, Zoran Markovic  wrote:
>>> Any comments on this proposed feature and implementation? Apparently
>>> it's also useful for server systems.
>
>
>
>
>>> Thanks,
>>> Zoran
>>>
>>> On 20 September 2013 15:15, Zoran Markovic  
>>> wrote:
>>>> This patch implements a generic CPU hotplug cooling device. The
>>>> implementation scales down the number of running CPUs when temperature
>>>> increases through a thermal trip point and prevents booting CPUs
>>>> until thermal conditions are restored. Upon restoration, the action
>>>> of starting up a CPU is left to another entity (e.g. CPU offline
>>>> governor, for which a patch is in the works).
>>>>
>>>> In the past two years, ARM considerably reduced the time required for
>>>> CPUs to boot and shutdown; this time is now measured in microseconds.
>>>> This patch is predominantly intended for ARM big.LITTLE architectures
>>>> where big cores are expected to have a much bigger impact on thermal
>>>> budget than little cores, resulting in fast temperature ramps to a trip
>>>> point, i.e. thermal runaways. Switching off the big core(s) may be one
>>>> of the recovery mechanisms to restore system temperature, but the actual
>>>> strategy is left to the thermal governor.
>>>>
>>>> The assumption is that CPU shutdown/startup is a rare event, so no
>>>> attempt was made to make the code atomic, i.e. the code evidently races
>>>> with CPU hotplug driver. The set_cur_state() function offlines CPUs
>>>> iteratively one at a time, checking the cooling state before each CPU
>>>> shutdown. A hotplug notifier callback validates any CPU boot requests
>>>> against current cooling state and approves/denies accordingly. This
>>>> mechanism guarantees that the desired cooling state could be reached in a
>>>> maximum of d-c iterations, where d and c are the "desired" and "current"
>>>> cooling states expressed in the number of offline CPUs.
>>>>
>>>> Credits to Amit Daniel Kachhap for initial attempt to upstream this 
>>>> feature.
>>>>
>>>> Cc: Zhang Rui 
>>>> Cc: Eduardo Valentin 
>>>> Cc: Rob Landley 
>>>> Cc: Amit Daniel Kachhap 
>>>> Cc: Andrew Morton 
>>>> Cc: Durgadoss R 
>>>> Cc: Christian Daudt 
>>>> Cc: James King 
>>>> Signed-off-by: Zoran Markovic 
>>>> ---
>>>>  Documentation/thermal/cpu-cooling-api.txt |   17 ++
>>>>  drivers/thermal/Kconfig   |   10 +
>>>>  drivers/thermal/Makefile  |1 +
>>>>  drivers/thermal/cpu_hotplug.c |  362 
>>>> +
>>>>  include/linux/cpuhp_cooling.h |   57 +
>>>>  5 files changed, 447 insertions(+)
>>>>  create mode 100644 drivers/thermal/cpu_hotplug.c
>>>>  create mode 100644 include/linux/cpuhp_cooling.h
>>>>
>>>> diff --git a/Documentation/thermal/cpu-cooling-api.txt 
>>>> b/Documentation/thermal/cpu-cooling-api.txt
>>>> index fca24c9..2f94f68 100644
>>>> --- a/Documentation/thermal/cpu-cooling-api.txt
>>>> +++ b/Docume

Re: [RFC PATCH] thermal: add generic cpu hotplug cooling device

2013-11-27 Thread Zoran Markovic

Pinging again... Does anyone have any opinion on this feature?
Thanks,
Zoran

On 4 October 2013 15:52, Zoran Markovic  wrote:
> Any comments on this proposed feature and implementation? Apparently
> it's also useful for server systems.
> Thanks,
> Zoran
>
> On 20 September 2013 15:15, Zoran Markovic  wrote:
>> This patch implements a generic CPU hotplug cooling device. The
>> implementation scales down the number of running CPUs when temperature
>> increases through a thermal trip point and prevents booting CPUs
>> until thermal conditions are restored. Upon restoration, the action
>> of starting up a CPU is left to another entity (e.g. CPU offline
>> governor, for which a patch is in the works).
>>
>> In the past two years, ARM considerably reduced the time required for
>> CPUs to boot and shutdown; this time is now measured in microseconds.
>> This patch is predominantly intended for ARM big.LITTLE architectures
>> where big cores are expected to have a much bigger impact on thermal
>> budget than little cores, resulting in fast temperature ramps to a trip
>> point, i.e. thermal runaways. Switching off the big core(s) may be one
>> of the recovery mechanisms to restore system temperature, but the actual
>> strategy is left to the thermal governor.
>>
>> The assumption is that CPU shutdown/startup is a rare event, so no
>> attempt was made to make the code atomic, i.e. the code evidently races
>> with CPU hotplug driver. The set_cur_state() function offlines CPUs
>> iteratively one at a time, checking the cooling state before each CPU
>> shutdown. A hotplug notifier callback validates any CPU boot requests
>> against current cooling state and approves/denies accordingly. This
>> mechanism guarantees that the desired cooling state could be reached in a
>> maximum of d-c iterations, where d and c are the "desired" and "current"
>> cooling states expressed in the number of offline CPUs.
>>
>> Credits to Amit Daniel Kachhap for initial attempt to upstream this feature.
>>
>> Cc: Zhang Rui 
>> Cc: Eduardo Valentin 
>> Cc: Rob Landley 
>> Cc: Amit Daniel Kachhap 
>> Cc: Andrew Morton 
>> Cc: Durgadoss R 
>> Cc: Christian Daudt 
>> Cc: James King 
>> Signed-off-by: Zoran Markovic 
>> ---
>>  Documentation/thermal/cpu-cooling-api.txt |   17 ++
>>  drivers/thermal/Kconfig   |   10 +
>>  drivers/thermal/Makefile  |1 +
>>  drivers/thermal/cpu_hotplug.c |  362 
>> +
>>  include/linux/cpuhp_cooling.h |   57 +
>>  5 files changed, 447 insertions(+)
>>  create mode 100644 drivers/thermal/cpu_hotplug.c
>>  create mode 100644 include/linux/cpuhp_cooling.h
>>
>> diff --git a/Documentation/thermal/cpu-cooling-api.txt 
>> b/Documentation/thermal/cpu-cooling-api.txt
>> index fca24c9..2f94f68 100644
>> --- a/Documentation/thermal/cpu-cooling-api.txt
>> +++ b/Documentation/thermal/cpu-cooling-api.txt
>> @@ -30,3 +30,20 @@ the user. The registration APIs returns the cooling 
>> device pointer.
>>  This interface function unregisters the "thermal-cpufreq-%x" cooling 
>> device.
>>
>>  cdev: Cooling device pointer which has to be unregistered.
>> +
>> +1.2 cpu hotplug registration/unregistration APIs
>> +1.2.1 struct thermal_cooling_device *cpuhp_cooling_register(
>> +   struct cpumask *cpus, const char *ext)
>> +
>> +This function creates and registers a cpu hotplug cooling device with
>> +the name "cpu-hotplug-%s".
>> +
>> +cpus: cpumask of cpu cores participating in cooling.
>> +ext: instance-specific name of device
>> +
>> +1.2.2 void cpuhotplug_cooling_unregister(struct thermal_cooling_device 
>> *cdev)
>> +
>> +This function unregisters and frees the cpu hotplug cooling device cdev.
>> +
>> +cdev: Pointer to cooling device to unregister.
>> +
>> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
>> index 52b6ed7..3509100 100644
>> --- a/drivers/thermal/Kconfig
>> +++ b/drivers/thermal/Kconfig
>> @@ -79,6 +79,16 @@ config CPU_THERMAL
>>
>>   If you want this support, you should say Y here.
>>
>> +config CPU_THERMAL_HOTPLUG
>> +   bool "Generic CPU hotplug cooling"
>> +   depends on HOTPLUG_CPU
>> +   help
>> + Shutdown CPUs to prevent the device from overheating. This feature
>> + uses generic CPU hot-unplug capabilities to control de

[PATCHv6] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-10-17 Thread Zoran Markovic

From: Benoit Goby 

Rather than hard-lock the kernel, dump the suspend/resume thread stack and
panic() to capture a message in pstore when a driver takes too long to
suspend/resume. Default suspend/resume watchdog timeout is set to 12
seconds to be longer than the usbhid 10 second timeout, but could be
changed at compile time.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot. Information about failing device can be
retrieved in subsequent boot session by mounting pstore and inspecting
the log. Laptops with EFI-enabled pstore could also benefit from
this feature.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

This feature can be enabled/disabled during kernel configuration.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message. Moved call to dpm_wd_set() before device_lock() in
device_resume(). Minor changes to add compile-time inclusion of the feature.
Renamed 'dpm_wd...' to 'dpm_watchdog...'. Fixed compile errors/warnings for
x86_64 and s390.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   72 +
 kernel/power/Kconfig  |   16 ++
 2 files changed, 88 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9f098a8..f2633da 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -30,6 +30,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -390,6 +392,70 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+#ifdef CONFIG_DPM_WATCHDOG
+struct dpm_watchdog {
+   struct device   *dev;
+   struct task_struct  *tsk;
+   struct timer_list   timer;
+};
+
+#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
+   struct dpm_watchdog wd
+
+/**
+ * dpm_watchdog_handler - Driver suspend / resume watchdog handler.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so panic() to
+ * capture a crash-dump in pstore.
+ */
+static void dpm_watchdog_handler(unsigned long data)
+{
+   struct dpm_watchdog *wd = (void *)data;
+
+   dev_emerg(wd->dev, " DPM device timeout \n");
+   show_stack(wd->tsk, NULL);
+   panic("%s %s: unrecoverable failure\n",
+   dev_driver_string(wd->dev), dev_name(wd->dev));
+}
+
+/**
+ * dpm_watchdog_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
+{
+   struct timer_list *timer = &wd->timer;
+
+   wd->dev = dev;
+   wd->tsk = current;
+
+   init_timer_on_stack(timer);
+   /* use same timeout value for both suspend and resume */
+   timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
+   timer->function = dpm_watchdog_handler;
+   timer->data = (unsigned long)wd;
+   add_timer(timer);
+}
+
+/**
+ * dpm_watchdog_clear - Disable suspend/resume watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_watchdog_clear(struct dpm_watchdog *wd)
+{
+   struct timer_list *timer = &wd->timer;
+
+   del_timer_sync(timer);
+   destroy_timer_on_stack(timer);
+}
+#else
+#define DECLARE_DPM_WATCHDOG_ON_STACK(wd)
+#define dpm_watchdog_set(x, y)
+#define dpm_watchdog_clear(x)
+#endif
+
 /*- Resume routines -*/
 
 /**
@@ -576,6 +642,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   DECLARE_DPM_WATCHDOG_ON_STACK(wd);
 
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -584,6 +651,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
goto Complete;
 
dpm_wait(dev->parent, async);
+   dpm_watchdog_set(&wd, dev);
device_lock(dev);
 
/*
@@ -642,6 +710,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 
  Unlock:

Re: [RFC PATCH] timekeeping: Correct run-time detection of real-time clock.

2013-10-15 Thread Zoran Markovic

Hi Feng,
Looking at the OMAP implementation, persistent_clock is updated on
every read of the 32K counter. If the read doesn't happen often enough
to accurately update persistent_clock, then the 32K counter would fail
the definition of a persistent clock and some other timekeeping source
should be used.

Regards, Zoran

On 12 October 2013 00:48, Feng Tang  wrote:
> Hi Zoran,
>
> Thanks for the patch! (This reply may be too late :))
>
> One question just for curiosity: for the counter_32K timer, it's running
> at 32K Hz and has one 32b counter. I understand it is only for suspend
> time calculation use, but the wrap time for it is about
> 4G/32K ~= 128K seconds ~= 35 hours
> What if one suspend time is longer than that?
>
> - Feng
>
> On Fri, May 17, 2013 at 11:24:05AM -0700, Zoran Markovic wrote:
>> Since commit <31ade30692dc9680bfc95700d794818fa3f754ac>, timekeeping_init()
>> checks for presence of persistent clock by attempting to read a non-zero
>> time value from real-time clock. This is an issue on platforms where
>> persistent_clock (instead of a RTC) is implemented as a free-running counter
>> starting from zero on each boot and running during suspend. Examples are some
>> ARM platforms (e.g. PandaBoard). An attempt to read such a clock during
>> timekeeping_init() may return zero value and falsely declare persistent clock
>> as missing. Additionally, in the above case suspend times may be accounted
>> twice (once from timekeeping_resume() and once from rtc_resume()), resulting
>> in a gradual drift of system time.
>>
>> This patch does a run-time correction of the issue by doing the same check
>> during timekeeping_suspend().
>>
>> A better long-term solution would have to return error when trying to read
>> non-existing clock and zero when trying to read an uninitialized clock, but
>> that would require changing all persistent_clock implementations.
>>
>> This patch addresses the immediate breakage, for now.
>>
>> Cc: John Stultz 
>> Cc: Thomas Gleixner 
>> Cc: Feng Tang 
>> Cc: sta...@vger.kernel.org
>> Signed-off-by: Zoran Markovic 
>> ---
>>  kernel/time/timekeeping.c |8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
>> index 98cd470..baeeb5c 100644
>> --- a/kernel/time/timekeeping.c
>> +++ b/kernel/time/timekeeping.c
>> @@ -975,6 +975,14 @@ static int timekeeping_suspend(void)
>>
>>   read_persistent_clock(&timekeeping_suspend_time);
>>
>> + /*
>> +  * On some systems the persistent_clock can not be detected at
>> +  * timekeeping_init by its return value, so if we see a valid
>> +  * value returned, update the persistent_clock_exist flag.
>> +  */
>> + if (timekeeping_suspend_time.tv_sec || 
>> timekeeping_suspend_time.tv_nsec)
>> + persistent_clock_exist = true;
>> +
>>   raw_spin_lock_irqsave(&timekeeper_lock, flags);
>>   write_seqcount_begin(&timekeeper_seq);
>>   timekeeping_forward_now(tk);
>> --
>> 1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv5] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-10-10 Thread Zoran Markovic

From: Benoit Goby 

Rather than hard-lock the kernel, dump the suspend/resume thread stack and
panic() to capture a message in pstore when a driver takes too long to
suspend/resume. Default suspend/resume watchdog timeout is set to 12
seconds to be longer than the usbhid 10 second timeout, but could be
changed at compile time.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot. Information about failing device can be
retrieved in subsequent boot session by mounting pstore and inspecting
the log. Laptops with EFI-enabled pstore could also benefit from
this feature.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

This feature can be enabled/disabled during kernel configuration.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message. Moved call to dpm_wd_set() before device_lock() in
device_resume(). Minor changes to add compile-time inclusion of the feature.
Renamed 'dpm_wd...' to 'dpm_watchdog...'.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   68 +
 kernel/power/Kconfig  |   16 +++
 2 files changed, 84 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9f098a8..06fbc62 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -30,6 +30,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -55,6 +57,12 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+struct dpm_watchdog {
+   struct device   *dev;
+   struct task_struct  *tsk;
+   struct timer_list   timer;
+};
+
 static int async_error;
 
 static char *pm_verb(int event)
@@ -390,6 +398,60 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+#ifdef CONFIG_DPM_WATCHDOG
+/**
+ * dpm_watchdog_handler - Driver suspend / resume watchdog handler.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so panic() to
+ * capture a crash-dump in pstore.
+ */
+static void dpm_watchdog_handler(unsigned long data)
+{
+   struct dpm_watchdog *wd = (void *)data;
+
+   dev_emerg(wd->dev, " DPM device timeout \n");
+   show_stack(wd->tsk, NULL);
+   panic("%s %s: unrecoverable failure\n",
+   dev_driver_string(wd->dev), dev_name(wd->dev));
+}
+
+/**
+ * dpm_watchdog_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
+{
+   struct timer_list *timer = &wd->timer;
+
+   wd->dev = dev;
+   wd->tsk = get_current();
+
+   init_timer_on_stack(timer);
+   /* use same timeout value for both suspend and resume */
+   timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
+   timer->function = dpm_watchdog_handler;
+   timer->data = (unsigned long)wd;
+   add_timer(timer);
+}
+
+/**
+ * dpm_watchdog_clear - Disable suspend/resume watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_watchdog_clear(struct dpm_watchdog *wd)
+{
+   struct timer_list *timer = &wd->timer;
+
+   del_timer_sync(timer);
+   destroy_timer_on_stack(timer);
+}
+#else
+#define dpm_watchdog_set(x, y)
+#define dpm_watchdog_clear(x)
+#endif
+
 /*- Resume routines -*/
 
 /**
@@ -576,6 +638,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct dpm_watchdog wd;
 
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -584,6 +647,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
goto Complete;
 
dpm_wait(dev->parent, async);
+   dpm_watchdog_set(&wd, dev);
device_lock(dev);
 
/*
@@ -642,6 +706,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool a

Re: [RFC PATCHv4] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-10-09 Thread Zoran Markovic

Hi Rafael,
Just wondering if you would like anything changed in this patch in
order to get it into 3.13. I'd prefer not to miss yet another merge
window...
Thanks,
Zoran

On 25 September 2013 15:31, Zoran Markovic  wrote:
> From: Benoit Goby 
>
> Rather than hard-lock the kernel, dump the suspend/resume thread stack and
> panic() to capture a message in pstore when a driver takes too long to
> suspend/resume. Default suspend/resume watchdog timeout is set to 12
> seconds to be longer than the usbhid 10 second timeout, but could be
> changed at compile time.
>
> Exclude from the watchdog the time spent waiting for children that
> are resumed asynchronously and time every device, whether or not they
> resumed synchronously.
>
> This patch is targeted for mobile devices where a suspend/resume lockup
> could cause a system reboot. Information about failing device can be
> retrieved in subsequent boot session by mounting pstore and inspecting
> the log. Laptops with EFI-enabled pstore could also benefit from
> this feature.
>
> The hardware watchdog timer is likely suspended during this time and
> couldn't be relied upon. The soft-lockup detector would eventually tell
> that tasks are not scheduled, but would provide little context as to why.
> The patch hence uses system timer and assumes it is still active while the
> devices are suspended/resumed.
>
> This feature can be enabled/disabled during kernel configuration.
>
> Cc: Android Kernel Team 
> Cc: Colin Cross 
> Cc: Todd Poynor 
> Cc: San Mehat 
> Cc: Benoit Goby 
> Cc: John Stultz 
> Cc: Pavel Machek 
> Cc: Rafael J. Wysocki 
> Cc: Len Brown 
> Cc: Greg Kroah-Hartman 
> Original-author: San Mehat 
> Signed-off-by: Benoit Goby 
> [zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
> tweaked commit message. Moved call to dpm_wd_set() before device_lock() in
> device_resume(). Minor changes to add compile-time inclusion of the feature.]
> Signed-off-by: Zoran Markovic 
> ---
>  drivers/base/power/main.c |   68 
> +
>  kernel/power/Kconfig  |   16 +++
>  2 files changed, 84 insertions(+)
>
> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
> index 9f098a8..9b7e6b6 100644
> --- a/drivers/base/power/main.c
> +++ b/drivers/base/power/main.c
> @@ -30,6 +30,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
>  #include "../base.h"
>  #include "power.h"
>
> @@ -55,6 +57,12 @@ struct suspend_stats suspend_stats;
>  static DEFINE_MUTEX(dpm_list_mtx);
>  static pm_message_t pm_transition;
>
> +struct dpm_watchdog {
> +   struct device   *dev;
> +   struct task_struct  *tsk;
> +   struct timer_list   timer;
> +};
> +
>  static int async_error;
>
>  static char *pm_verb(int event)
> @@ -390,6 +398,60 @@ static int dpm_run_callback(pm_callback_t cb, struct 
> device *dev,
> return error;
>  }
>
> +#ifdef CONFIG_DPM_WD
> +/**
> + * dpm_wd_handler - Driver suspend / resume watchdog handler.
> + *
> + * Called when a driver has timed out suspending or resuming.
> + * There's not much we can do here to recover so panic() to
> + * capture a crash-dump in pstore.
> + */
> +static void dpm_wd_handler(unsigned long data)
> +{
> +   struct dpm_watchdog *wd = (void *)data;
> +
> +   dev_emerg(wd->dev, " DPM device timeout \n");
> +   show_stack(wd->tsk, NULL);
> +   panic("%s %s: unrecoverable failure\n",
> +   dev_driver_string(wd->dev), dev_name(wd->dev));
> +}
> +
> +/**
> + * dpm_wd_set - Enable pm watchdog for given device.
> + * @wd: Watchdog. Must be allocated on the stack.
> + * @dev: Device to handle.
> + */
> +static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
> +{
> +   struct timer_list *timer = &wd->timer;
> +
> +   wd->dev = dev;
> +   wd->tsk = get_current();
> +
> +   init_timer_on_stack(timer);
> +   /* use same timeout value for both suspend and resume */
> +   timer->expires = jiffies + HZ * CONFIG_DPM_WD_TIMEOUT;
> +   timer->function = dpm_wd_handler;
> +   timer->data = (unsigned long)wd;
> +   add_timer(timer);
> +}
> +
> +/**
> + * dpm_wd_clear - Disable suspend/resume watchdog.
> + * @wd: Watchdog to disable.
> + */
> +static void dpm_wd_clear(struct dpm_watchdog *wd)
> +{
> +   struct timer_list *timer = &wd->timer;
> +
> +   del_timer_sync(timer);
> +   destroy_timer_on_stack(timer);
> +}
> +#else
> +#define dpm_wd_set(x, y)
> +#

Re: [RFC PATCH] thermal: add generic cpu hotplug cooling device

2013-10-04 Thread Zoran Markovic

Any comments on this proposed feature and implementation? Apparently
it's also useful for server systems.
Thanks,
Zoran

On 20 September 2013 15:15, Zoran Markovic  wrote:
> This patch implements a generic CPU hotplug cooling device. The
> implementation scales down the number of running CPUs when temperature
> increases through a thermal trip point and prevents booting CPUs
> until thermal conditions are restored. Upon restoration, the action
> of starting up a CPU is left to another entity (e.g. CPU offline
> governor, for which a patch is in the works).
>
> In the past two years, ARM considerably reduced the time required for
> CPUs to boot and shutdown; this time is now measured in microseconds.
> This patch is predominantly intended for ARM big.LITTLE architectures
> where big cores are expected to have a much bigger impact on thermal
> budget than little cores, resulting in fast temperature ramps to a trip
> point, i.e. thermal runaways. Switching off the big core(s) may be one
> of the recovery mechanisms to restore system temperature, but the actual
> strategy is left to the thermal governor.
>
> The assumption is that CPU shutdown/startup is a rare event, so no
> attempt was made to make the code atomic, i.e. the code evidently races
> with CPU hotplug driver. The set_cur_state() function offlines CPUs
> iteratively one at a time, checking the cooling state before each CPU
> shutdown. A hotplug notifier callback validates any CPU boot requests
> against current cooling state and approves/denies accordingly. This
> mechanism guarantees that the desired cooling state could be reached in a
> maximum of d-c iterations, where d and c are the "desired" and "current"
> cooling states expressed in the number of offline CPUs.
>
> Credits to Amit Daniel Kachhap for initial attempt to upstream this feature.
>
> Cc: Zhang Rui 
> Cc: Eduardo Valentin 
> Cc: Rob Landley 
> Cc: Amit Daniel Kachhap 
> Cc: Andrew Morton 
> Cc: Durgadoss R 
> Cc: Christian Daudt 
> Cc: James King 
> Signed-off-by: Zoran Markovic 
> ---
>  Documentation/thermal/cpu-cooling-api.txt |   17 ++
>  drivers/thermal/Kconfig   |   10 +
>  drivers/thermal/Makefile  |1 +
>  drivers/thermal/cpu_hotplug.c |  362 
> +
>  include/linux/cpuhp_cooling.h |   57 +
>  5 files changed, 447 insertions(+)
>  create mode 100644 drivers/thermal/cpu_hotplug.c
>  create mode 100644 include/linux/cpuhp_cooling.h
>
> diff --git a/Documentation/thermal/cpu-cooling-api.txt 
> b/Documentation/thermal/cpu-cooling-api.txt
> index fca24c9..2f94f68 100644
> --- a/Documentation/thermal/cpu-cooling-api.txt
> +++ b/Documentation/thermal/cpu-cooling-api.txt
> @@ -30,3 +30,20 @@ the user. The registration APIs returns the cooling device 
> pointer.
>  This interface function unregisters the "thermal-cpufreq-%x" cooling 
> device.
>
>  cdev: Cooling device pointer which has to be unregistered.
> +
> +1.2 cpu hotplug registration/unregistration APIs
> +1.2.1 struct thermal_cooling_device *cpuhp_cooling_register(
> +   struct cpumask *cpus, const char *ext)
> +
> +This function creates and registers a cpu hotplug cooling device with
> +the name "cpu-hotplug-%s".
> +
> +cpus: cpumask of cpu cores participating in cooling.
> +ext: instance-specific name of device
> +
> +1.2.2 void cpuhotplug_cooling_unregister(struct thermal_cooling_device *cdev)
> +
> +This function unregisters and frees the cpu hotplug cooling device cdev.
> +
> +cdev: Pointer to cooling device to unregister.
> +
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 52b6ed7..3509100 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -79,6 +79,16 @@ config CPU_THERMAL
>
>   If you want this support, you should say Y here.
>
> +config CPU_THERMAL_HOTPLUG
> +   bool "Generic CPU hotplug cooling"
> +   depends on HOTPLUG_CPU
> +   help
> + Shutdown CPUs to prevent the device from overheating. This feature
> + uses generic CPU hot-unplug capabilities to control device
> + temperature. When the temperature increases over a trip point, a
> + random subset of CPUs is shut down to reach the desired cooling
> + state.
> +
>  config THERMAL_EMULATION
> bool "Thermal emulation mode support"
> help
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 5ee0db0..0bd08be 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -12,6 +12,7 @@ thermal_sys-$(CONFIG_THERMA

[RFC PATCHv4] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-09-25 Thread Zoran Markovic

From: Benoit Goby 

Rather than hard-lock the kernel, dump the suspend/resume thread stack and
panic() to capture a message in pstore when a driver takes too long to
suspend/resume. Default suspend/resume watchdog timeout is set to 12
seconds to be longer than the usbhid 10 second timeout, but could be
changed at compile time.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot. Information about failing device can be
retrieved in subsequent boot session by mounting pstore and inspecting
the log. Laptops with EFI-enabled pstore could also benefit from
this feature.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

This feature can be enabled/disabled during kernel configuration.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message. Moved call to dpm_wd_set() before device_lock() in
device_resume(). Minor changes to add compile-time inclusion of the feature.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   68 +
 kernel/power/Kconfig  |   16 +++
 2 files changed, 84 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 9f098a8..9b7e6b6 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -30,6 +30,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -55,6 +57,12 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+struct dpm_watchdog {
+   struct device   *dev;
+   struct task_struct  *tsk;
+   struct timer_list   timer;
+};
+
 static int async_error;
 
 static char *pm_verb(int event)
@@ -390,6 +398,60 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+#ifdef CONFIG_DPM_WD
+/**
+ * dpm_wd_handler - Driver suspend / resume watchdog handler.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so panic() to
+ * capture a crash-dump in pstore.
+ */
+static void dpm_wd_handler(unsigned long data)
+{
+   struct dpm_watchdog *wd = (void *)data;
+
+   dev_emerg(wd->dev, " DPM device timeout \n");
+   show_stack(wd->tsk, NULL);
+   panic("%s %s: unrecoverable failure\n",
+   dev_driver_string(wd->dev), dev_name(wd->dev));
+}
+
+/**
+ * dpm_wd_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
+{
+   struct timer_list *timer = &wd->timer;
+
+   wd->dev = dev;
+   wd->tsk = get_current();
+
+   init_timer_on_stack(timer);
+   /* use same timeout value for both suspend and resume */
+   timer->expires = jiffies + HZ * CONFIG_DPM_WD_TIMEOUT;
+   timer->function = dpm_wd_handler;
+   timer->data = (unsigned long)wd;
+   add_timer(timer);
+}
+
+/**
+ * dpm_wd_clear - Disable suspend/resume watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_wd_clear(struct dpm_watchdog *wd)
+{
+   struct timer_list *timer = &wd->timer;
+
+   del_timer_sync(timer);
+   destroy_timer_on_stack(timer);
+}
+#else
+#define dpm_wd_set(x, y)
+#define dpm_wd_clear(x)
+#endif
+
 /*- Resume routines -*/
 
 /**
@@ -576,6 +638,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct dpm_watchdog wd;
 
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -584,6 +647,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
goto Complete;
 
dpm_wait(dev->parent, async);
+   dpm_wd_set(&wd, dev);
device_lock(dev);
 
/*
@@ -642,6 +706,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 
  Unlock:
device_unlock(dev);
+   dpm_wd_clear(&wd);
 
  Complete:
complete_all(&dev->power.compl

[RFC PATCH] thermal: add generic cpu hotplug cooling device

2013-09-20 Thread Zoran Markovic

This patch implements a generic CPU hotplug cooling device. The
implementation scales down the number of running CPUs when temperature
increases through a thermal trip point and prevents booting CPUs
until thermal conditions are restored. Upon restoration, the action
of starting up a CPU is left to another entity (e.g. CPU offline
governor, for which a patch is in the works).

In the past two years, ARM considerably reduced the time required for
CPUs to boot and shutdown; this time is now measured in microseconds.
This patch is predominantly intended for ARM big.LITTLE architectures
where big cores are expected to have a much bigger impact on thermal
budget than little cores, resulting in fast temperature ramps to a trip
point, i.e. thermal runaways. Switching off the big core(s) may be one
of the recovery mechanisms to restore system temperature, but the actual
strategy is left to the thermal governor.

The assumption is that CPU shutdown/startup is a rare event, so no
attempt was made to make the code atomic, i.e. the code evidently races
with CPU hotplug driver. The set_cur_state() function offlines CPUs
iteratively one at a time, checking the cooling state before each CPU
shutdown. A hotplug notifier callback validates any CPU boot requests
against current cooling state and approves/denies accordingly. This
mechanism guarantees that the desired cooling state could be reached in a
maximum of d-c iterations, where d and c are the "desired" and "current"
cooling states expressed in the number of offline CPUs.

Credits to Amit Daniel Kachhap for initial attempt to upstream this feature.

Cc: Zhang Rui 
Cc: Eduardo Valentin 
Cc: Rob Landley 
Cc: Amit Daniel Kachhap 
Cc: Andrew Morton 
Cc: Durgadoss R 
Cc: Christian Daudt 
Cc: James King 
Signed-off-by: Zoran Markovic 
---
 Documentation/thermal/cpu-cooling-api.txt |   17 ++
 drivers/thermal/Kconfig   |   10 +
 drivers/thermal/Makefile  |1 +
 drivers/thermal/cpu_hotplug.c |  362 +
 include/linux/cpuhp_cooling.h |   57 +
 5 files changed, 447 insertions(+)
 create mode 100644 drivers/thermal/cpu_hotplug.c
 create mode 100644 include/linux/cpuhp_cooling.h

diff --git a/Documentation/thermal/cpu-cooling-api.txt 
b/Documentation/thermal/cpu-cooling-api.txt
index fca24c9..2f94f68 100644
--- a/Documentation/thermal/cpu-cooling-api.txt
+++ b/Documentation/thermal/cpu-cooling-api.txt
@@ -30,3 +30,20 @@ the user. The registration APIs returns the cooling device 
pointer.
 This interface function unregisters the "thermal-cpufreq-%x" cooling 
device.
 
 cdev: Cooling device pointer which has to be unregistered.
+
+1.2 cpu hotplug registration/unregistration APIs
+1.2.1 struct thermal_cooling_device *cpuhp_cooling_register(
+   struct cpumask *cpus, const char *ext)
+
+This function creates and registers a cpu hotplug cooling device with
+the name "cpu-hotplug-%s".
+
+cpus: cpumask of cpu cores participating in cooling.
+ext: instance-specific name of device
+
+1.2.2 void cpuhotplug_cooling_unregister(struct thermal_cooling_device *cdev)
+
+This function unregisters and frees the cpu hotplug cooling device cdev.
+
+cdev: Pointer to cooling device to unregister.
+
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 52b6ed7..3509100 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -79,6 +79,16 @@ config CPU_THERMAL
 
  If you want this support, you should say Y here.
 
+config CPU_THERMAL_HOTPLUG
+   bool "Generic CPU hotplug cooling"
+   depends on HOTPLUG_CPU
+   help
+ Shutdown CPUs to prevent the device from overheating. This feature
+ uses generic CPU hot-unplug capabilities to control device
+ temperature. When the temperature increases over a trip point, a
+ random subset of CPUs is shut down to reach the desired cooling
+ state.
+
 config THERMAL_EMULATION
bool "Thermal emulation mode support"
help
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 5ee0db0..0bd08be 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -12,6 +12,7 @@ thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE)  += user_space.o
 
 # cpufreq cooling
 thermal_sys-$(CONFIG_CPU_THERMAL)  += cpu_cooling.o
+thermal_sys-$(CONFIG_CPU_THERMAL_HOTPLUG)  += cpu_hotplug.o
 
 # platform thermal drivers
 obj-$(CONFIG_SPEAR_THERMAL)+= spear_thermal.o
diff --git a/drivers/thermal/cpu_hotplug.c b/drivers/thermal/cpu_hotplug.c
new file mode 100644
index 000..8c3021e
--- /dev/null
+++ b/drivers/thermal/cpu_hotplug.c
@@ -0,0 +1,362 @@
+/*
+ *  drivers/thermal/cpu_hotplug.c
+ *
+ *  Copyright (C) 2013  Broadcom Corporation Ltd.
+ *  Copyright (C) 2013  Zoran Markovic 
+ *
+ * ~~
+ *  This pro

Re: [RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-09-05 Thread Zoran Markovic

Hi Ulf,
Thanks for reviewing this, it was very helpful!

> 1. mmc_detect_change does obviously not have to be run the same number
> of times as the mmc_rescan function. In other words, the calls to
> __pm_stay_awake are not paired with __pm_relax, I suppose this does not
> matter?
It shouldn't, since a single __pm_relax() would cancel all previous
calls to __pm_stay_awake() on the same wakeup source. What is
important is that mmc_rescan() is scheduled after __pm_stay_awake() to
make sure wakeup source is released.

> 2. mmc_detect_change can for example be called while the device
> suspend sequence is progressing. At this point the rescan work is
> disabled, thus __pm_relax will not be called, until the next rescan
> work is executed, which is after the complete resume cycle
> (mmc_pm_notify:PM_POST_SUSPEND). Is that an issue?
If started, mmc_detect_change() should run uninterrupted to call
__pm_stay_awake(), which should abort any previous suspend requests.
The abort sequence should restart the rescan work, so __pm_relax()
eventually gets called.

>> /* If there is a non-removable card registered, only scan once */
>> -   if ((host->caps & MMC_CAP_NONREMOVABLE) && host->rescan_entered)
>> +   if ((host->caps & MMC_CAP_NONREMOVABLE) && host->rescan_entered) {
>> +   __pm_relax(host->ws);
>
> By calling __pm_relax here, this indicates to me that it seems like
> you might have prevented, even for a very small timeslot, a system with
> a MMC_CAP_NONREMOVABLE card/host from suspending.
>
> For sure, you must not prevent the suspend even for small timeslots,
> when MMC_CAP_NONREMOVABLE is set.
I agree. It appears that the corresponding __pm_stay_awake() is
indiscriminately called on system resume regardless of card type, so
this needs to be fixed.

>> mmc_release_host(host);
>>
>>   out:
>> -   if (host->caps & MMC_CAP_NEEDS_POLL)
>> +   if (extend_wakeup)
>> +   /* extra 1/2 second should be enough, hopefully */
>> +   __pm_wakeup_event(host->ws, MSEC_PER_SEC/2);
>> +   else
>> +   __pm_relax(host->ws);
>> +
>> +   if (host->caps & MMC_CAP_NEEDS_POLL) {
>> +   __pm_stay_awake(host->ws);
>
> This does not make sense.
>
> So when using polling mode to detect card insert/remove, you will
> prevent suspend forever? Maybe I missed a point somewhere?
>
>> mmc_schedule_delayed_work(&host->detect, HZ);
>> +   }
>>  }
You are right, and I find it interesting that the same wake_lock()
call exists in the Android kernel. Would someone from the Android team
be able to comment?

>> /* clear pm flags now and let card drivers set them as needed */
>> @@ -2628,7 +2645,8 @@ int mmc_suspend_host(struct mmc_host *host)
>>  {
>
> This function has become deprecated. You need to rebase this patch and
> please do not add some new code in here.
>
If suspend is now initiated from the bus level, will there be a
host-level suspend/resume function at all? I need to know where this
code should move in the next revision of patch...

Regards, Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv3] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-08-28 Thread Zoran Markovic

> Is there any practical reason why it should go into the next release?

Android folks find this useful, even though it is only a debug feature.

Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv3] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-08-28 Thread Zoran Markovic

Hi Rafael,
> It doesn't look too bad from a quick look, but there's a couple of things
> I don't like in it still (relatively minor).

If there are things you would like changed in this patch, please let
me know. It would be nice to catch the 3.12 merge window.
Thanks,
- Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv3] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-08-22 Thread Zoran Markovic

Rafael,
I haven't seen any other proposals/alternatives on how to do this. Is
there anything else we should do to get this upstream? I believe this
is a valuable debug feature for Android and it now explicitly depends
on pstore...
Thanks,
Zoran

On 30 July 2013 13:38, Zoran Markovic  wrote:
> From: Benoit Goby 
>
> Rather than hard-lock the kernel, dump the suspend/resume thread stack and
> panic() to capture a message in pstore when a driver takes too long to
> suspend/resume. Default suspend/resume watchdog timeout is set to 12
> seconds to be longer than the usbhid 10 second timeout, but could be
> changed at compile time.
>
> Exclude from the watchdog the time spent waiting for children that
> are resumed asynchronously and time every device, whether or not they
> resumed synchronously.
>
> This patch is targeted for mobile devices where a suspend/resume lockup
> could cause a system reboot. Information about failing device can be
> retrieved in subsequent boot session by mounting pstore and inspecting
> the log.
>
> The hardware watchdog timer is likely suspended during this time and
> couldn't be relied upon. The soft-lockup detector would eventually tell
> that tasks are not scheduled, but would provide little context as to why.
> The patch hence uses system timer and assumes it is still active while the
> devices are suspended/resumed.
>
> This feature can be enabled/disabled during kernel configuration.
>
> Cc: Android Kernel Team 
> Cc: Colin Cross 
> Cc: Todd Poynor 
> Cc: San Mehat 
> Cc: Benoit Goby 
> Cc: John Stultz 
> Cc: Pavel Machek 
> Cc: Rafael J. Wysocki 
> Cc: Len Brown 
> Cc: Greg Kroah-Hartman 
> Original-author: San Mehat 
> Signed-off-by: Benoit Goby 
> [zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
> tweaked commit message. Minor changes to add compile-time inclusion of
> the feature.]
> Signed-off-by: Zoran Markovic 
> ---
> v3:
> * Added explicit dependency on pstore
> * Collapsed recovery options to system panic only
> * Logged driver string in panic message
>
>  drivers/base/power/main.c |   70 
> +
>  kernel/power/Kconfig  |   16 +++
>  2 files changed, 86 insertions(+)
>
> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
> index 5a9b656..c19aec0 100644
> --- a/drivers/base/power/main.c
> +++ b/drivers/base/power/main.c
> @@ -29,6 +29,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
>  #include "../base.h"
>  #include "power.h"
>
> @@ -54,6 +56,12 @@ struct suspend_stats suspend_stats;
>  static DEFINE_MUTEX(dpm_list_mtx);
>  static pm_message_t pm_transition;
>
> +struct dpm_watchdog {
> +   struct device   *dev;
> +   struct task_struct  *tsk;
> +   struct timer_list   timer;
> +};
> +
>  static int async_error;
>
>  /**
> @@ -384,6 +392,60 @@ static int dpm_run_callback(pm_callback_t cb, struct 
> device *dev,
> return error;
>  }
>
> +#ifdef CONFIG_DPM_WD
> +/**
> + * dpm_wd_handler - Driver suspend / resume watchdog handler.
> + *
> + * Called when a driver has timed out suspending or resuming.
> + * There's not much we can do here to recover so panic() to
> + * capture a crash-dump in pstore.
> + */
> +static void dpm_wd_handler(unsigned long data)
> +{
> +   struct dpm_watchdog *wd = (void *)data;
> +
> +   dev_emerg(wd->dev, " DPM device timeout \n");
> +   show_stack(wd->tsk, NULL);
> +   panic("%s %s: unrecoverable failure\n",
> +   dev_driver_string(wd->dev), dev_name(wd->dev));
> +}
> +
> +/**
> + * dpm_wd_set - Enable pm watchdog for given device.
> + * @wd: Watchdog. Must be allocated on the stack.
> + * @dev: Device to handle.
> + */
> +static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
> +{
> +   struct timer_list *timer = &wd->timer;
> +
> +   wd->dev = dev;
> +   wd->tsk = get_current();
> +
> +   init_timer_on_stack(timer);
> +   /* use same timeout value for both suspend and resume */
> +   timer->expires = jiffies + HZ * CONFIG_DPM_WD_TIMEOUT;
> +   timer->function = dpm_wd_handler;
> +   timer->data = (unsigned long)wd;
> +   add_timer(timer);
> +}
> +
> +/**
> + * dpm_wd_clear - Disable suspend/resume watchdog.
> + * @wd: Watchdog to disable.
> + */
> +static void dpm_wd_clear(struct dpm_watchdog *wd)
> +{
> +   struct timer_list *timer = &wd->timer;
> +
> +   del_timer_sync(timer);
> +   destroy_timer_on_stack(timer);

Re: [RFC PATCH] pm: prevent suspend until power supply events are processed

2013-08-22 Thread Zoran Markovic

Any opinions on this patch?
Regards, Zoran

On 2 August 2013 13:38, Zoran Markovic  wrote:
> This patch, originally authored by Arve Hjonnevag and Todd Poynor,
> prevents the system from entering suspend mode until the power
> supply plug, unplug, or any other change of state event is fully
> processed. This guarantees that the screen lights up and displays
> the battery charging state. The implementation uses the power
> supply wakeup_source object.
>
> Cc: Anton Vorontsov 
> Cc: David Woodhouse 
> Cc: Arve Hjonnevag 
> Cc: Todd Poynor 
> Cc: John Stultz 
> Signed-off-by: Zoran Markovic 
> ---
>  drivers/power/power_supply_core.c |   37 
> +++--
>  include/linux/power_supply.h  |2 ++
>  2 files changed, 33 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/power/power_supply_core.c 
> b/drivers/power/power_supply_core.c
> index 3b2d5df..e68d598 100644
> --- a/drivers/power/power_supply_core.c
> +++ b/drivers/power/power_supply_core.c
> @@ -67,23 +67,41 @@ static int __power_supply_changed_work(struct device 
> *dev, void *data)
>
>  static void power_supply_changed_work(struct work_struct *work)
>  {
> +   unsigned long flags;
> struct power_supply *psy = container_of(work, struct power_supply,
> changed_work);
>
> dev_dbg(psy->dev, "%s\n", __func__);
>
> -   class_for_each_device(power_supply_class, NULL, psy,
> - __power_supply_changed_work);
> -
> -   power_supply_update_leds(psy);
> -
> -   kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
> +   spin_lock_irqsave(&psy->changed_lock, flags);
> +   if (psy->changed) {
> +   psy->changed = false;
> +   spin_unlock_irqrestore(&psy->changed_lock, flags);
> +   class_for_each_device(power_supply_class, NULL, psy,
> + __power_supply_changed_work);
> +   power_supply_update_leds(psy);
> +   kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
> +   spin_lock_irqsave(&psy->changed_lock, flags);
> +   }
> +   /* dependent power supplies (e.g. battery) may have changed
> +* state as a result of this event, so poll again and hold
> +* the wakeup_source until all events are processed.
> +*/
> +   if (!psy->changed)
> +   pm_relax(psy->dev);
> +   spin_unlock_irqrestore(&psy->changed_lock, flags);
>  }
>
>  void power_supply_changed(struct power_supply *psy)
>  {
> +   unsigned long flags;
> +
> dev_dbg(psy->dev, "%s\n", __func__);
>
> +   spin_lock_irqsave(&psy->changed_lock, flags);
> +   psy->changed = true;
> +   pm_stay_awake(psy->dev);
> +   spin_unlock_irqrestore(&psy->changed_lock, flags);
> schedule_work(&psy->changed_work);
>  }
>  EXPORT_SYMBOL_GPL(power_supply_changed);
> @@ -500,6 +518,11 @@ int power_supply_register(struct device *parent, struct 
> power_supply *psy)
> goto check_supplies_failed;
> }
>
> +   spin_lock_init(&psy->changed_lock);
> +   rc = device_init_wakeup(dev, true);
> +   if (rc)
> +   goto wakeup_init_failed;
> +
> rc = kobject_set_name(&dev->kobj, "%s", psy->name);
> if (rc)
> goto kobject_set_name_failed;
> @@ -529,6 +552,7 @@ create_triggers_failed:
>  register_cooler_failed:
> psy_unregister_thermal(psy);
>  register_thermal_failed:
> +wakeup_init_failed:
> device_del(dev);
>  kobject_set_name_failed:
>  device_add_failed:
> @@ -546,6 +570,7 @@ void power_supply_unregister(struct power_supply *psy)
> power_supply_remove_triggers(psy);
> psy_unregister_cooler(psy);
> psy_unregister_thermal(psy);
> +   device_init_wakeup(psy->dev, false);
> device_unregister(psy->dev);
>  }
>  EXPORT_SYMBOL_GPL(power_supply_unregister);
> diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
> index 804b906..253d412 100644
> --- a/include/linux/power_supply.h
> +++ b/include/linux/power_supply.h
> @@ -194,6 +194,8 @@ struct power_supply {
> /* private */
> struct device *dev;
> struct work_struct changed_work;
> +   spinlock_t changed_lock;
> +   bool changed;
>  #ifdef CONFIG_THERMAL
> struct thermal_zone_device *tzd;
> struct thermal_cooling_device *tcd;
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-08-22 Thread Zoran Markovic

Ulf,
> I got confirmation from Broadcom that all cell phone reference designs
> have card insert/removal configured as a wakeup IRQ. Unless our
> customers change that - which I doubt - this results in a considerable
> number of products implementing this feature.
>
> Please let me know how you wish to proceed.

I think this patch would be useful for all mobile applications. What
are the chances of getting this in the next kernel version?

Thanks,
Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] pm: prevent suspend until power supply events are processed

2013-08-02 Thread Zoran Markovic

This patch, originally authored by Arve Hjonnevag and Todd Poynor,
prevents the system from entering suspend mode until the power
supply plug, unplug, or any other change of state event is fully
processed. This guarantees that the screen lights up and displays
the battery charging state. The implementation uses the power
supply wakeup_source object.

Cc: Anton Vorontsov 
Cc: David Woodhouse 
Cc: Arve Hjonnevag 
Cc: Todd Poynor 
Cc: John Stultz 
Signed-off-by: Zoran Markovic 
---
 drivers/power/power_supply_core.c |   37 +++--
 include/linux/power_supply.h  |2 ++
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/power/power_supply_core.c 
b/drivers/power/power_supply_core.c
index 3b2d5df..e68d598 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -67,23 +67,41 @@ static int __power_supply_changed_work(struct device *dev, 
void *data)
 
 static void power_supply_changed_work(struct work_struct *work)
 {
+   unsigned long flags;
struct power_supply *psy = container_of(work, struct power_supply,
changed_work);
 
dev_dbg(psy->dev, "%s\n", __func__);
 
-   class_for_each_device(power_supply_class, NULL, psy,
- __power_supply_changed_work);
-
-   power_supply_update_leds(psy);
-
-   kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
+   spin_lock_irqsave(&psy->changed_lock, flags);
+   if (psy->changed) {
+   psy->changed = false;
+   spin_unlock_irqrestore(&psy->changed_lock, flags);
+   class_for_each_device(power_supply_class, NULL, psy,
+ __power_supply_changed_work);
+   power_supply_update_leds(psy);
+   kobject_uevent(&psy->dev->kobj, KOBJ_CHANGE);
+   spin_lock_irqsave(&psy->changed_lock, flags);
+   }
+   /* dependent power supplies (e.g. battery) may have changed
+* state as a result of this event, so poll again and hold
+* the wakeup_source until all events are processed.
+*/
+   if (!psy->changed)
+   pm_relax(psy->dev);
+   spin_unlock_irqrestore(&psy->changed_lock, flags);
 }
 
 void power_supply_changed(struct power_supply *psy)
 {
+   unsigned long flags;
+
dev_dbg(psy->dev, "%s\n", __func__);
 
+   spin_lock_irqsave(&psy->changed_lock, flags);
+   psy->changed = true;
+   pm_stay_awake(psy->dev);
+   spin_unlock_irqrestore(&psy->changed_lock, flags);
schedule_work(&psy->changed_work);
 }
 EXPORT_SYMBOL_GPL(power_supply_changed);
@@ -500,6 +518,11 @@ int power_supply_register(struct device *parent, struct 
power_supply *psy)
goto check_supplies_failed;
}
 
+   spin_lock_init(&psy->changed_lock);
+   rc = device_init_wakeup(dev, true);
+   if (rc)
+   goto wakeup_init_failed;
+
rc = kobject_set_name(&dev->kobj, "%s", psy->name);
if (rc)
goto kobject_set_name_failed;
@@ -529,6 +552,7 @@ create_triggers_failed:
 register_cooler_failed:
psy_unregister_thermal(psy);
 register_thermal_failed:
+wakeup_init_failed:
device_del(dev);
 kobject_set_name_failed:
 device_add_failed:
@@ -546,6 +570,7 @@ void power_supply_unregister(struct power_supply *psy)
power_supply_remove_triggers(psy);
psy_unregister_cooler(psy);
psy_unregister_thermal(psy);
+   device_init_wakeup(psy->dev, false);
device_unregister(psy->dev);
 }
 EXPORT_SYMBOL_GPL(power_supply_unregister);
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 804b906..253d412 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -194,6 +194,8 @@ struct power_supply {
/* private */
struct device *dev;
struct work_struct changed_work;
+   spinlock_t changed_lock;
+   bool changed;
 #ifdef CONFIG_THERMAL
struct thermal_zone_device *tzd;
struct thermal_cooling_device *tcd;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-08-01 Thread Zoran Markovic

Ulf,
I got confirmation from Broadcom that all cell phone reference designs
have card insert/removal configured as a wakeup IRQ. Unless our
customers change that - which I doubt - this results in a considerable
number of products implementing this feature.

Please let me know how you wish to proceed.

Cheers,
Zoran

On 26 June 2013 13:57, Ulf Hansson  wrote:
> On 24 June 2013 21:58, Zoran Markovic  wrote:
>>>> This patch is ported from the Android common tree, so you've probably
>>>> been using it.
>>>
>>> We removed more or less all Android code in the mmc subsystem, since
>>> it just didn't work. :-)
>>>
>>> The "deferred resume" was very useful though, so after some rework we
>>> kept it and could then improve the system resume time significantly.
>>
>> For what it's worth, I fixed one bug I noticed in the Android kernel:
>> if a system has a non-removable MMC device, a suspend/resume cycle on
>> this device would hold a wake lock forever. This was a visible issue
>> on the panda board I am using.
>>
>> If there are doubts on whether or not the system should stay awake
>> during a MMC mount, we have the option to make the calls to
>> wakeup_source_register/unregister configurable. Skipping these calls
>> would leave the .ws field NULL, in which case
>> __pm_stay_awake/__pm_relax/__pm_wakeup_event would do nothing.
>
> Even if we make this feature configurable, I can't see any host driver
> that would benefit from it as of today. The reason is simply that host
> drivers do not configure their card detect irq as a wakeup irq. I
> also find it quite hard to see the benefit of doing that, but I
> don't know all the use cases.
>
> Let's see if we can get someone else to provide input...
>
>>
>> Thoughts?
>>
>> - Zoran
>
>
> Kind regards
> Ulf Hansson
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv3] drivers: power: Detect device suspend/resume lockup and log event in pstore.

2013-07-30 Thread Zoran Markovic

From: Benoit Goby 

Rather than hard-lock the kernel, dump the suspend/resume thread stack and
panic() to capture a message in pstore when a driver takes too long to
suspend/resume. Default suspend/resume watchdog timeout is set to 12
seconds to be longer than the usbhid 10 second timeout, but could be
changed at compile time.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot. Information about failing device can be
retrieved in subsequent boot session by mounting pstore and inspecting
the log.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

This feature can be enabled/disabled during kernel configuration.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message. Minor changes to add compile-time inclusion of
the feature.]
Signed-off-by: Zoran Markovic 
---
v3: 
* Added explicit dependency on pstore
* Collapsed recovery options to system panic only
* Logged driver string in panic message

 drivers/base/power/main.c |   70 +
 kernel/power/Kconfig  |   16 +++
 2 files changed, 86 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 5a9b656..c19aec0 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -54,6 +56,12 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+struct dpm_watchdog {
+   struct device   *dev;
+   struct task_struct  *tsk;
+   struct timer_list   timer;
+};
+
 static int async_error;
 
 /**
@@ -384,6 +392,60 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+#ifdef CONFIG_DPM_WD
+/**
+ * dpm_wd_handler - Driver suspend / resume watchdog handler.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so panic() to
+ * capture a crash-dump in pstore.
+ */
+static void dpm_wd_handler(unsigned long data)
+{
+   struct dpm_watchdog *wd = (void *)data;
+
+   dev_emerg(wd->dev, " DPM device timeout \n");
+   show_stack(wd->tsk, NULL);
+   panic("%s %s: unrecoverable failure\n",
+   dev_driver_string(wd->dev), dev_name(wd->dev));
+}
+
+/**
+ * dpm_wd_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
+{
+   struct timer_list *timer = &wd->timer;
+
+   wd->dev = dev;
+   wd->tsk = get_current();
+
+   init_timer_on_stack(timer);
+   /* use same timeout value for both suspend and resume */
+   timer->expires = jiffies + HZ * CONFIG_DPM_WD_TIMEOUT;
+   timer->function = dpm_wd_handler;
+   timer->data = (unsigned long)wd;
+   add_timer(timer);
+}
+
+/**
+ * dpm_wd_clear - Disable suspend/resume watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_wd_clear(struct dpm_watchdog *wd)
+{
+   struct timer_list *timer = &wd->timer;
+
+   del_timer_sync(timer);
+   destroy_timer_on_stack(timer);
+}
+#else
+#define dpm_wd_set(x, y)
+#define dpm_wd_clear(x)
+#endif
+
 /*- Resume routines -*/
 
 /**
@@ -570,6 +632,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct dpm_watchdog wd;
 
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -585,6 +648,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 * a resumed device, even if the device hasn't been completed yet.
 */
dev->power.is_prepared = false;
+   dpm_wd_set(&wd, dev);
 
if (!dev->power.is_suspended)
goto Unlock;
@@ -636,6 +700,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 
  Unlock:
device_unlock(dev);
+   dpm_wd_clear(&wd);
 
  Co

[RFC PATCH] rtc: keep system awake until all expired RTC timers are handled

2013-06-26 Thread Zoran Markovic

Current implementation of RTC interface allows for system suspend to
occur in the following cases:
(a) if a timer is set in the past and rtc_timer_do_work() is scheduled
to handle it, and
(b) if rtc_timer_do_work() is called to handle expired timers whose
handlers implement a preemption point.

A pending suspend request may be honoured in the above cases causing
timer handling to be delayed until after the next resume. This is
undesirable since timer handlers may have time-critical code to execute.

This patch makes sure that the system stays awake until all expired
timers are handled.

Note that all calls to pm_stay_awake() are eventually paired with
the single pm_relax() call in rtc_timer_do_work(), which is launched
using schedule_work().

Cc: Alessandro Zummo 
Cc: John Stultz 
Cc: Arve Hjonnevag 
Cc: Todd Poynor 
Signed-off-by: Zoran Markovic 
---
 drivers/rtc/interface.c |   13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 42bd57d..dace26e 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -72,6 +72,7 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
} else
err = -EINVAL;
 
+   pm_stay_awake(rtc->dev.parent);
mutex_unlock(&rtc->ops_lock);
/* A timer might have just expired */
schedule_work(&rtc->irqwork);
@@ -113,6 +114,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
else
err = -EINVAL;
 
+   pm_stay_awake(rtc->dev.parent);
mutex_unlock(&rtc->ops_lock);
/* A timer might have just expired */
schedule_work(&rtc->irqwork);
@@ -771,9 +773,10 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, 
struct rtc_timer *timer)
alarm.time = rtc_ktime_to_tm(timer->node.expires);
alarm.enabled = 1;
err = __rtc_set_alarm(rtc, &alarm);
-   if (err == -ETIME)
+   if (err == -ETIME) {
+   pm_stay_awake(rtc->dev.parent);
schedule_work(&rtc->irqwork);
-   else if (err) {
+   } else if (err) {
timerqueue_del(&rtc->timerqueue, &timer->node);
timer->enabled = 0;
return err;
@@ -818,8 +821,10 @@ static void rtc_timer_remove(struct rtc_device *rtc, 
struct rtc_timer *timer)
alarm.time = rtc_ktime_to_tm(next->expires);
alarm.enabled = 1;
err = __rtc_set_alarm(rtc, &alarm);
-   if (err == -ETIME)
+   if (err == -ETIME) {
+   pm_stay_awake(rtc->dev.parent);
schedule_work(&rtc->irqwork);
+   }
}
 }
 
@@ -845,7 +850,6 @@ void rtc_timer_do_work(struct work_struct *work)
 
mutex_lock(&rtc->ops_lock);
 again:
-   pm_relax(rtc->dev.parent);
__rtc_read_time(rtc, &tm);
now = rtc_tm_to_ktime(tm);
while ((next = timerqueue_getnext(&rtc->timerqueue))) {
@@ -880,6 +884,7 @@ again:
} else
rtc_alarm_disable(rtc);
 
+   pm_relax(rtc->dev.parent);
mutex_unlock(&rtc->ops_lock);
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-06-24 Thread Zoran Markovic

>> This patch is ported from the Android common tree, so you've probably
>> been using it.
>
> We removed more or less all Android code in the mmc subsystem, since
> it just didn't work. :-)
>
> The "deferred resume" was very useful though, so after some rework we
> kept it and could then improve the system resume time significantly.

For what it's worth, I fixed one bug I noticed in the Android kernel:
if a system has a non-removable MMC device, a suspend/resume cycle on
this device would hold a wake lock forever. This was a visible issue
on the panda board I am using.

If there are doubts on whether or not the system should stay awake
during a MMC mount, we have the option to make the calls to
wakeup_source_register/unregister configurable. Skipping these calls
would leave the .ws field NULL, in which case
__pm_stay_awake/__pm_relax/__pm_wakeup_event would do nothing.

Thoughts?

- Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-06-14 Thread Zoran Markovic

> I am not sure I understand why this patch is needed. When a new card
> is inserted/removed and the upper levels gets notification about the
> new card, triggering the mounting/un-mounting of the file system, why
> should it be the lowest layer (mmc) that prevents the platform from
> enter suspend/sleep? Why do we need to prevent it at all?
>
> Note that notifier handling in mmc_pm_notify, was if I remember
> correctly, not completely developed when the original version of this
> patch was being discussed. mmc_pm_notify prevents cards from being
> inserted/removed in the middle of suspend->resume sequence, is that
> not enough?

I will try to speak on behalf of the original implementers, in the hope
that they will provide the original motivation for the patch.

My understanding is that any preemption in the procedure could be an
opportunity to suspend, as there may be a suspend request racing with
this code. This is why the calls to __pm_stay_awake() and
queue_delayed_work() are so tightly coupled. It would be up to the
delayed work procedure (mmc_rescan()) to decide whether or not it is
safe to suspend. If there are no changes in the MMC state or all
changes can be handled by mmc_rescan(), it is safe to call
__pm_relax(). Otherwise, userland may take over processing of this
event, and this is why the awake state needs to be extended by 1/2
second.

Regards, Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] mmc: Enable wakeup_sources for mmc core

2013-06-13 Thread Zoran Markovic

This is a reworked implementation of wakelocks for the MMC core from
Android kernel, originally authored by Colin Cross and San Mehat.
The patch makes sure that whenever a MMC device is inserted/removed,
the system stays awake until it's reconfigured for the new state.
It is assumed that 1/2 second is sufficient for the system to start
the configuration action for the newly detected MMC device, which might
include e.g. mounting the hosted file system(s).

The implementation uses wakeup_sources instead of wake_locks.

Feedback on the approach is greatly appreciated, in particular for the
1/2 second extension period.

Cc: San Mehat 
Cc: Colin Cross 
Cc: John Stultz 
Cc: Chris Ball 
Cc: Ulf Hansson 
Cc: Johan Rudholm 
Cc: Jaehoon Chung 
Cc: Konstantin Dorfman 
Cc: Guennadi Liakhovetski 
Cc: Tejun Heo 
Cc: Andrew Morton 
Signed-off-by: John Stultz 
[zoran.marko...@linaro.org: tweaked commit message, reworked to use
wakeup_source_register/unregister instead of wakeup_source_init/trash,
added the missing __pm_relax() for non-removable devices in mmc_rescan().]
Signed-off-by: Zoran Markovic 
---
 drivers/mmc/core/core.c  |   31 +--
 drivers/mmc/core/host.c  |7 +++
 include/linux/mmc/host.h |2 ++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index c40396f..d5230c7 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1656,6 +1657,7 @@ void mmc_detect_change(struct mmc_host *host, unsigned 
long delay)
spin_unlock_irqrestore(&host->lock, flags);
 #endif
host->detect_change = 1;
+   __pm_stay_awake(host->ws);
mmc_schedule_delayed_work(&host->detect, delay);
 }
 
@@ -2351,13 +2353,16 @@ void mmc_rescan(struct work_struct *work)
struct mmc_host *host =
container_of(work, struct mmc_host, detect.work);
int i;
+   bool extend_wakeup = false;
 
if (host->rescan_disable)
return;
 
/* If there is a non-removable card registered, only scan once */
-   if ((host->caps & MMC_CAP_NONREMOVABLE) && host->rescan_entered)
+   if ((host->caps & MMC_CAP_NONREMOVABLE) && host->rescan_entered) {
+   __pm_relax(host->ws);
return;
+   }
host->rescan_entered = 1;
 
mmc_bus_get(host);
@@ -2400,16 +2405,27 @@ void mmc_rescan(struct work_struct *work)
 
mmc_claim_host(host);
for (i = 0; i < ARRAY_SIZE(freqs); i++) {
-   if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
+   if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min))) {
+   /* stay awake extra time to process detected device */
+   extend_wakeup = true;
break;
+   }
if (freqs[i] <= host->f_min)
break;
}
mmc_release_host(host);
 
  out:
-   if (host->caps & MMC_CAP_NEEDS_POLL)
+   if (extend_wakeup)
+   /* extra 1/2 second should be enough, hopefully */
+   __pm_wakeup_event(host->ws, MSEC_PER_SEC/2);
+   else
+   __pm_relax(host->ws);
+
+   if (host->caps & MMC_CAP_NEEDS_POLL) {
+   __pm_stay_awake(host->ws);
mmc_schedule_delayed_work(&host->detect, HZ);
+   }
 }
 
 void mmc_start_host(struct mmc_host *host)
@@ -2433,7 +2449,8 @@ void mmc_stop_host(struct mmc_host *host)
 #endif
 
host->rescan_disable = 1;
-   cancel_delayed_work_sync(&host->detect);
+   if (cancel_delayed_work_sync(&host->detect))
+   __pm_relax(host->ws);
mmc_flush_scheduled_work();
 
/* clear pm flags now and let card drivers set them as needed */
@@ -2628,7 +2645,8 @@ int mmc_suspend_host(struct mmc_host *host)
 {
int err = 0;
 
-   cancel_delayed_work(&host->detect);
+   if (cancel_delayed_work(&host->detect))
+   __pm_relax(host->ws);
mmc_flush_scheduled_work();
 
mmc_bus_get(host);
@@ -2741,7 +2759,8 @@ int mmc_pm_notify(struct notifier_block *notify_block,
spin_lock_irqsave(&host->lock, flags);
host->rescan_disable = 1;
spin_unlock_irqrestore(&host->lock, flags);
-   cancel_delayed_work_sync(&host->detect);
+   if (cancel_delayed_work_sync(&host->detect))
+   __pm_relax(host->ws);
 
if (!host->bus_ops || host->bus_ops->suspend)
break;
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 2a3593d..3cbb3d7 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@

Re: [RFC PATCHv2 1/2] drivers: power: Add watchdog timer to catch drivers which lockup during suspend/resume.

2013-06-05 Thread Zoran Markovic

Rafael,

>>> We could do cancel_work_sync() as a recovery, but that call blocks until the
>>> running async task is flushed, which might never happen. So doing a panic()
>>> is pretty much the only option for recovering.
>>
>> Well, its usefulness is quite limited, then.  That said I'm still not 
>> convinced
>> that this actually is the case.
>
> It does block in my environment, AFAICS. Looking a bit further in the
> code, it looks like dpm_suspend() does an async_synchronize_full()
> which would wait for all async tasks to complete. This is a
> show-stopper because (under the circumstances) the assumption that
> every async suspend routine eventually completes doesn't hold.
>
> We could possibly select which async tasks to wait for, but this would
> add unnecessary complexity to a feature targeted for debugging. It
> seems that this approach - although sounding reasonable - needs to
> wait until we have a mechanism to cancel an async task.

Looks like implementing the proposal for an async suspend +
wait_for_completion_timeout is quite complex due to the above limitations.
How do we proceed from here? We have the following options:
1. Give up on the idea of having a suspend/resume watchdog.
2. Use the timer implementation (with possible modifications).
3. Wait for the implementation of (or implement) killing of an already
running async work.

Are there any other ideas floating around?

Thanks,
Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv2 1/2] drivers: power: Add watchdog timer to catch drivers which lockup during suspend/resume.

2013-05-31 Thread Zoran Markovic

>> We could do cancel_work_sync() as a recovery, but that call blocks until the
>> running async task is flushed, which might never happen. So doing a panic()
>> is pretty much the only option for recovering.
>
> Well, its usefulness is quite limited, then.  That said I'm still not 
> convinced
> that this actually is the case.

It does block in my environment, AFAICS. Looking a bit further in the
code, it looks like dpm_suspend() does an async_synchronize_full()
which would wait for all async tasks to complete. This is a
show-stopper because (under the circumstances) the assumption that
every async suspend routine eventually completes doesn't hold.

We could possibly select which async tasks to wait for, but this would
add unnecessary complexity to a feature targeted for debugging. It
seems that this approach - although sounding reasonable - needs to
wait until we have a mechanism to cancel an async task.

Regards, Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC PATCHv2 1/2] drivers: power: Add watchdog timer to catch drivers which lockup during suspend/resume.

2013-05-28 Thread Zoran Markovic

> What about this:
>  - Add one more list_head to struct dev_pm_info.
>  - Make dpm_prepare() create a new list for the next steps instead of moving
>devices out of dpm_list.
>  - Start an async work to carry out dpm_suspend() and make the main thread
>do wait_for_completion_timeout() for every device in dpm_list (in the
>reverse order).
>  - If it times out, mark the device in question as unusable, possibly resume
>the already suspended devices (except for descendants of the failed one)
>and abort the suspend.  Return a specific error code to user space so that
>it knows what happened.  [You can make this step configurable to BUG()
>instead of doing all those things if you think that will be more useful for
>platforms you care about.]
>  - Disable future suspends.
> And analogously for resume.
>
> That should allow people to investigate what happened on a system that
> (hopefully) is not completely dead and you still can have your "reboot if
> suspend hangs" feature if you like.

I looked into implementing this. The problem that I encountered is
that there is no reliable way of canceling an async task, and hence
the asynchronous __device_suspend() would be left racing with a
recovery from a suspend timeout. We could do cancel_work_sync() as a
recovery, but that call blocks until the running async task is
flushed, which might never happen. So doing a panic() is pretty much
the only option for recovering.
- Zoran
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv2] power: Add option to log time spent in suspend

2013-05-21 Thread Zoran Markovic

From: Colin Cross 

Below is a patch from android kernel that maintains a histogram of
suspend times. Please review and provide feedback.

Statistics on the time spent in suspend are kept in
/sys/kernel/debug/sleep_time.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Thomas Gleixner 
Signed-off-by: Colin Cross 
Signed-off-by: Todd Poynor 
[zoran.marko...@linaro.org: Re-formatted suspend time table to better fit
expected values. Moved accounting of suspend time into timekeeping core.
Removed CONFIG_SUSPEND_TIME flag and made the feature conditional on
CONFIG_DEBUG_FS. Changed the file name to sleep_time to better fit terminology
in timekeeping core. Changed seq_printf to seq_puts. Tweaked commit message]
Signed-off-by: Zoran Markovic 
---
 kernel/time/Makefile   |1 +
 kernel/time/timekeeping.c  |2 +
 kernel/time/timekeeping_debug.c|   72 
 kernel/time/timekeeping_internal.h |   14 +++
 4 files changed, 89 insertions(+)
 create mode 100644 kernel/time/timekeeping_debug.c
 create mode 100644 kernel/time/timekeeping_internal.h

diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ff7d9d2..d52ac8b 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += 
tick-broadcast.o
 obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
 obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
 obj-$(CONFIG_TIMER_STATS)  += timer_stats.o
+obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index baeeb5c..e7e2f05 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -25,6 +25,7 @@
 
 #include "tick-internal.h"
 #include "ntp_internal.h"
+#include "timekeeping_internal.h"
 
 static struct timekeeper timekeeper;
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@ -841,6 +842,7 @@ static void __timekeeping_inject_sleeptime(struct 
timekeeper *tk,
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
+   tk_debug_account_sleep_time(delta);
 }
 
 /**
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
new file mode 100644
index 000..802433a
--- /dev/null
+++ b/kernel/time/timekeeping_debug.c
@@ -0,0 +1,72 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static unsigned int sleep_time_bin[32] = {0};
+
+static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
+{
+   unsigned int bin;
+   seq_puts(s, "  time (secs)count\n");
+   seq_puts(s, "--\n");
+   for (bin = 0; bin < 32; bin++) {
+   if (sleep_time_bin[bin] == 0)
+   continue;
+   seq_printf(s, "%10u - %-10u %4u\n",
+   bin ? 1 << (bin - 1) : 0, 1 << bin,
+   sleep_time_bin[bin]);
+   }
+   return 0;
+}
+
+static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
+{
+   return single_open(file, tk_debug_show_sleep_time, NULL);
+}
+
+static const struct file_operations tk_debug_sleep_time_fops = {
+   .open   = tk_debug_sleep_time_open,
+   .read   = seq_read,
+   .llseek = seq_lseek,
+   .release= single_release,
+};
+
+static int __init tk_debug_sleep_time_init(void)
+{
+   struct dentry *d;
+
+   d = debugfs_create_file("sleep_time", 0444, NULL, NULL,
+   &tk_debug_sleep_time_fops);
+   if (!d) {
+   pr_err("Failed to create sleep_time debug file\n");
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+late_initcall(tk_debug_sleep_time_init);
+
+void tk_debug_account_sleep_time(struct timespec *t)
+{
+   sleep_time_bin[fls(t->tv_sec)]++;
+}
+
diff --git a/kernel/time/timekeeping_internal.h 
b/kernel/time/timekeeping_internal.h
new file mode 100644
index 000..13323ea
--- /dev/null
+++ b/kernel/time/timekeeping_internal.h
@@ -0,0 +1

[RFC PATCH] timekeeping: Correct run-time detection of real-time clock.

2013-05-17 Thread Zoran Markovic

Since commit <31ade30692dc9680bfc95700d794818fa3f754ac>, timekeeping_init()
checks for presence of persistent clock by attempting to read a non-zero
time value from real-time clock. This is an issue on platforms where
persistent_clock (instead of a RTC) is implemented as a free-running counter
starting from zero on each boot and running during suspend. Examples are some
ARM platforms (e.g. PandaBoard). An attempt to read such a clock during
timekeeping_init() may return zero value and falsely declare persistent clock
as missing. Additionally, in the above case suspend times may be accounted
twice (once from timekeeping_resume() and once from rtc_resume()), resulting
in a gradual drift of system time.

This patch does a run-time correction of the issue by doing the same check
during timekeeping_suspend().

A better long-term solution would be to return an error when trying to read
a non-existent clock and zero when trying to read an uninitialized clock, but
that would require changing all persistent_clock implementations.

This patch addresses the immediate breakage, for now.

Cc: John Stultz 
Cc: Thomas Gleixner 
Cc: Feng Tang 
Cc: sta...@vger.kernel.org
Signed-off-by: Zoran Markovic 
---
 kernel/time/timekeeping.c |8 
 1 file changed, 8 insertions(+)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 98cd470..baeeb5c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -975,6 +975,14 @@ static int timekeeping_suspend(void)
 
read_persistent_clock(&timekeeping_suspend_time);
 
+   /*
+* On some systems the persistent_clock can not be detected at
+* timekeeping_init by its return value, so if we see a valid
+* value returned, update the persistent_clock_exist flag.
+*/
+   if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
+   persistent_clock_exist = true;
+
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&timekeeper_seq);
timekeeping_forward_now(tk);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCHv2 1/2] drivers: power: Add watchdog timer to catch drivers which lockup during suspend/resume.

2013-05-10 Thread Zoran Markovic

From: Benoit Goby 

Below is a patch from android kernel that detects a driver suspend/resume
lockup and captures dump in the kernel log. Please review and provide
comments.

Rather than hard-lock the kernel, dump the suspend/resume thread stack and
BUG() when a driver takes too long to suspend/resume.  The timeout is set to
12 seconds to be longer than the usbhid 10 second timeout.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot and catch user's attention. Information
about failing device can later be retrieved from captured log in
subsequent boot session.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message. Minor changes to merge code into kernel tip.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   66 +
 1 file changed, 66 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 5a9b656..a6a02c0 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -54,6 +56,12 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+struct dpm_watchdog {
+   struct device   *dev;
+   struct task_struct  *tsk;
+   struct timer_list   timer;
+};
+
 static int async_error;
 
 /**
@@ -384,6 +392,56 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+/**
+ * dpm_wd_handler - Driver suspend / resume watchdog handler.
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so BUG() out for
+ * a crash-dump
+ */
+static void dpm_wd_handler(unsigned long data)
+{
+   struct dpm_watchdog *wd = (void *)data;
+   struct device *dev  = wd->dev;
+   struct task_struct *tsk = wd->tsk;
+
+   dev_emerg(dev, " DPM device timeout \n");
+   show_stack(tsk, NULL);
+
+   BUG();
+}
+
+/**
+ * dpm_wd_set - Enable pm watchdog for given device.
+ * @wd: Watchdog. Must be allocated on the stack.
+ * @dev: Device to handle.
+ */
+static void dpm_wd_set(struct dpm_watchdog *wd, struct device *dev)
+{
+   struct timer_list *timer = &wd->timer;
+
+   wd->dev = dev;
+   wd->tsk = get_current();
+
+   init_timer_on_stack(timer);
+   timer->expires = jiffies + HZ * 12;
+   timer->function = dpm_wd_handler;
+   timer->data = (unsigned long)wd;
+   add_timer(timer);
+}
+
+/**
+ * dpm_wd_clear - Disable pm watchdog.
+ * @wd: Watchdog to disable.
+ */
+static void dpm_wd_clear(struct dpm_watchdog *wd)
+{
+   struct timer_list *timer = &wd->timer;
+
+   del_timer_sync(timer);
+   destroy_timer_on_stack(timer);
+}
+
 /*- Resume routines -*/
 
 /**
@@ -570,6 +628,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct dpm_watchdog wd;
 
TRACE_DEVICE(dev);
TRACE_RESUME(0);
@@ -585,6 +644,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 * a resumed device, even if the device hasn't been completed yet.
 */
dev->power.is_prepared = false;
+   dpm_wd_set(&wd, dev);
 
if (!dev->power.is_suspended)
goto Unlock;
@@ -636,6 +696,7 @@ static int device_resume(struct device *dev, pm_message_t 
state, bool async)
 
  Unlock:
device_unlock(dev);
+   dpm_wd_clear(&wd);
 
  Complete:
complete_all(&dev->power.completion);
@@ -1053,6 +1114,7 @@ static int __device_suspend(struct device *dev, 
pm_message_t state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct dpm_watchdog wd;
 
dpm_wait_for_children(dev, async);
 
@@ -1076,6 +1138,8 @@ static int __device_suspend(struct device *dev, 
pm_message_t state

[RFC PATCHv2 2/2] PM: compile-time configuration of device suspend/resume watchdogs.

2013-05-10 Thread Zoran Markovic

Power management debug option to configure device suspend/resume watchdogs.
Available options are:
  1. Enable/disable the feature.
  2. Select triggered watchdog action between:
- system panic (default)
- dump stacktrace
- log event
  3. Select timeout value for the watchdog(s).

Minor changes were made to watchdog code to accommodate this feature.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   37 ++
 kernel/power/Kconfig  |   48 +
 2 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index a6a02c0..8e0bb33 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -392,6 +392,26 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
return error;
 }
 
+#ifdef CONFIG_DPM_WD
+/**
+ * dpm_wd_action - recovery from suspend/resume watchdog timeout
+ * @wd: Watchdog. Must be allocated on the stack.
+ */
+#if defined(CONFIG_DPM_WD_ACTION_STACKTRACE)
+static inline void dpm_wd_action(struct dpm_watchdog *wd)
+{
+   show_stack(wd->tsk, NULL);
+}
+#elif defined(CONFIG_DPM_WD_ACTION_PANIC)
+static inline void dpm_wd_action(struct dpm_watchdog *wd)
+{
+   panic("%s: unrecoverable failure\n", dev_name(wd->dev));
+}
+#else /* CONFIG_DPM_WD_ACTION_LOG */
+/* event already logged in dpm_wd_handler() */
+#define dpm_wd_action(x)
+#endif
+
 /**
  * dpm_wd_handler - Driver suspend / resume watchdog handler.
  *
@@ -402,13 +422,9 @@ static int dpm_run_callback(pm_callback_t cb, struct 
device *dev,
 static void dpm_wd_handler(unsigned long data)
 {
struct dpm_watchdog *wd = (void *)data;
-   struct device *dev  = wd->dev;
-   struct task_struct *tsk = wd->tsk;
-
-   dev_emerg(dev, " DPM device timeout \n");
-   show_stack(tsk, NULL);
 
-   BUG();
+   dev_emerg(wd->dev, " DPM device timeout \n");
+   dpm_wd_action(wd);
 }
 
 /**
@@ -424,14 +440,15 @@ static void dpm_wd_set(struct dpm_watchdog *wd, struct 
device *dev)
wd->tsk = get_current();
 
init_timer_on_stack(timer);
-   timer->expires = jiffies + HZ * 12;
+   /* use same timeout value for both suspend and resume */
+   timer->expires = jiffies + HZ * CONFIG_DPM_WD_TIMEOUT;
timer->function = dpm_wd_handler;
timer->data = (unsigned long)wd;
add_timer(timer);
 }
 
 /**
- * dpm_wd_clear - Disable pm watchdog.
+ * dpm_wd_clear - Disable suspend/resume watchdog.
  * @wd: Watchdog to disable.
  */
 static void dpm_wd_clear(struct dpm_watchdog *wd)
@@ -441,6 +458,10 @@ static void dpm_wd_clear(struct dpm_watchdog *wd)
del_timer_sync(timer);
destroy_timer_on_stack(timer);
 }
+#else
+#define dpm_wd_set(x, y)
+#define dpm_wd_clear(x)
+#endif
 
 /*- Resume routines -*/
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index edc8bdd..339caa1 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -179,6 +179,54 @@ config PM_SLEEP_DEBUG
def_bool y
depends on PM_DEBUG && PM_SLEEP
 
+config DPM_WD
+   bool "Device suspend/resume watchdog"
+   depends on PM_DEBUG
+   ---help---
+ Sets up a watchdog timer to capture drivers that are
+ locked up attempting to suspend/resume a device.
+ A detected lockup causes a configurable watchdog action,
+ such as logging the event, dumping the stack trace or
+ kernel panic.
+
+choice
+   prompt "Watchdog recovery action"
+   default DPM_WD_ACTION_PANIC
+   depends on DPM_WD
+   ---help---
+ Select recovery action triggered by suspend/resume watchdog.
+
+config DPM_WD_ACTION_PANIC
+   bool "System panic"
+   ---help---
+ When selected, a lockup during device's suspend or
+ resume would cause a system panic. This would immediately 
+ capture user's attention. Panic message can be observed in 
+ subsequent boot session using pstore.
+
+config DPM_WD_ACTION_STACKTRACE
+   bool "Dump stack"
+   ---help---
+ When selected, a lockup during device's suspend or
+ resume would cause the caller's stack to be
+ captured in the system log. The stack trace shows
+ which driver call caused a lockup.
+
+config DPM_WD_ACTION_LOG
+   bool "Log event"
+   ---help---
+ When selected, a lockup during device's suspend or
+ resume would cause the watchdog timeout event to be
+ logged in the system log.
+
+endchoice
+
+conf

[RFC PATCHv2 0/2] power: device suspend/resume watchdog

2013-05-10 Thread Zoran Markovic

Hi all,
Attached are two patches addressing comments on the implementation
of the device suspend (and resume) watchdogs from the Android kernel. I have
squashed the changes for the suspend and resume watchdogs, as they address
pretty much the same functionality, and also added compile-time
configurability of the watchdogs.

Please be so kind as to review them and comment on whether they are ready for upstreaming.

Best regards,
Zoran

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 

Benoit Goby (1):
  drivers: power: Add watchdog timer to catch drivers which lockup
during suspend/resume.

Zoran Markovic (1):
  PM: compile-time configuration of device suspend/resume watchdogs.

 drivers/base/power/main.c |   87 +
 kernel/power/Kconfig  |   48 +
 2 files changed, 135 insertions(+)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC PATCH] power: Add option to log time spent in suspend

2013-04-30 Thread Zoran Markovic

From: Colin Cross 

Below is a patch from the Android kernel that maintains a histogram of
suspend times. Please review and provide feedback.

Prints the time spent in suspend in the kernel log, and keeps statistics
on the time spent in suspend in /sys/kernel/debug/suspend_time

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Signed-off-by: Colin Cross 
Signed-off-by: Todd Poynor 
[zoran.marko...@linaro.org: Re-formatted suspend time table to better fit
expected values, tweaked commit message]
Signed-off-by: Zoran Markovic 
---
 kernel/power/Kconfig|7 +++
 kernel/power/Makefile   |1 +
 kernel/power/suspend_time.c |  111 +++
 3 files changed, 119 insertions(+)
 create mode 100644 kernel/power/suspend_time.c

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 5dfdc9e..edc8bdd 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -274,3 +274,10 @@ config PM_GENERIC_DOMAINS_RUNTIME
 config CPU_PM
bool
depends on SUSPEND || CPU_IDLE
+
+config SUSPEND_TIME
+   bool "Log time spent in suspend"
+   ---help---
+ Prints the time spent in suspend in the kernel log, and
+ keeps statistics on the time spent in suspend in
+ /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bf..578e20e 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o 
swap.o user.o \
   block_io.o
 obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
 obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
+obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)  += poweroff.o
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 000..a613ede
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+   unsigned int bin;
+   seq_printf(s, "  time (secs)count\n");
+   seq_printf(s, "--\n");
+   for (bin = 0; bin < 32; bin++) {
+   if (time_in_suspend_bins[bin] == 0)
+   continue;
+   seq_printf(s, "%10u - %-10u %4u\n",
+   bin ? 1 << (bin - 1) : 0, 1 << bin,
+   time_in_suspend_bins[bin]);
+   }
+   return 0;
+}
+
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+   return single_open(file, suspend_time_debug_show, NULL);
+}
+
+static const struct file_operations suspend_time_debug_fops = {
+   .open   = suspend_time_debug_open,
+   .read   = seq_read,
+   .llseek = seq_lseek,
+   .release= single_release,
+};
+
+static int __init suspend_time_debug_init(void)
+{
+   struct dentry *d;
+
+   d = debugfs_create_file("suspend_time", 0444, NULL, NULL,
+   &suspend_time_debug_fops);
+   if (!d) {
+   pr_err("Failed to create suspend_time debug file\n");
+   return -ENOMEM;
+   }
+
+   return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+static int suspend_time_syscore_suspend(void)
+{
+   read_persistent_clock(&suspend_time_before);
+
+   return 0;
+}
+
+static void suspend_time_syscore_resume(void)
+{
+   struct timespec after;
+
+   read_persistent_clock(&after);
+
+   after = timespec_sub(after, suspend_time_before);
+
+   time_in_suspend_bins[fls(after.tv_sec)]++;
+
+   pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+   after.tv_nsec / NSEC_PER_MSEC);
+}
+
+static struct syscore_ops suspend_time_syscore_ops = {
+   .suspend = suspend_time_syscore_suspend,
+   .resume = suspend_time_syscore_resume,
+};
+
+static int suspend_time_syscore_init(void)

[RFC PATCH] drivers: power: Add watchdog timer to catch drivers which lockup during suspend.

2013-04-30 Thread Zoran Markovic

From: Benoit Goby 

Below is a patch from the Android kernel that detects a driver suspend
lockup and captures a dump in the kernel log. Please review and provide
comments.

Rather than hard-lock the kernel, dump the suspend thread stack and
BUG() when a driver takes too long to suspend.  The timeout is set to
12 seconds to be longer than the usbhid 10 second timeout.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

Cc: Android Kernel Team 
Cc: Colin Cross 
Cc: Todd Poynor 
Cc: San Mehat 
Cc: Benoit Goby 
Cc: John Stultz 
Cc: Pavel Machek 
Cc: Rafael J. Wysocki 
Cc: Len Brown 
Cc: Greg Kroah-Hartman 
Original-author: San Mehat 
Signed-off-by: Benoit Goby 
[zoran.marko...@linaro.org: Changed printk(KERN_EMERG,...) to pr_emerg(...),
tweaked commit message.]
Signed-off-by: Zoran Markovic 
---
 drivers/base/power/main.c |   45 +
 1 file changed, 45 insertions(+)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 15beb50..eb70c0e 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -29,6 +29,8 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "../base.h"
 #include "power.h"
 
@@ -54,6 +56,12 @@ struct suspend_stats suspend_stats;
 static DEFINE_MUTEX(dpm_list_mtx);
 static pm_message_t pm_transition;
 
+static void dpm_drv_timeout(unsigned long data);
+struct dpm_drv_wd_data {
+   struct device *dev;
+   struct task_struct *tsk;
+};
+
 static int async_error;
 
 /**
@@ -663,6 +671,30 @@ static bool is_async(struct device *dev)
 }
 
 /**
+ * dpm_drv_timeout - Driver suspend / resume watchdog handler
+ * @data: struct device which timed out
+ *
+ * Called when a driver has timed out suspending or resuming.
+ * There's not much we can do here to recover so
+ * BUG() out for a crash-dump
+ *
+ */
+static void dpm_drv_timeout(unsigned long data)
+{
+   struct dpm_drv_wd_data *wd_data = (void *)data;
+   struct device *dev = wd_data->dev;
+   struct task_struct *tsk = wd_data->tsk;
+
+   pr_emerg(" DPM device timeout: %s (%s)\n", dev_name(dev),
+   (dev->driver ? dev->driver->name : "no driver"));
+
+   pr_emerg("dpm suspend stack:\n");
+   show_stack(tsk, NULL);
+
+   BUG();
+}
+
+/**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
  *
@@ -1053,6 +1085,8 @@ static int __device_suspend(struct device *dev, 
pm_message_t state, bool async)
pm_callback_t callback = NULL;
char *info = NULL;
int error = 0;
+   struct timer_list timer;
+   struct dpm_drv_wd_data data;
 
dpm_wait_for_children(dev, async);
 
@@ -1076,6 +1110,14 @@ static int __device_suspend(struct device *dev, 
pm_message_t state, bool async)
if (dev->power.syscore)
goto Complete;
 
+   data.dev = dev;
+   data.tsk = get_current();
+   init_timer_on_stack(&timer);
+   timer.expires = jiffies + HZ * 12;
+   timer.function = dpm_drv_timeout;
+   timer.data = (unsigned long)&data;
+   add_timer(&timer);
+
device_lock(dev);
 
if (dev->pm_domain) {
@@ -1131,6 +1173,9 @@ static int __device_suspend(struct device *dev, 
pm_message_t state, bool async)
 
device_unlock(dev);
 
+   del_timer_sync(&timer);
+   destroy_timer_on_stack(&timer);
+
  Complete:
complete_all(&dev->power.completion);
if (error)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

63 matches

Mail list logo