Re: [PATCH v2 0/3] move slabinfo processing to common code

2012-10-19 Thread Glauber Costa
On 09/28/2012 07:03 PM, Glauber Costa wrote:
 Hi,
 
 This patch moves on with the slab caches commonization, by moving
 the slabinfo processing to common code in slab_common.c. It only touches
 slub and slab, since slob doesn't create that file, which is protected
 by a Kconfig switch.
 
 Enjoy,
 
 v2: return objects per slab and cache order in slabinfo structure as well
 
Hi

Any activity here ?

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] ARM: dts: Update board files for pwm support

2012-10-19 Thread Tony Prisk
This patch adds pwm support to arch-vt8500 board files, and adds
the use-case of pwm-backlight.

Signed-off-by: Tony Prisk li...@prisktech.co.nz
---
 arch/arm/boot/dts/vt8500-bv07.dts |8 
 arch/arm/boot/dts/vt8500.dtsi |   29 +
 arch/arm/boot/dts/wm8505-ref.dts  |8 
 arch/arm/boot/dts/wm8505.dtsi |   29 +
 arch/arm/boot/dts/wm8650-mid.dts  |8 
 arch/arm/boot/dts/wm8650.dtsi |   17 +
 6 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/vt8500-bv07.dts 
b/arch/arm/boot/dts/vt8500-bv07.dts
index 567cf4e..3cba367 100644
--- a/arch/arm/boot/dts/vt8500-bv07.dts
+++ b/arch/arm/boot/dts/vt8500-bv07.dts
@@ -33,4 +33,12 @@
};
};
};
+
+   backlight {
+   compatible = pwm-backlight;
+   pwms = pwm 0 5;
+
+   brightness-levels = 0 4 8 16 32 64 128 255;
+   default-brightness-level = 5;
+   };
 };
diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi
index d8645e9..e196b2e 100644
--- a/arch/arm/boot/dts/vt8500.dtsi
+++ b/arch/arm/boot/dts/vt8500.dtsi
@@ -40,14 +40,43 @@
#address-cells = 1;
#size-cells = 0;
 
+   ref25: ref25M {
+   #clock-cells = 0;
+   compatible = fixed-clock;
+   clock-frequency = 2500;
+   };
+
ref24: ref24M {
#clock-cells = 0;
compatible = fixed-clock;
clock-frequency = 2400;
};
+
+   pllb: pllb {
+   #clock-cells = 0;
+   compatible = via,vt8500-pll-clock;
+   clocks = ref25;
+   reg = 0x204;
+   };
+
+   clkpwm: pwm {
+   #clock-cells = 0;
+   compatible = via,vt8500-device-clock;
+   clocks = pllb;
+   divisor-reg = 0x348;
+   enable-reg = 0x250;
+   enable-bit = 14;
+   };
};
};
 
+   pwm: pwm@d822 {
+   #pwm-cells = 2;
+   compatible = via,vt8500-pwm;
+   reg = 0xd822 0x100;
+   clocks = clkpwm;
+   };
+
timer@d8130100 {
compatible = via,vt8500-timer;
reg = 0xd8130100 0x28;
diff --git a/arch/arm/boot/dts/wm8505-ref.dts b/arch/arm/boot/dts/wm8505-ref.dts
index fd4e248..f51c0ed 100644
--- a/arch/arm/boot/dts/wm8505-ref.dts
+++ b/arch/arm/boot/dts/wm8505-ref.dts
@@ -33,4 +33,12 @@
};
};
};
+
+   backlight {
+   compatible = pwm-backlight;
+   pwms = pwm 0 5;
+
+   brightness-levels = 0 4 8 16 32 64 128 255;
+   default-brightness-level = 5;
+   };
 };
diff --git a/arch/arm/boot/dts/wm8505.dtsi b/arch/arm/boot/dts/wm8505.dtsi
index b459691..83c8ec5 100644
--- a/arch/arm/boot/dts/wm8505.dtsi
+++ b/arch/arm/boot/dts/wm8505.dtsi
@@ -54,14 +54,43 @@
#address-cells = 1;
#size-cells = 0;
 
+   ref25: ref25M {
+   #clock-cells = 0;
+   compatible = fixed-clock;
+   clock-frequency = 2500;
+   };
+
ref24: ref24M {
#clock-cells = 0;
compatible = fixed-clock;
clock-frequency = 2400;
};
+
+   pllb: pllb {
+   #clock-cells = 0;
+   compatible = via,vt8500-pll-clock;
+   clocks = ref25;
+   reg = 0x204;
+   };
+
+   clkpwm: pwm {
+   #clock-cells = 0;
+   compatible = via,vt8500-device-clock;
+   

Re: [PATCH 1/10] memory-hotplug : check whether memory is offline or not when removing memory

2012-10-19 Thread Wen Congyang
At 10/06/2012 03:27 AM, KOSAKI Motohiro Wrote:
 On Thu, Oct 4, 2012 at 10:25 PM, Yasuaki Ishimatsu
 isimatu.yasu...@jp.fujitsu.com wrote:
 When calling remove_memory(), the memory should be offline. If the function
 is used on online memory, a kernel panic may occur.

 So the patch checks whether memory is offline or not.
 
 You don't explain WHY we need the check.

This patch is not necessary now, because the newest kernel already
checks it.

Thanks
Wen Congyang

 
 
 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 Signed-off-by: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
  drivers/base/memory.c  |   39 +++
  include/linux/memory.h |5 +
  mm/memory_hotplug.c|   17 +++--
  3 files changed, 59 insertions(+), 2 deletions(-)

 Index: linux-3.6/drivers/base/memory.c
 ===
 --- linux-3.6.orig/drivers/base/memory.c2012-10-04 
 14:22:57.0 +0900
 +++ linux-3.6/drivers/base/memory.c 2012-10-04 14:45:46.653585860 +0900
 @@ -70,6 +70,45 @@ void unregister_memory_isolate_notifier(
  }
  EXPORT_SYMBOL(unregister_memory_isolate_notifier);

 +bool is_memblk_offline(unsigned long start, unsigned long size)
 
 Don't use memblk. Usually memblk means struct numa_meminfo for x86/numa.
 Maybe memory_range_offlined() is better.
 
 And this function doesn't take a struct memory_block, so this file may not
 be a good place for it.
 
 And you need to write a function comment.
 
 
 +{
 +   struct memory_block *mem = NULL;
 +   struct mem_section *section;
 +   unsigned long start_pfn, end_pfn;
 +   unsigned long pfn, section_nr;
 +
 +   start_pfn = PFN_DOWN(start);
 +   end_pfn = PFN_UP(start + size);
 +
 +   for (pfn = start_pfn; pfn  end_pfn; pfn += PAGES_PER_SECTION) {
 +   section_nr = pfn_to_section_nr(pfn);
 +   if (!present_section_nr(section_nr))
 +   continue;
 +
 +   section = __nr_to_section(section_nr);
 +   /* same memblock? */
 +   if (mem)
 +   if ((section_nr = mem-start_section_nr) 
 +   (section_nr = mem-end_section_nr))
 +   continue;
 +
 +   mem = find_memory_block_hinted(section, mem);
 +   if (!mem)
 +   continue;
 +   if (mem-state == MEM_OFFLINE)
 +   continue;
 +
 +   kobject_put(mem-dev.kobj);
 +   return false;
 +   }
 +
 +   if (mem)
 +   kobject_put(mem-dev.kobj);
 +
 +   return true;
 +}
 +EXPORT_SYMBOL(is_memblk_offline);
 +
  /*
   * register_memory - Setup a sysfs device for a memory block
   */
 Index: linux-3.6/include/linux/memory.h
 ===
 --- linux-3.6.orig/include/linux/memory.h   2012-10-02 
 18:00:22.0 +0900
 +++ linux-3.6/include/linux/memory.h2012-10-04 14:44:40.902581028 +0900
 @@ -106,6 +106,10 @@ static inline int memory_isolate_notify(
  {
 return 0;
  }
 +static inline bool is_memblk_offline(unsigned long start, unsigned long 
 size)
 +{
 +   return false;
 +}
  #else
  extern int register_memory_notifier(struct notifier_block *nb);
  extern void unregister_memory_notifier(struct notifier_block *nb);
 @@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigne
  extern struct memory_block *find_memory_block_hinted(struct mem_section *,
 struct memory_block 
 *);
  extern struct memory_block *find_memory_block(struct mem_section *);
 +extern bool is_memblk_offline(unsigned long start, unsigned long size);
  #define CONFIG_MEM_BLOCK_SIZE  (PAGES_PER_SECTIONPAGE_SHIFT)
  enum mem_add_context { BOOT, HOTPLUG };
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 Index: linux-3.6/mm/memory_hotplug.c
 ===
 --- linux-3.6.orig/mm/memory_hotplug.c  2012-10-04 14:31:08.0 +0900
 +++ linux-3.6/mm/memory_hotplug.c   2012-10-04 14:58:22.449687986 +0900
 @@ -1045,8 +1045,21 @@ int offline_memory(u64 start, u64 size)

  int remove_memory(int nid, u64 start, u64 size)
  {
 
 Your remove_memory() doesn't remove anything. That's strange.
 
 
 -   /* It is not implemented yet*/
 -   return 0;
 +   int ret = 0;
 +   lock_memory_hotplug();
 +   /*
 +* The memory might become online by other task, even if you offine 
 it.
 +* So we check whether the memory has been onlined or not.
 +*/
 +   if (!is_memblk_offline(start, size)) {
 +

[PATCH 2/3] PWM: vt8500: Update vt8500 PWM driver support

2012-10-19 Thread Tony Prisk
This patch updates pwm-vt8500.c to support devicetree probing and
make use of the common clock subsystem.

Signed-off-by: Tony Prisk li...@prisktech.co.nz
---
 drivers/pwm/pwm-vt8500.c |   79 ++
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index ad14389..c2a71ee 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -1,7 +1,8 @@
 /*
  * drivers/pwm/pwm-vt8500.c
  *
- *  Copyright (C) 2010 Alexey Charkov alch...@gmail.com
+ * Copyright (C) 2012 Tony Prisk li...@prisktech.co.nz
+ * Copyright (C) 2010 Alexey Charkov alch...@gmail.com
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -21,14 +22,24 @@
 #include linux/io.h
 #include linux/pwm.h
 #include linux/delay.h
+#include linux/clk.h
 
 #include asm/div64.h
 
-#define VT8500_NR_PWMS 4
+#include linux/of.h
+#include linux/of_device.h
+#include linux/of_address.h
+
+/*
+ * SoC architecture allocates register space for 4 PWMs but only
+ * 2 are currently implemented.
+ */
+#define VT8500_NR_PWMS 2
 
 struct vt8500_chip {
struct pwm_chip chip;
void __iomem *base;
+   struct clk *clk;
 };
 
 #define to_vt8500_chip(chip)   container_of(chip, struct vt8500_chip, chip)
@@ -52,7 +63,7 @@ static int vt8500_pwm_config(struct pwm_chip *chip, struct 
pwm_device *pwm,
unsigned long long c;
unsigned long period_cycles, prescale, pv, dc;
 
-   c = 2500/2; /* wild guess --- need to implement clocks */
+   c = clk_get_rate(vt8500-clk);
c = c * period_ns;
do_div(c, 10);
period_cycles = c;
@@ -107,12 +118,22 @@ static struct pwm_ops vt8500_pwm_ops = {
.owner = THIS_MODULE,
 };
 
-static int __devinit pwm_probe(struct platform_device *pdev)
+static const struct of_device_id vt8500_pwm_dt_ids[] = {
+   { .compatible = via,vt8500-pwm, },
+   { /* Sentinel */ }
+};
+
+static int __devinit vt8500_pwm_probe(struct platform_device *pdev)
 {
struct vt8500_chip *chip;
-   struct resource *r;
+   struct device_node *np = pdev-dev.of_node;
int ret;
 
+   if (!np) {
+   dev_err(pdev-dev, invalid devicetree node\n);
+   return -EINVAL;
+   }
+
chip = devm_kzalloc(pdev-dev, sizeof(*chip), GFP_KERNEL);
if (chip == NULL) {
dev_err(pdev-dev, failed to allocate memory\n);
@@ -123,26 +144,32 @@ static int __devinit pwm_probe(struct platform_device 
*pdev)
chip-chip.ops = vt8500_pwm_ops;
chip-chip.base = -1;
chip-chip.npwm = VT8500_NR_PWMS;
+   chip-clk = of_clk_get(np, 0);
 
-   r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-   if (r == NULL) {
-   dev_err(pdev-dev, no memory resource defined\n);
-   return -ENODEV;
+   if (!chip-clk) {
+   dev_err(pdev-dev, clock source not specified\n);
+   return -EINVAL;
}
 
-   chip-base = devm_request_and_ioremap(pdev-dev, r);
-   if (chip-base == NULL)
+   chip-base = of_iomap(np, 0);
+   if (!chip-base) {
+   dev_err(pdev-dev, memory resource not available\n);
return -EADDRNOTAVAIL;
+   }
+
+   clk_prepare_enable(chip-clk);
 
ret = pwmchip_add(chip-chip);
-   if (ret  0)
+   if (ret  0) {
+   dev_err(pdev-dev, failed to add pwmchip\n);
return ret;
+   }
 
platform_set_drvdata(pdev, chip);
return ret;
 }
 
-static int __devexit pwm_remove(struct platform_device *pdev)
+static int __devexit vt8500_pwm_remove(struct platform_device *pdev)
 {
struct vt8500_chip *chip;
 
@@ -150,28 +177,24 @@ static int __devexit pwm_remove(struct platform_device 
*pdev)
if (chip == NULL)
return -ENODEV;
 
+   clk_disable_unprepare(chip-clk);
+
return pwmchip_remove(chip-chip);
 }
 
-static struct platform_driver pwm_driver = {
+static struct platform_driver vt8500_pwm_driver = {
+   .probe  = vt8500_pwm_probe,
+   .remove = __devexit_p(vt8500_pwm_remove),
.driver = {
.name   = vt8500-pwm,
.owner  = THIS_MODULE,
+   .of_match_table = vt8500_pwm_dt_ids,
},
-   .probe  = pwm_probe,
-   .remove = __devexit_p(pwm_remove),
 };
 
-static int __init pwm_init(void)
-{
-   return platform_driver_register(pwm_driver);
-}
-arch_initcall(pwm_init);
-
-static void __exit pwm_exit(void)
-{
-   platform_driver_unregister(pwm_driver);
-}
-module_exit(pwm_exit);
+module_platform_driver(vt8500_pwm_driver);
 
-MODULE_LICENSE(GPL);
+MODULE_DESCRIPTION(VT8500 PWM Driver);
+MODULE_AUTHOR(Tony Prisk li...@prisktech.co.nz);
+MODULE_LICENSE(GPL v2);
+MODULE_DEVICE_TABLE(of, vt8500_pwm_dt_ids);
-- 
1.7.9.5

--

[PATCH v3 resend] USB: PHY: Re-organize Tegra USB PHY driver

2012-10-19 Thread Venu Byravarasu
NVIDIA produces several Tegra SoCs viz Tegra20, Tegra30 etc.
In order to support USB PHY drivers on these SoCs, existing
PHY driver is split into SoC agnostic common USB PHY driver
and Tegra20-specific USB phy driver. This will facilitate
easy addition and deletion of phy drivers for Tegra SoCs.

Signed-off-by: Venu Byravarasu vbyravar...@nvidia.com
---
delta from v3 resend:
For unknown reasons, email-id in signed-off-by got corrupted.
Hence re-sending the patch, after fixing it.

delta from v3:
Rebased the v3 changes on top of xceiv branch.

delta from v2:
Added an if condition to check for device_node to be not NULL,
before dereferencing it.


 drivers/usb/host/ehci-tegra.c  |   20 +-
 drivers/usb/phy/Makefile   |1 +
 .../usb/phy/{tegra_usb_phy.c = tegra2_usb_phy.c}  |  372 ++-
 drivers/usb/phy/tegra2_usb_phy.h   |  178 +
 drivers/usb/phy/tegra_usb_phy.c|  725 +---
 include/linux/usb/tegra_usb_phy.h  |   34 +-
 6 files changed, 304 insertions(+), 1026 deletions(-)
 copy drivers/usb/phy/{tegra_usb_phy.c = tegra2_usb_phy.c} (57%)
 create mode 100644 drivers/usb/phy/tegra2_usb_phy.h

diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 6223d17..e08aea3 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -618,6 +618,9 @@ static int tegra_ehci_probe(struct platform_device *pdev)
int err = 0;
int irq;
int instance = pdev-id;
+   struct device_node *np = pdev-dev.of_node;
+   struct phy_params params;
+   int phy_type;
 
pdata = pdev-dev.platform_data;
if (!pdata) {
@@ -706,9 +709,22 @@ static int tegra_ehci_probe(struct platform_device *pdev)
}
}
 
+   phy_type = of_property_match_string(np, phy_type, utmi);
+   if (phy_type = 0)
+   params.type = TEGRA_USB_PHY_TYPE_UTMI;
+   else {
+   phy_type = of_property_match_string(np, phy_type, ulpi);
+   if (phy_type = 0)
+   params.type = TEGRA_USB_PHY_TYPE_ULPI;
+   else
+   params.type = TEGRA_USB_PHY_TYPE_INVALID;
+   }
+
+   params.mode = TEGRA_USB_PHY_MODE_HOST;
+   params.config = pdata-phy_config;
+
tegra-phy = tegra_usb_phy_open(pdev-dev, instance, hcd-regs,
-   pdata-phy_config,
-   TEGRA_USB_PHY_MODE_HOST);
+   params);
if (IS_ERR(tegra-phy)) {
dev_err(pdev-dev, Failed to open USB phy\n);
err = -ENXIO;
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index b069f29..21872e1 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_OMAP_USB2) += omap-usb2.o
 obj-$(CONFIG_USB_ISP1301)  += isp1301.o
 obj-$(CONFIG_MV_U3D_PHY)   += mv_u3d_phy.o
 obj-$(CONFIG_USB_EHCI_TEGRA)   += tegra_usb_phy.o
+obj-$(CONFIG_USB_EHCI_TEGRA)   += tegra2_usb_phy.o
diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra2_usb_phy.c
similarity index 57%
copy from drivers/usb/phy/tegra_usb_phy.c
copy to drivers/usb/phy/tegra2_usb_phy.c
index 9d13c81..2ff6dcb 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra2_usb_phy.c
@@ -1,9 +1,11 @@
 /*
  * Copyright (C) 2010 Google, Inc.
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
  *
  * Author:
  * Erik Gilling konk...@google.com
  * Benoit Goby ben...@android.com
+ * Venu Byravarasu vbyravar...@nvidia.com
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -29,191 +31,20 @@
 #include linux/usb/ulpi.h
 #include asm/mach-types.h
 #include linux/usb/tegra_usb_phy.h
+#include mach/iomap.h
 
-#define TEGRA_USB_BASE 0xC500
-#define TEGRA_USB_SIZE SZ_16K
-
-#define ULPI_VIEWPORT  0x170
-
-#define USB_PORTSC10x184
-#define   USB_PORTSC1_PTS(x)   (((x)  0x3)  30)
-#define   USB_PORTSC1_PSPD(x)  (((x)  0x3)  26)
-#define   USB_PORTSC1_PHCD (1  23)
-#define   USB_PORTSC1_WKOC (1  22)
-#define   USB_PORTSC1_WKDS (1  21)
-#define   USB_PORTSC1_WKCN (1  20)
-#define   USB_PORTSC1_PTC(x)   (((x)  0xf)  16)
-#define   USB_PORTSC1_PP   (1  12)
-#define   USB_PORTSC1_SUSP (1  7)
-#define   USB_PORTSC1_PE   (1  2)
-#define   USB_PORTSC1_CCS  (1  0)
-
-#define USB_SUSP_CTRL  0x400
-#define   USB_WAKE_ON_CNNT_EN_DEV  (1  3)
-#define   USB_WAKE_ON_DISCON_EN_DEV(1  4)
-#define   USB_SUSP_CLR (1  5)
-#define   USB_PHY_CLK_VALID(1  7)
-#define   UTMIP_RESET  (1  11)
-#define   UHSIC_RESET  (1  11)
-#define   UTMIP_PHY_ENABLE (1  12)
-#define   ULPI_PHY_ENABLE  (1  13)

[PATCH 3/3] DOC: PWM: Adding binding document for via,vt8500-pwm

2012-10-19 Thread Tony Prisk
Add a binding document describing the PWM controller found
on arch-vt8500 supported SoCs.

Signed-off-by: Tony Prisk li...@prisktech.co.nz
---
 .../devicetree/bindings/pwm/vt8500-pwm.txt |   17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pwm/vt8500-pwm.txt

diff --git a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt 
b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
new file mode 100644
index 000..e8ba133
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
@@ -0,0 +1,17 @@
+VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
+
+Required properties:
+- compatible: should be via,vt8500-pwm
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2.  The first cell specifies the per-chip index
+  of the PWM to use and the second cell is the period in nanoseconds.
+- clocks: pHandle to the PWM source clock
+
+Example:
+
+pwm1: pwm@d822 {
+   #pwm-cells = 2;
+   compatible = via,vt8500-pwm;
+   reg = 0xd822 0x1000;
+   clocks = clkpwm;
+};
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4] posix timers: allocate timer id per process

2012-10-19 Thread Eric Dumazet
On Fri, 2012-10-19 at 13:38 +0400, Stanislav Kinsbursky wrote:
 19.10.2012 11:56, Eric Dumazet пишет:
  I wonder if some applications relied on our idr, assuming they would get
  low values for their timer id.
  (We could imagine some applications use a table indexed by the timer id)
 
 Hmm.
 Probably, this particular case can be optimised by tuning min_id to id of 
 releasing timer (if id of this timer is less than current-signal min_id).
 Does this approach solve the issue you mentioned above?

Not generally, but I am not sure we want a per signal_struct idr ;)

Really that should be clearly explained in the changelog, so that buggy
applications can have a clue of what happened.

When we changed UDP source port selection to be random instead of
sequential, maybe this broke some applications. That was an
implementation choice (with security impact).



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] arm/dts: am33xx rtc node

2012-10-19 Thread Afzal Mohammed
add am33xx rtc node.

Signed-off-by: Afzal Mohammed af...@ti.com
---

Based on v3.7-rc1,
Dependent on series rtc: omap dt support (for am33xx),
(https://lkml.org/lkml/2012/10/19/163)
Tested on Beagle Bone.

 arch/arm/boot/dts/am33xx.dtsi | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index bb31bff..356711e 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -210,5 +210,14 @@
interrupt-parent = intc;
interrupts = 91;
};
+
+   rtc {
+   compatible = ti,da830-rtc;
+   ti,hwmods = rtc;
+   reg = 0x44e3e000 0x1000;
+   interrupt-parent = intc;
+   interrupts = 75
+ 76;
+   };
};
 };
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4 v2] gpio/tc3589x: convert to use the simple irqdomain

2012-10-19 Thread Linus Walleij
The special checks for whether we have a base IRQ offset or not
are surplus if we use the simple IRQ domain. The IRQ offset
zero will be interpreted as a linear domain case.

Plus this makes sure we allocate descriptors where need be, or
warn if they are preallocated with SPARSE_IRQ.

Cc: Grant Likely grant.lik...@secretlab.ca
Cc: Rob Herring rob.herr...@calxeda.com
Cc: Lee Jones lee.jo...@linaro.org
Signed-off-by: Linus Walleij linus.wall...@linaro.org
---
ChangeLog v1-v2: add a comment that explains what is going on,
  it fooled me so it could fool somebody else too.
---
 drivers/gpio/gpio-tc3589x.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index 1e48317..8c8447c 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -292,17 +292,15 @@ static int tc3589x_gpio_irq_init(struct tc3589x_gpio 
*tc3589x_gpio,
 {
int base = tc3589x_gpio-irq_base;
 
-   if (base) {
-   tc3589x_gpio-domain = irq_domain_add_legacy(
-   NULL, tc3589x_gpio-chip.ngpio, base,
-   0, tc3589x_irq_ops, tc3589x_gpio);
-   }
-   else {
-   tc3589x_gpio-domain = irq_domain_add_linear(
-   np, tc3589x_gpio-chip.ngpio,
-   tc3589x_irq_ops, tc3589x_gpio);
-   }
-
+   /*
+* If this results in a linear domain, irq_create_mapping() will
+* take care of allocating IRQ descriptors at runtime. When a base
+* is provided, the IRQ descriptors will be allocated when the
+* domain is instantiated.
+*/
+   tc3589x_gpio-domain = irq_domain_add_simple(np,
+   tc3589x_gpio-chip.ngpio, base, tc3589x_irq_ops,
+   tc3589x_gpio);
if (!tc3589x_gpio-domain) {
dev_err(tc3589x_gpio-dev, Failed to create irqdomain\n);
return -ENOSYS;
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4 v2] gpio/em: convert to linear IRQ domain

2012-10-19 Thread Linus Walleij
The code in the em driver seems to want to try to do the job of
the linear IRQ domain (allocate descriptors and grab a virtual
range). So why not just use the linear IRQ domain? The code is
now cut down so we don't need isolated functions for this.

Also note that we use irq_create_mapping() to make sure
descriptors are allocated for these IRQs.

Also fixed the FIXME to remove the domain after use.

Cc: Grant Likely grant.lik...@secretlab.ca
Cc: Magnus Damm d...@opensource.se
Signed-off-by: Linus Walleij linus.wall...@linaro.org
---
ChangeLog v1-v2:
- Use irq_create_mapping() so that descriptors get allocated at
  runtime for the linear domain.
- factor the irqdomain stuff into the probe() function.
---
 drivers/gpio/gpio-em.c | 48 
 1 file changed, 8 insertions(+), 40 deletions(-)

diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index efb4c2d..f6d74e1 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -35,7 +35,6 @@
 struct em_gio_priv {
void __iomem *base0;
void __iomem *base1;
-   unsigned int irq_base;
spinlock_t sense_lock;
struct platform_device *pdev;
struct gpio_chip gpio_chip;
@@ -163,7 +162,7 @@ static irqreturn_t em_gio_irq_handler(int irq, void *dev_id)
while ((pending = em_gio_read(p, GIO_MST))) {
offset = __ffs(pending);
em_gio_write(p, GIO_IIR, BIT(offset));
-   generic_handle_irq(irq_find_mapping(p-irq_domain, offset));
+   generic_handle_irq(irq_create_mapping(p-irq_domain, offset));
irqs_handled++;
}
 
@@ -214,7 +213,7 @@ static int em_gio_direction_output(struct gpio_chip *chip, 
unsigned offset,
 
 static int em_gio_to_irq(struct gpio_chip *chip, unsigned offset)
 {
-   return irq_find_mapping(gpio_to_priv(chip)-irq_domain, offset);
+   return irq_create_mapping(gpio_to_priv(chip)-irq_domain, offset);
 }
 
 static int em_gio_irq_domain_map(struct irq_domain *h, unsigned int virq,
@@ -234,40 +233,6 @@ static struct irq_domain_ops em_gio_irq_domain_ops = {
.map= em_gio_irq_domain_map,
 };
 
-static int __devinit em_gio_irq_domain_init(struct em_gio_priv *p)
-{
-   struct platform_device *pdev = p-pdev;
-   struct gpio_em_config *pdata = pdev-dev.platform_data;
-
-   p-irq_base = irq_alloc_descs(pdata-irq_base, 0,
- pdata-number_of_pins, numa_node_id());
-   if (p-irq_base  0) {
-   dev_err(pdev-dev, cannot get irq_desc\n);
-   return p-irq_base;
-   }
-   pr_debug(gio: hw base = %d, nr = %d, sw base = %d\n,
-pdata-gpio_base, pdata-number_of_pins, p-irq_base);
-
-   p-irq_domain = irq_domain_add_legacy(pdev-dev.of_node,
- pdata-number_of_pins,
- p-irq_base, 0,
- em_gio_irq_domain_ops, p);
-   if (!p-irq_domain) {
-   irq_free_descs(p-irq_base, pdata-number_of_pins);
-   return -ENXIO;
-   }
-
-   return 0;
-}
-
-static void em_gio_irq_domain_cleanup(struct em_gio_priv *p)
-{
-   struct gpio_em_config *pdata = p-pdev-dev.platform_data;
-
-   irq_free_descs(p-irq_base, pdata-number_of_pins);
-   /* FIXME: irq domain wants to be freed! */
-}
-
 static int __devinit em_gio_probe(struct platform_device *pdev)
 {
struct gpio_em_config *pdata = pdev-dev.platform_data;
@@ -334,8 +299,11 @@ static int __devinit em_gio_probe(struct platform_device 
*pdev)
irq_chip-irq_set_type = em_gio_irq_set_type;
irq_chip-flags = IRQCHIP_SKIP_SET_WAKE;
 
-   ret = em_gio_irq_domain_init(p);
-   if (ret) {
+   p-irq_domain = irq_domain_add_linear(pdev-dev.of_node,
+ pdata-number_of_pins,
+ em_gio_irq_domain_ops, p);
+   if (!p-irq_domain)
+   err = -ENXIO;
dev_err(pdev-dev, cannot initialize irq domain\n);
goto err3;
}
@@ -364,7 +332,7 @@ err6:
 err5:
free_irq(irq[0]-start, pdev);
 err4:
-   em_gio_irq_domain_cleanup(p);
+   irq_domain_remove(p-irq_domain);
 err3:
iounmap(p-base1);
 err2:
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4 v2] gpio/mvebu: convert to use irq_domain_add_simple()

2012-10-19 Thread Linus Walleij
The MVEBU driver probably just wants a few IRQs. Using the simple
domain has the upside of allocating IRQ descriptors if need be,
especially in a SPARSE_IRQ environment.

Cc: Rob Herring rob.herr...@calxeda.com
Cc: Grant Likely grant.lik...@secretlab.ca
Cc: Thomas Petazzoni thomas.petazz...@free-electrons.com
Cc: Sebastian Hesselbarth sebastian.hesselba...@gmail.com
Cc: Andrew Lunn and...@lunn.ch
Signed-off-by: Linus Walleij linus.wall...@linaro.org
---
ChangeLog v1-v2:
- Keep irq_create_mapping() and do not replace with
  irq_find_mapping() - if a linear domain is the outcome,
  we really need to allocate a descriptor on the first mapping
  call.
---
 drivers/gpio/gpio-mvebu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 902af43..e0bde06 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -645,8 +645,8 @@ static int __devinit mvebu_gpio_probe(struct 
platform_device *pdev)
   IRQ_NOREQUEST, IRQ_LEVEL | IRQ_NOPROBE);
 
/* Setup irq domain on top of the generic chip. */
-   mvchip-domain = irq_domain_add_legacy(np, mvchip-chip.ngpio,
-  mvchip-irqbase, 0,
+   mvchip-domain = irq_domain_add_simple(np, mvchip-chip.ngpio,
+  mvchip-irqbase,
   irq_domain_simple_ops,
   mvchip);
if (!mvchip-domain) {
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4 v2] gpio/tegra: convert to use linear irqdomain

2012-10-19 Thread Linus Walleij
The Tegra driver tries to do the work of irq_domain_add_linear()
by reserving a bunch of descriptors somewhere and keeping track
of the base offset, then calling irq_domain_add_legacy(). Let's
stop doing that and simply use the linear IRQ domain.

For this to work: use irq_create_mapping() in the IRQ iterator
so that the descriptors get allocated here.

Cc: Rob Herring rob.herr...@calxeda.com
Cc: Grant Likely grant.lik...@secretlab.ca
Cc: Stephen Warren swar...@nvidia.com
Signed-off-by: Linus Walleij linus.wall...@linaro.org
---
ChangeLog v1-v2:
- Make sure that irq_create_mapping() is called for every IRQ
  on probe() and update the commit message.
---
 drivers/gpio/gpio-tegra.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index d982593..c7c175a 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -380,7 +380,6 @@ static int __devinit tegra_gpio_probe(struct 
platform_device *pdev)
 {
const struct of_device_id *match;
struct tegra_gpio_soc_config *config;
-   int irq_base;
struct resource *res;
struct tegra_gpio_bank *bank;
int gpio;
@@ -417,14 +416,11 @@ static int __devinit tegra_gpio_probe(struct 
platform_device *pdev)
return -ENODEV;
}
 
-   irq_base = irq_alloc_descs(-1, 0, tegra_gpio_chip.ngpio, 0);
-   if (irq_base  0) {
-   dev_err(pdev-dev, Couldn't allocate IRQ numbers\n);
-   return -ENODEV;
-   }
-   irq_domain = irq_domain_add_legacy(pdev-dev.of_node,
-  tegra_gpio_chip.ngpio, irq_base, 0,
+   irq_domain = irq_domain_add_linear(pdev-dev.of_node,
+  tegra_gpio_chip.ngpio,
   irq_domain_simple_ops, NULL);
+   if (!irq_domain)
+   return -ENODEV;
 
for (i = 0; i  tegra_gpio_bank_count; i++) {
res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
@@ -464,7 +460,7 @@ static int __devinit tegra_gpio_probe(struct 
platform_device *pdev)
gpiochip_add(tegra_gpio_chip);
 
for (gpio = 0; gpio  tegra_gpio_chip.ngpio; gpio++) {
-   int irq = irq_find_mapping(irq_domain, gpio);
+   int irq = irq_create_mapping(irq_domain, gpio);
/* No validity check; all Tegra GPIOs are valid IRQs */
 
bank = tegra_gpio_banks[GPIO_BANK(gpio)];
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] arm/dts: am33xx rtc node

2012-10-19 Thread Mohammed, Afzal
+ linux-omap and Daniel

On Fri, Oct 19, 2012 at 16:20:21, Mohammed, Afzal wrote:
 add am33xx rtc node.
 
 Signed-off-by: Afzal Mohammed af...@ti.com
 ---
 
 Based on v3.7-rc1,
 Dependent on series rtc: omap dt support (for am33xx),
 (https://lkml.org/lkml/2012/10/19/163)
 Tested on Beagle Bone.
 
  arch/arm/boot/dts/am33xx.dtsi | 9 +
  1 file changed, 9 insertions(+)
 
 diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
 index bb31bff..356711e 100644
 --- a/arch/arm/boot/dts/am33xx.dtsi
 +++ b/arch/arm/boot/dts/am33xx.dtsi
 @@ -210,5 +210,14 @@
   interrupt-parent = intc;
   interrupts = 91;
   };
 +
 + rtc {
 + compatible = ti,da830-rtc;
 + ti,hwmods = rtc;
 + reg = 0x44e3e000 0x1000;
 + interrupt-parent = intc;
 + interrupts = 75
 +   76;
 + };
   };
  };
 -- 
 1.7.12
 
 

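--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/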

[PATCH] mm: Simplify for_each_populated_zone()

2012-10-19 Thread Srivatsa S. Bhat
Move the check for populated_zone() to the control statement of the
'for' loop and get rid of the odd looking if/else block.

Signed-off-by: Srivatsa S. Bhat srivatsa.b...@linux.vnet.ibm.com
---

 include/linux/mmzone.h |7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 50aaca8..5bdf02e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -913,11 +913,8 @@ extern struct zone *next_zone(struct zone *zone);
 
 #define for_each_populated_zone(zone)  \
for (zone = (first_online_pgdat())-node_zones; \
-zone;  \
-zone = next_zone(zone))\
-   if (!populated_zone(zone))  \
-   ; /* do nothing */  \
-   else
+zone  populated_zone(zone);  \
+zone = next_zone(zone))
 
 static inline struct zone *zonelist_zone(struct zoneref *zoneref)
 {

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/6] cgroups: forbid pre_destroy callback to fail

2012-10-19 Thread Michal Hocko
On Fri 19-10-12 17:33:18, Li Zefan wrote:
 On 2012/10/17 21:30, Michal Hocko wrote:
  Now that mem_cgroup_pre_destroy callback doesn't fail finally we can
  safely move on and forbid all the callbacks to fail. The last missing
  piece is moving cgroup_call_pre_destroy after cgroup_clear_css_refs so
  that css_tryget fails so no new charges for the memcg can happen.
 
  The callbacks are also called from within cgroup_lock to guarantee that
  no new tasks show up. 
 
 I'm afraid this won't work. See commit 
 3fa59dfbc3b223f02c26593be69ce6fc9a940405
 (cgroup: fix potential deadlock in pre_destroy)

Very good point. Thanks for pointing this out. So we should call
pre_destroy at the very end? What about the following?
Or should we rather drop the lock after check_for_release(parent) or
sooner but after CGRP_REMOVED is set?
---
From 70ea8718aba1c1784b94bfb26aa2307195c07c0b Mon Sep 17 00:00:00 2001
From: Michal Hocko mho...@suse.cz
Date: Wed, 17 Oct 2012 13:42:06 +0200
Subject: [PATCH] cgroups: forbid pre_destroy callback to fail

Now that mem_cgroup_pre_destroy callback doesn't fail finally we can
safely move on and forbid all the callbacks to fail. The last missing
piece is moving cgroup_call_pre_destroy after cgroup_clear_css_refs so
that css_tryget fails so no new charges for the memcg can happen.
We cannot, however, move cgroup_call_pre_destroy right after because we
cannot call mem_cgroup_pre_destroy with the cgroup_lock held (see
3fa59dfb cgroup: fix potential deadlock in pre_destroy) so we have to
move it after the lock is released.

Changes since v1
- Li Zefan pointed out that mem_cgroup_pre_destroy cannot be called with
  cgroup_lock held

Signed-off-by: Michal Hocko mho...@suse.cz
---
 kernel/cgroup.c |   30 +-
 1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b7d9606..4c6adbd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -855,7 +855,7 @@ static struct inode *cgroup_new_inode(umode_t mode, struct 
super_block *sb)
  * Call subsys's pre_destroy handler.
  * This is called before css refcnt check.
  */
-static int cgroup_call_pre_destroy(struct cgroup *cgrp)
+static void cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
struct cgroup_subsys *ss;
int ret = 0;
@@ -864,15 +864,8 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
if (!ss-pre_destroy)
continue;
 
-   ret = ss-pre_destroy(cgrp);
-   if (ret) {
-   /* -pre_destroy() failure is being deprecated */
-   WARN_ON_ONCE(!ss-__DEPRECATED_clear_css_refs);
-   break;
-   }
+   BUG_ON(ss-pre_destroy(cgrp));
}
-
-   return ret;
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -4161,7 +4154,6 @@ again:
mutex_unlock(cgroup_mutex);
return -EBUSY;
}
-   mutex_unlock(cgroup_mutex);
 
/*
 * In general, subsystem has no css-refcnt after pre_destroy(). But
@@ -4174,17 +4166,6 @@ again:
 */
set_bit(CGRP_WAIT_ON_RMDIR, cgrp-flags);
 
-   /*
-* Call pre_destroy handlers of subsys. Notify subsystems
-* that rmdir() request comes.
-*/
-   ret = cgroup_call_pre_destroy(cgrp);
-   if (ret) {
-   clear_bit(CGRP_WAIT_ON_RMDIR, cgrp-flags);
-   return ret;
-   }
-
-   mutex_lock(cgroup_mutex);
parent = cgrp-parent;
if (atomic_read(cgrp-count) || !list_empty(cgrp-children)) {
clear_bit(CGRP_WAIT_ON_RMDIR, cgrp-flags);
@@ -4206,6 +4187,7 @@ again:
return -EINTR;
goto again;
}
+
/* NO css_tryget() can success after here. */
finish_wait(cgroup_rmdir_waitq, wait);
clear_bit(CGRP_WAIT_ON_RMDIR, cgrp-flags);
@@ -4244,6 +4226,12 @@ again:
spin_unlock(cgrp-event_list_lock);
 
mutex_unlock(cgroup_mutex);
+
+   /*
+* Call pre_destroy handlers of subsys. Notify subsystems
+* that rmdir() request comes.
+*/
+   cgroup_call_pre_destroy(cgrp);
return 0;
 }
 
-- 
1.7.10.4


-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH v3 00/16] DMA Engine support for AM33XX

2012-10-19 Thread Matt Porter
On Fri, Oct 19, 2012 at 10:26:20AM +, Bedia, Vaibhav wrote:
 Hi Matt,
 
 On Thu, Oct 18, 2012 at 18:56:39, Porter, Matt wrote:
  Changes since v2:
  - Rebased on 3.7-rc1
  - Fixed bug in DT/pdata parsing first found by Gururaja
that turned out to be masked by some toolchains
  - Dropped unused mach-omap2/devices.c hsmmc patch
  - Added AM33XX crossbar DMA event mux support
  - Added am335x-evm support
  
  Changes since v1:
  - Rebased on top of mainline from 12250d8
  - Dropped the feature removal schedule patch
  - Implemented dma_request_slave_channel_compat() and
converted the mmc and spi drivers to use it
  - Dropped unneeded #address-cells and #size-cells from
EDMA DT support
  - Moved private EDMA header to linux/platform_data/ and
removed some unneeded definitions
  - Fixed parsing of optional properties
  
  TODO:
  - Add dmaengine support for per-channel caps so the
hack to set the maximum segments can be replaced with
a query to the dmaengine driver
  
  This series adds DMA Engine support for AM33xx, which uses
  an EDMA DMAC. The EDMA DMAC has been previously supported by only
  a private API implementation (much like the situation with OMAP
  DMA) found on the DaVinci family of SoCs.
  
  The series applies on top of 3.7-rc1 and the following patches:
  
  - GPMC fails to reserve memory fix:
http://www.spinics.net/lists/linux-omap/msg79675.html
  - TPS65910 regulator fix:
https://patchwork.kernel.org/patch/1593651/
  - dmaengine DT support from Vinod's dmaengine_dt branch in
git://git.infradead.org/users/vkoul/slave-dma.git since
027478851791df751176398be02a3b1c5f6aa824
  
  The approach taken is similar to how OMAP DMA is being converted to
  DMA Engine support. With the functional EDMA private API already
  existing in mach-davinci/dma.c, we first move that to an ARM common
  area so it can be shared. Adding DT and runtime PM support to the
  private EDMA API implementation allows it to run on AM33xx. AM33xx
  *only* boots using DT so we leverage Jon's generic DT DMA helpers to
  register EDMA DMAC with the of_dma framework and then add support
  for calling the dma_request_slave_channel() API to both the mmc
  and spi drivers.
  
  With this series both BeagleBone and the AM335x EVM have working
  MMC and SPI support.
  
  This is tested on BeagleBone with a SPI framebuffer driver and MMC
  rootfs. A trivial gpio DMA event misc driver was used to test the
  crossbar DMA event support. It is also tested on the AM335x EVM
  with the onboard SPI flash and MMC rootfs. The branch at
  https://github.com/ohporter/linux/tree/edma-dmaengine-v3 has the
  complete series, dependencies, and some test drivers/defconfigs.
  
 
 I didn't see all the patches that you posted on edma-dmaengine-v3
 but I do see them on edma-dmaengine-am33xx-v3 branch.

I see I referenced the wrong branch in the cover letter. Thanks for
testing and noticing this. Sorry to make you hunt for the correct
branch in that repo. ;) 

https://github.com/ohporter/linux/tree/edma-dmaengine-am33xx-v3
is indeed the correct branch for those wanting to pull this in or
grab some of the not-to-be-merged drivers I used for testing.

 I added a couple of patches to enable earlyprintk and build the DTB
 appended kernel image uImage-dtb.am335x-evm
 
 Here's what i see
 
 [...]

snip

 [0.175354] edma: probe of 4900.edma failed with error -16

I missed an uninitialized pdata case in the bug fixes mentioned in
the changelog and the folks previously failing the same way didn't
hit the case I suspect you are hitting. Can you try this and let me
know how it works?

Thanks,
Matt

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index b761b7a..b43b327 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1619,7 +1619,7 @@ static int __init edma_probe(struct platform_device *pdev)
 
if (node) {
pdata = devm_kzalloc(dev,
-sizeof(struct edma_soc_info),
+EDMA_MAX_CC*sizeof(struct edma_soc_info),
 GFP_KERNEL);
edma_of_parse_dt(dev, node, pdata);
info = pdata;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Do not use cpu_to_node() to find an offlined cpu's node.

2012-10-19 Thread Peter Zijlstra
On Wed, 2012-10-17 at 20:29 -0700, David Rientjes wrote:
 
 Ok, thanks for the update.  I agree that we should be clearing the mapping 
 at node hot-remove since any cpu that would subsequently get onlined and 
 assume one of the previous cpu's ids is not guaranteed to have the same 
 affinity.

Would this mean we have to remap (and memcpy) per-cpu memory on
node-plug?

 I'm just really hoping that we don't touch the acpi code and that we can 
 remove both cpu_hotplug-unmap-cpu2node-when-the-cpu-is-hotremoved.patch 
 and cpu_hotplug-unmap-cpu2node-when-the-cpu-is-hotremoved-fix.patch from 
 -mm.

Yeah, none of this should be anywhere near ACPI, it's got nothing to do
with ACPI. Furthermore it should be the same across all archs, not just
be weird and wonderful for x86.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread David Howells
Rusty Russell ru...@rustcorp.com.au wrote:

  (Side note: I hope people realize that the random key is generated
  with a 100-year lifespan. So if you build a kernel today, you do
  potentially have a year-2112 problem. I'm not horribly worried, but
  I *am* a bit worried about 32-bit time_t overflow and I hope 32-bit
  openssl doesn't do anything odd)
 
 Yep, David's original patch had that problem; he fixed the kernel's x509
 handling to use struct tm, not time_t, and now it Just Works.

That's assuming that 32-bit *openssl* gets it right when generating the key.
Trying it on my 32-bit laptop, I see:

  154:d=3  hl=2 l=  15 prim: GENERALIZEDTIME   :21120925112014Z

so I guess it does.

David
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread David Howells
Stephen Rothwell s...@canb.auug.org.au wrote:

 So, this still generates the keys during the normal build, right?  That
 would be a problem for build servers that have limited randomness
 available to them, I think.

openssl uses /dev/urandom (unlike gpg), so that's less of a problem.

David
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/1] HID: microsoft, fix invalid rdesc for 3k kbd

2012-10-19 Thread Jiri Slaby
Microsoft Digital Media Keyboard 3000 has two interfaces, and the
second one has a report descriptor with a bug. The second collection
says:
05 01 -- global; usage page -- 01 -- Generic Desktop Controls
09 80 -- local; usage -- 80 -- System Control
a1 01 -- main; collection -- 01 -- application

85 03 -- global; report ID -- 03
19 00 -- local; Usage Minimum -- 00
29 ff -- local; Usage Maximum -- ff
15 00 -- global; Logical Minimum -- 0
26 ff 00 -- global; Logical Maximum -- ff
81 00 -- main; input

c0 -- main; End Collection

I.e. it makes us think that there are all kinds of usages of system
control. That the keyboard is not only a keyboard, but also a
joystick, mouse, gamepad, keypad, etc. The same as for the Wireless
Desktop Receiver, this should be Physical Min/Max. So fix that
appropriately.

References: https://bugzilla.novell.com/show_bug.cgi?id=776834
Signed-off-by: Jiri Slaby jsl...@suse.cz
---
 drivers/hid/hid-microsoft.c | 18 +-
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index 3acdcfc..f676c01 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -28,22 +28,30 @@
 #define MS_RDESC   0x08
 #define MS_NOGET   0x10
 #define MS_DUPLICATE_USAGES0x20
+#define MS_RDESC_3K0x40
 
-/*
- * Microsoft Wireless Desktop Receiver (Model 1028) has
- * 'Usage Min/Max' where it ought to have 'Physical Min/Max'
- */
 static __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc,
unsigned int *rsize)
 {
unsigned long quirks = (unsigned long)hid_get_drvdata(hdev);
 
+   /*
+* Microsoft Wireless Desktop Receiver (Model 1028) has
+* 'Usage Min/Max' where it ought to have 'Physical Min/Max'
+*/
if ((quirks  MS_RDESC)  *rsize == 571  rdesc[557] == 0x19 
rdesc[559] == 0x29) {
hid_info(hdev, fixing up Microsoft Wireless Receiver Model 
1028 report descriptor\n);
rdesc[557] = 0x35;
rdesc[559] = 0x45;
}
+   /* the same as above (s/usage/physical/) */
+   if ((quirks  MS_RDESC_3K)  *rsize == 106 
+   !memcmp((char []){ 0x19, 0x00, 0x29, 0xff },
+   rdesc[94], 4)) {
+   rdesc[94] = 0x35;
+   rdesc[96] = 0x45;
+   }
return rdesc;
 }
 
@@ -192,7 +200,7 @@ static const struct hid_device_id ms_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_PRESENTER_8K_USB),
.driver_data = MS_PRESENTER },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K),
-   .driver_data = MS_ERGONOMY },
+   .driver_data = MS_ERGONOMY | MS_RDESC_3K },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0),
.driver_data = MS_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_MOUSE_4500),
-- 
1.7.12.3


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] arm/dts: AM33XX: Add SPI device tree data

2012-10-19 Thread Matt Porter
On Fri, Oct 19, 2012 at 10:24:15AM +0200, Benoit Cousson wrote:
 Hi Avinash,
 
 This look good to me except the: status = disabled.
 
 The disabled should be reserved for variant that does not contain the IP.
 Is it the case here?

http://comments.gmane.org/gmane.linux.drivers.devicetree/18968 is what
I've been going by with the DTS support in the EDMA dmaengine series. It
does make the most sense to only enable what you need in the
board.dts.

-Matt

 On 09/18/2012 07:30 AM, Philip, Avinash wrote:
  Add McSPI data node to AM33XX device tree file. The McSPI module (and so
  as the driver) is reused from OMAP4.
  
  Signed-off-by: Philip, Avinash avinashphi...@ti.com
  Tested-by: Matt Porter mpor...@ti.com
  ---
  Changes since v1:
  - Corrected reg offset in reg DT entry.
  
  :100644 100644 ff3badb... 065fd54... M  arch/arm/boot/dts/am33xx.dtsi
   arch/arm/boot/dts/am33xx.dtsi |   25 +
   1 files changed, 25 insertions(+), 0 deletions(-)
  
  diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
  index ff3badb..065fd54 100644
  --- a/arch/arm/boot/dts/am33xx.dtsi
  +++ b/arch/arm/boot/dts/am33xx.dtsi
  @@ -219,5 +219,30 @@
  interrupt-parent = intc;
  interrupts = 91;
  };
  +
  +   spi0: spi@4803 {
  +   compatible = ti,omap4-mcspi;
  +   #address-cells = 1;
  +   #size-cells = 0;
  +   reg = 0x4803 0x400;
  +   interrupt-parent = intc;
  +   interrupt = 65;
  +   ti,spi-num-cs = 2;
  +   ti,hwmods = spi0;
  +   status = disabled;
  +
  +   };
  +
  +   spi1: spi@481a {
  +   compatible = ti,omap4-mcspi;
  +   #address-cells = 1;
  +   #size-cells = 0;
  +   reg = 0x481a 0x400;
  +   interrupt-parent = intc;
  +   interrupt = 125;
  +   ti,spi-num-cs = 2;
  +   ti,hwmods = spi1;
  +   status = disabled;
  +   };
  };
   };
  
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread Stephen Rothwell
Hi David,

On Fri, 19 Oct 2012 12:25:23 +0100 David Howells dhowe...@redhat.com wrote:

 Stephen Rothwell s...@canb.auug.org.au wrote:
 
  So, this still generates the keys during the normal build, right?  That
  would be a problem for build servers that have limited randomness
  available to them, I think.
 
 openssl uses /dev/urandom (unlike gpg), so that's less of a problem.

OK, thanks.

-- 
Cheers,
Stephen Rothwell s...@canb.auug.org.au


pgpWpt7x7x0az.pgp
Description: PGP signature


Re: [PATCH 1/4] module: add syscall to load module from fd

2012-10-19 Thread Alon Ziv
H. Peter Anvin hpa at zytor.com writes:
  It is a bit more indirect, but also in practice it's a bit trickier than
  that.  We need to ensure the memory doesn't change underneath us and
  stays attached to that fd.  I can easily see that code slipping and
  ending in an exploit.
 
  But that may be my irrational fear of the mm :)
 
 You have to do the same thing with a file/file descriptor, I would think.
 
 However, I keep wondering about the use case for this, as opposed to 
 signatures.

Two things:
1. finit_module() lets LSMs make decisions based on full information on the
   module to be loaded
2. On some systems (such as Chromium OS) we have a trusted root OS (e.g. the
   entire root filesystem is protected using dm-verity); requiring signatures
   on top of this is a waste of resources


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread Alexander Holler

Am 19.10.2012 13:25, schrieb David Howells:

Stephen Rothwell s...@canb.auug.org.au wrote:


So, this still generates the keys during the normal build, right?  That
would be a problem for build servers that have limited randomness
available to them, I think.


openssl uses /dev/urandom (unlike gpg), so that's less of a problem.


Hmm, please don't forget the case where people want to build the kernel 
in some sandbox (like chroot or similar) where the build-system doesn't 
have access to /dev.


I haven't checked what openssl does if that is the case, so maybe the 
script which calls it should either offer a verbose error message for 
that case, or should be prepared that openssl might fail because of a 
missing /dev/urandom.


If that's already done, just ignore my email, I haven't read the 
complete thread, sorry.


Regards,

Alexander
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] rename NUMA fault handling functions

2012-10-19 Thread Peter Zijlstra
On Thu, 2012-10-18 at 17:20 -0400, Rik van Riel wrote:
 Having the function name indicate what the function is used
 for makes the code a little easier to read.  Furthermore,
 the fault handling code largely consists of do__page
 functions. 

I don't much care either way, but I was thinking walken might want to
use something similar to do WSS estimation, in which case the NUMA name
is just as wrong.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] add credits for NUMA placement

2012-10-19 Thread Peter Zijlstra
On Thu, 2012-10-18 at 17:19 -0400, Rik van Riel wrote:
 The NUMA placement code has been rewritten several times, but
 the basic ideas took a lot of work to develop. The people who
 put in the work deserve credit for it. Thanks Andrea  Peter :)
 
 The Documentation/scheduler/numa-problem.txt file should
 probably be rewritten once we figure out the final details of
 what the NUMA code needs to do, and why.
 
 Signed-off-by: Rik van Riel r...@redhat.com

Acked-by: Peter Zijlstra a.p.zijls...@chello.nl

Thanks Rik!
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread Josh Boyer
On Thu, Oct 18, 2012 at 8:48 PM, Rusty Russell ru...@rustcorp.com.au wrote:
 Josh Boyer jwbo...@gmail.com writes:
 It might even be able to be moved entirely into scripts/Makefile.modinst
 but I haven't gotten that far yet.

 Thanks, I'll add this.

Excellent.

 Note it was word-wrapped here though :(

Sigh.  Sorry, Rusty.  I thought I had gmail's kinks worked out.

josh
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86/dt: use linear irq domain for ioapic(s).

2012-10-19 Thread Florian Fainelli
Sebastian Andrzej Siewior bigeasy at linutronix.de writes:
 
 No. You do have a compatible entry. It first appeared on the ce4100
 CPU. If it happens to also work on the n450 then it seems to be
 compatible with that one. This is documented somewhere…
 Usually you add 'compatible = your cpu, generic binding' in case
 you need a fixup / errata whatever for your cpu. Even if you compare
 all hpets from Intel there is the one or other difference / errata.

Can we make sure that this hits the future 3.6 stable releases? We had to merge
this back to your 3.6 kernel tree in order to have a functional CE4100 system.

Thank you!
--
Florian

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: RFC: sign the modules at install time

2012-10-19 Thread Josh Boyer
On Thu, Oct 18, 2012 at 9:16 PM, Rusty Russell ru...@rustcorp.com.au wrote:
 Josh Boyer jwbo...@gmail.com writes:
 On Thu, Oct 18, 2012 at 2:46 PM, Linus Torvalds
 torva...@linux-foundation.org wrote:
 On Wed, Oct 17, 2012 at 10:34 PM, Rusty Russell ru...@rustcorp.com.au 
 wrote:

 Hacking the keyid and signer-name to be extracted every time by
 sign-file takes my modules_install time from 18.6 seconds to 19.1.  We'd
 get that back easily by making sign-file a perl script anyway; it calls
 out to perl 3 times already.

 Ok, that tiny slowdown seems worth the cleanup, especially if we'd get
 it back from somebody re-writing it in perl.

 Want to sign off on the two patches, or put them in your git tree?

 I tested Rusty's version of the 'sign modules at module_install time'
 patch in a Fedora kernel build today.  It seems to work well enough,
 even if we wind up signing things twice.  A brief cleanup of my patch
 to add a modules_sign target on top of that is below.

 I'm surprised.  Only the first signature (created on the unstripped
 module) will be used by the kernel; this should fail to verify the
 stripped module.  A quick and dirty check is:

 grep -abo '~Module' 
 /tmp/mod/lib/modules/3.7.0-rc1+/kernel/sound/pci/snd-intel8x0.ko
 39828:~Module
 40432:~Module

 Perhaps eu-strip actually strips the appended signature?

Yes.  The abbreviated flow chart is:

build
install (signed at install time now)
find-debuginfo.sh (eu-strip) which strips off the signature from above
modules_sign to put it back

You are certainly correct in that if I am building outside of RPM and
run modules_install followed by modules_sign, it will append a second
signature.

A further check could be done in scripts/Makefile.modsign to see if each
module is already signed and skip it if so.  I can take a look at doing
that next week if people think it would be worthwhile.

josh
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RFC 02/15 v5] gpio: Add sysfs support to block GPIO API

2012-10-19 Thread Roland Stigge
On 10/19/2012 12:35 PM, Linus Walleij wrote:
 On Thu, Oct 18, 2012 at 12:07 PM, Roland Stigge sti...@antcom.de wrote:
 On 10/17/2012 09:05 PM, Greg KH wrote:

 +if (value != exported) {
 +if (value)
 +status = gpio_block_value_export(block);
 +else
 +status = gpio_block_value_unexport(block);

 That looks like a recipe for disaster.  Why do you allow userspace to
 do this?

 Exporting for gpio blocks is done as follows: writing 1 to the
 exported _device_ attribute of the gpio block creates the values
 attribute and at the same time requests the whole block (including all
 of its gpios) as sysfs.
 
 To me it reads like Greg's comment is basically pinpointing a flaw
 in Brownell's initial design of gpio sysfs: that new sysfs files are
 created and destroyed by writing into sysfs */export files from
 userspace?
 
 See commit: d8f388d8dc8d4f36539dd37c1fff62cc404ea0fc
 
 The block GPIO stuff is just following that design pattern.

So what do you think about my just proposed idea of always having the
value argument present, but only useable when exported is 1? Now
only talking about the block gpios, but later maybe also for gpios?

But I would only do this if you and Greg consider it reasonable.

Thanks in advance,

Roland
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] x86: ce4100: various fixes

2012-10-19 Thread Florian Fainelli
Hi all,

This patch series contains shutdown/reboot fixes for the CE4100 platform as
well as a PCI controller fix for devices without an interrupt line.

Florian Fainelli (3):
  x86: ce4100: implement pm_poweroff
  x86: ce4100: force reboot method to be KBD
  x86: ce4100: fixup PCI configuration register access for devices
without interrupts

 arch/x86/pci/ce4100.c |   13 +
 arch/x86/platform/ce4100/ce4100.c |   10 ++
 2 files changed, 23 insertions(+)

-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] x86: ce4100: implement pm_poweroff

2012-10-19 Thread Florian Fainelli
The CE4100 platform is currently missing a proper pm_poweroff implementation
leading to poweroff making the CPU spin forever and the CE4100 platform does
not enter a low-power mode where the external Power Management Unit can
properly power off the system. Power off on this platform is implemented pretty
much like reboot, by writing to the SoC built-in 8051 microcontroller mapped at
I/O port 0xcf9, the value 0x4.

Signed-off-by: Florian Fainelli ffaine...@freebox.fr
---
 arch/x86/platform/ce4100/ce4100.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/platform/ce4100/ce4100.c 
b/arch/x86/platform/ce4100/ce4100.c
index 4c61b52..74f8774 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -27,6 +27,11 @@ static int ce4100_i8042_detect(void)
return 0;
 }
 
+static void ce4100_power_off(void)
+{
+   outb(0x4, 0xcf9);
+}
+
 #ifdef CONFIG_SERIAL_8250
 
 static unsigned int mem_serial_in(struct uart_port *p, int offset)
@@ -143,4 +148,6 @@ void __init x86_ce4100_early_setup(void)
x86_init.pci.init_irq = sdv_pci_init;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
 #endif
+
+   pm_power_off = ce4100_power_off;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] x86: ce4100: force reboot method to be KBD

2012-10-19 Thread Florian Fainelli
From: Maxime Bizon mbi...@freebox.fr

The default reboot is via ACPI for this platform, and the CEFDK bootloader
actually supports this, but will issue a system power off instead of a real
reboot. Setting the reboot method to be KBD instead of ACPI ensures proper
system reboot.

Signed-off-by: Florian Fainelli ffaine...@freebox.fr
---
 arch/x86/platform/ce4100/ce4100.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/platform/ce4100/ce4100.c 
b/arch/x86/platform/ce4100/ce4100.c
index 74f8774..8c9ed9a 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -21,6 +21,7 @@
 #include asm/i8259.h
 #include asm/io.h
 #include asm/io_apic.h
+#include asm/emergency-restart.h
 
 static int ce4100_i8042_detect(void)
 {
@@ -144,6 +145,8 @@ void __init x86_ce4100_early_setup(void)
x86_init.mpparse.find_smp_config = x86_init_noop;
x86_init.pci.init = ce4100_pci_init;
 
+   reboot_type = BOOT_KBD;
+
 #ifdef CONFIG_X86_IO_APIC
x86_init.pci.init_irq = sdv_pci_init;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] x86: ce4100: fixup PCI configuration register access for devices without interrupts

2012-10-19 Thread Florian Fainelli
From: Maxime Bizon mbi...@freebox.fr

Some CE4100 devices such as the:
- DFX module (01:0b.7)
- entertainment encryption device (01:10.0)
- multimedia controller (01:12.0)

do not have a device interrupt at all. This patch fixes the PCI controller
code to declare the missing PCI configuration register space, as well as a
fixup method for forcing the interrupt pin to be 0 for these devices. This is
required to ensure that pci drivers matching on these devices will be able to
honor the various PCI subsystem calls touching the configuration space.

Signed-off-by: Florian Fainelli ffaine...@freebox.fr
---
 arch/x86/pci/ce4100.c |   13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 41bd2a2..b914e20 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -115,6 +115,16 @@ static void sata_revid_read(struct sim_dev_reg *reg, u32 
*value)
reg_read(reg, value);
 }
 
+static void reg_noirq_read(struct sim_dev_reg *reg, u32 *value)
+{
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(pci_config_lock, flags);
+   /* force interrupt pin value to 0 */
+   *value = reg-sim_reg.value  0xfff00ff;
+   raw_spin_unlock_irqrestore(pci_config_lock, flags);
+}
+
 static struct sim_dev_reg bus1_fixups[] = {
DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
@@ -144,6 +154,7 @@ static struct sim_dev_reg bus1_fixups[] = {
DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
+   DEFINE_REG(11, 7, 0x3c, 256, reg_init, reg_noirq_read, reg_write)
DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
@@ -161,8 +172,10 @@ static struct sim_dev_reg bus1_fixups[] = {
DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
+   DEFINE_REG(16, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write)
DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
+   DEFINE_REG(18, 0, 0x3c, 256, reg_init, reg_noirq_read, reg_write)
 };
 
 static void __init init_sim_regs(void)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[3.6.2] oops @ opteron server: mgag200 Fatal error during GPU init

2012-10-19 Thread Paweł Sikora
Hi,

on the new opteron server i'm observing an oops during matrox video 
initialization.
here's the dmesg from pure 3.6.2 kernel:

[   20.598985] [drm] Initialized drm 1.1.0 20060810
[   20.642302] [drm:mga_vram_init] *ERROR* can't reserve VRAM
[   20.642307] mgag200 :01:04.0: Fatal error during GPU init: -6
[   20.642319] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[   20.664413] IP: [a03c364f] drm_mode_config_cleanup+0x1f/0x1c0 [drm]
[   20.675905] PGD 40869b067 PUD 4086a4067 PMD 0 
[   20.687362] Oops:  [#1] SMP 
[   20.698748] Modules linked in: igb(+) usb_storage(+) mgag200(+) ttm 
crc32c_intel ghash_clmulni_intel drm_kms_helper drm aesni_intel usb_libusual 
dca ablk_helper uas i2c_algo_bit sysimgblt cryptd sysfillrect syscopyarea ptp 
aes_x86_64 pps_core evdev joydev pcspkr aes_generic hid_generic fam15h_power(+) 
i2c_piix4(+) atiixp(+) k10temp i2c_core microcode ide_core amd64_edac_mod 
edac_core hwmon edac_mce_amd processor button uhci_hcd ext3 jbd mbcache raid1 
md_mod usbhid hid ohci_hcd ehci_hcd usbcore usb_common uvesafb sd_mod 
crc_t10dif ahci libahci libata scsi_mod
[   20.750381] CPU 12 
[   20.750478] Pid: 463, comm: udevd Not tainted 3.6.2 #4 Supermicro H8DGU/H8DGU
[   20.776696] RIP: 0010:[a03c364f]  [a03c364f] 
drm_mode_config_cleanup+0x1f/0x1c0 [drm]
[   20.790249] RSP: 0018:8804086a3a88  EFLAGS: 00010296
[   20.803729] RAX:  RBX: 881007f41000 RCX: 0043
[   20.817409] RDX:  RSI: 0046 RDI: 881008d83000
[   20.831003] RBP: 8804086a3aa8 R08: 000a R09: 03ff
[   20.844580] R10:  R11: 03fe R12: 881008d83000
[   20.858085] R13: 881008d83460 R14: 881007f41000 R15: 881008d833a0
[   20.871607] FS:  7fc87267c800() GS:88101ec0() 
knlGS:
[   20.885316] CS:  0010 DS:  ES:  CR0: 80050033
[   20.899017] CR2:  CR3: 00040869a000 CR4: 000407e0
[   20.912916] DR0:  DR1:  DR2: 
[   20.926724] DR3:  DR6: 0ff0 DR7: 0400
[   20.940450] Process udevd (pid: 463, threadinfo 8804086a2000, task 
88040846ee00)
[   20.942880] Probing IDE interface ide1...
[   20.968028] Stack:
[   20.981616]  881007f41000 881007f41000 881008d83000 
a029a8e0
[   20.995514]  8804086a3ac8 a02942c7 fffa 
881008ddd000
[   21.009470]  8804086a3b58 a029462e 8804086a3af8 
a03c11a1
[   21.023443] Call Trace:
[   21.037295]  [a02942c7] mgag200_driver_unload+0x37/0x70 [mgag200]
[   21.051493]  [a029462e] mgag200_driver_load+0x32e/0x4b0 [mgag200]
[   21.065600]  [a03c11a1] ? drm_sysfs_device_add+0x81/0xb0 [drm]
[   21.079699]  [a03bd469] ? drm_get_minor+0x259/0x2f0 [drm]
[   21.093733]  [a03bfaae] drm_get_pci_dev+0x17e/0x2c0 [drm]
[   21.107675]  [a0299405] mga_pci_probe+0xb1/0xb9 [mgag200]
[   21.121582]  [8127f854] local_pci_probe+0x74/0x100
[   21.135386]  [8127f9f1] pci_device_probe+0x111/0x120
[   21.149106]  [813319e6] driver_probe_device+0x76/0x240
[   21.162801]  [81331c4b] __driver_attach+0x9b/0xa0
[   21.176411]  [81331bb0] ? driver_probe_device+0x240/0x240
[   21.190062]  [8132fd4d] bus_for_each_dev+0x4d/0x90
[   21.203724]  [81331509] driver_attach+0x19/0x20
[   21.217443]  [81331100] bus_add_driver+0x190/0x260
[   21.231260]  [a02c5000] ? 0xa02c4fff
[   21.245155]  [a02c5000] ? 0xa02c4fff
[   21.259047]  [813322d2] driver_register+0x72/0x170
[   21.272998]  [a02c5000] ? 0xa02c4fff
[   21.286900]  [8127e6c9] __pci_register_driver+0x59/0xd0
[   21.300840]  [a02c5000] ? 0xa02c4fff
[   21.314682]  [a03bfd0a] drm_pci_init+0x11a/0x130 [drm]
[   21.328540]  [a02c5000] ? 0xa02c4fff
[   21.342301]  [a02c5032] mgag200_init+0x32/0x1000 [mgag200]
[   21.356065]  [81002122] do_one_initcall+0x122/0x170
[   21.369741]  [810aa176] sys_init_module+0xfe6/0x1e50
[   21.383355]  [810a6920] ? free_notes_attrs+0x60/0x60
[   21.396935]  [814ae579] system_call_fastpath+0x16/0x1b
[   21.410479] Code: 5d 41 5e 5d c3 0f 1f 80 00 00 00 00 55 48 89 e5 41 55 41 
54 49 89 fc 4d 8d ac 24 60 04 00 00 53 48 83 ec 08 48 8b 87 60 04 00 00 48 8b 
18 48 8d 78 f8 48 83 eb 08 49 39 c5 74 1c 90 48 8b 47 40 
[   21.439403] RIP  [a03c364f] drm_mode_config_cleanup+0x1f/0x1c0 
[drm]
[   21.453651]  RSP 8804086a3a88
[   21.467829] CR2: 
[   21.481651] ---[ end trace ecb4d159319307e6 ]---


01:04.0 VGA compatible controller: Matrox Electronics Systems Ltd. MGA G200eW 
WPCM450 (rev 0a) (prog-if 00 [VGA controller])
Subsystem: Super Micro Computer 

[PATCH] pstore/ram: fix undefined usage of rounddown_pow_of_two.

2012-10-19 Thread Florian Fainelli
From: Maxime Bizon mbi...@freebox.fr

record_size / console_size / ftrace_size can be 0 (this is how you
disable the feature), but rounddown_pow_of_two(0) is undefined. This problem
has been present since commit 1894a253 (ramoops: Move to fs/pstore/ram.c).

Signed-off-by: Maxime Bizon mbi...@freebox.fr
Signed-off-by: Florian Fainelli ffaine...@freebox.fr
CC: sta...@vger.kernel.org
---
 fs/pstore/ram.c |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1a4f6da..0c2ae26 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -374,10 +374,14 @@ static int __devinit ramoops_probe(struct platform_device 
*pdev)
goto fail_out;
}
 
-   pdata-mem_size = rounddown_pow_of_two(pdata-mem_size);
-   pdata-record_size = rounddown_pow_of_two(pdata-record_size);
-   pdata-console_size = rounddown_pow_of_two(pdata-console_size);
-   pdata-ftrace_size = rounddown_pow_of_two(pdata-ftrace_size);
+   if (pdata-mem_size)
+   pdata-mem_size = rounddown_pow_of_two(pdata-mem_size);
+   if (pdata-record_size)
+   pdata-record_size = rounddown_pow_of_two(pdata-record_size);
+   if (pdata-console_size)
+   pdata-console_size = rounddown_pow_of_two(pdata-console_size);
+   if (pdata-ftrace_size)
+   pdata-ftrace_size = rounddown_pow_of_two(pdata-ftrace_size);
 
cxt-dump_read_cnt = 0;
cxt-size = pdata-mem_size;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:numa/core] numa: Add credits for NUMA placement

2012-10-19 Thread tip-bot for Rik van Riel
Commit-ID:  c1a305006e4dd428001852923c11806d754db9f1
Gitweb: http://git.kernel.org/tip/c1a305006e4dd428001852923c11806d754db9f1
Author: Rik van Riel r...@redhat.com
AuthorDate: Thu, 18 Oct 2012 17:19:28 -0400
Committer:  Ingo Molnar mi...@kernel.org
CommitDate: Fri, 19 Oct 2012 13:45:48 +0200

numa: Add credits for NUMA placement

The NUMA placement code has been rewritten several times, but
the basic ideas took a lot of work to develop. The people who
put in the work deserve credit for it. Thanks Andrea & Peter :)

[ The Documentation/scheduler/numa-problem.txt file should
  probably be rewritten once we figure out the final details of
  what the NUMA code needs to do, and why. ]

Signed-off-by: Rik van Riel r...@redhat.com
Acked-by: Peter Zijlstra a.p.zijls...@chello.nl
Cc: aarca...@redhat.com
Cc: Linus Torvalds torva...@linux-foundation.org
Cc: Andrew Morton a...@linux-foundation.org
Link: http://lkml.kernel.org/r/20121018171928.24d06...@cuia.bos.redhat.com
Signed-off-by: Ingo Molnar mi...@kernel.org

This is against tip.git numa/core
---
 CREDITS |1 +
 kernel/sched/fair.c |3 +++
 mm/memory.c |2 ++
 3 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/CREDITS b/CREDITS
index d8fe12a..b4cdc8f 100644
--- a/CREDITS
+++ b/CREDITS
@@ -125,6 +125,7 @@ D: Author of pscan that helps to fix lp/parport bugs
 D: Author of lil (Linux Interrupt Latency benchmark)
 D: Fixed the shm swap deallocation at swapoff time (try_to_unuse message)
 D: VM hacker
+D: NUMA task placement
 D: Various other kernel hacks
 S: Imola 40026
 S: Italy
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1e24aa1..e93032d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -18,6 +18,9 @@
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra pzijl...@redhat.com
+ *
+ *  NUMA placement, statistics and algorithm by Andrea Arcangeli,
+ *  CFS balancing changes by Peter Zijlstra. Copyright (C) 2012 Red Hat, Inc.
  */
 
 #include linux/latencytop.h
diff --git a/mm/memory.c b/mm/memory.c
index fc48fe8..9e56a44 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -36,6 +36,8 @@
  * (gerhard.wich...@pdb.siemens.de)
  *
  * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
+ *
+ * 2012 - NUMA placement page faults (Andrea Arcangeli, Peter Zijlstra)
  */
 
 #include linux/kernel_stat.h
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [RFC PATCH v3 00/16] DMA Engine support for AM33XX

2012-10-19 Thread Bedia, Vaibhav
On Fri, Oct 19, 2012 at 16:45:58, Porter, Matt wrote:
 On Fri, Oct 19, 2012 at 10:26:20AM +, Bedia, Vaibhav wrote:
[...]
  
  I didn't see all the patches that you posted on edma-dmaengine-v3
  but I do see them on the edma-dmaengine-am33xx-v3 branch.
 
 I see I referenced the wrong branch in the cover letter. Thanks for
 testing and noticing this. Sorry to make you hunt for the correct
 branch in that repo. ;) 
 

No problem.

 https://github.com/ohporter/linux/tree/edma-dmaengine-am33xx-v3
 is indeed the correct branch for those wanting to pull this in or
 grab some of the not-to-be-merged drivers I used for testing.
 
  I added a couple of patches to enable earlyprintk and build the DTB
  appended kernel image uImage-dtb.am335x-evm
  
  Here's what i see
  
  [...]
 
 snip
 
  [0.175354] edma: probe of 4900.edma failed with error -16
 
 I missed an uninitialized pdata case in the bug fixes mentioned in
 the changelog and the folks previously failing the same way didn't
 hit the case I suspect you are hitting. Can you try this and let me
 know how it works?
 

That doesn't help :(

Looking at the original crash log, I suspect something is not correct
with the irq portion, probably in the DT or the driver. 

genirq: Flags mismatch irq 28.  (edma) vs.  (edma)

The warning below that is due to the failure path in edma_probe not tracking
the request_irq status properly, but IMO that's a separate issue.

BTW, I am trying this on the EVM.

Regards,
Vaibhav
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kconfig/menuconfig: use TAILQ instead of CIRCLEQ

2012-10-19 Thread Tetsuo Handa
Yann E. MORIN wrote:
 Some systems (eg. Cygwin, FreeBSD) are missing the CIRCLEQ macros.
 They were removed in Y2000 from FreeBSD:
 http://svnweb.freebsd.org/base?view=revisionrevision=70469
 
 The reason was that TAILQ are perfectly capable of doing the exact
 same things:
 
 http://www.mavetju.org/mail/view_thread.php?list=freebsd-archid=915145thread=yes
 
 (Thank Yaakov for the pointers!)
 
 So, switch to using TAILQ instead, which are more portable.
 
 Reported-by: Tetsuo Handa penguin-ker...@i-love.sakura.ne.jp
 Reported-by: Benjamin Poirier bpoir...@suse.de
 Signed-off-by: Yann E. MORIN yann.morin.1...@free.fr
 Cc: Yaakov Selkowitz yselkow...@gmail.com
 ---
  scripts/kconfig/expr.h  |4 ++--
  scripts/kconfig/mconf.c |4 ++--
  scripts/kconfig/menu.c  |6 +++---
  3 files changed, 7 insertions(+), 7 deletions(-)
 
Excuse me, but your patch does not solve my problem because kconfig started
using macros which do not exist in @(#)queue.h 8.3 (Berkeley) 12/13/93.
Kconfig still fails after applying your patch:

  HOSTCC  scripts/kconfig/mconf.o
scripts/kconfig/mconf.c: In function `update_text':
scripts/kconfig/mconf.c:326: warning: implicit declaration of function 
`TAILQ_FOREACH'
scripts/kconfig/mconf.c:326: error: `entries' undeclared (first use in this 
function)
scripts/kconfig/mconf.c:326: error: (Each undeclared identifier is reported 
only once
scripts/kconfig/mconf.c:326: error: for each function it appears in.)
scripts/kconfig/mconf.c:326: error: syntax error before '{' token
scripts/kconfig/mconf.c:333: error: `header' undeclared (first use in this 
function)
scripts/kconfig/mconf.c: At top level:
scripts/kconfig/mconf.c:343: error: syntax error before '}' token
scripts/kconfig/mconf.c: In function `search_conf':
scripts/kconfig/mconf.c:378: warning: implicit declaration of function 
`TAILQ_HEAD_INITIALIZER'
scripts/kconfig/mconf.c:378: error: invalid initializer
make[1]: *** [scripts/kconfig/mconf.o] Error 1
make: *** [menuconfig] Error 2

So, would you add something which looks like sed -e 's/CIRCLEQ/TAILQ/g' upon
https://lkml.org/lkml/2012/10/16/274 ?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/4] power/jz4740.c: Simplify exit of function

2012-10-19 Thread Lars-Peter Clausen
On 10/19/2012 01:41 AM, Marcos Paulo de Souza wrote:
 This commit simplifies the exit of the probe function by returning
 directly when it doesn't need to release any resources.
 
 Signed-off-by: Marcos Paulo de Souza marcos.souza@gmail.com

Looks good, but I'd just fold the relevant parts in the previous patches.

 ---
  drivers/power/jz4740-battery.c |   17 ++---
  1 file changed, 6 insertions(+), 11 deletions(-)
 
 diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c
 index b80f7ed..0d6c9e4 100644
 --- a/drivers/power/jz4740-battery.c
 +++ b/drivers/power/jz4740-battery.c
 @@ -262,32 +262,28 @@ static int __devinit jz_battery_probe(struct 
 platform_device *pdev)
  
   jz_battery-irq = platform_get_irq(pdev, 0);
   if (jz_battery-irq  0) {
 - ret = jz_battery-irq;
   dev_err(pdev-dev, Failed to get platform irq: %d\n, ret);
 - goto err;
 + return jz_battery-irq;
   }
  
   jz_battery-mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
   if (!jz_battery-mem) {
 - ret = -ENOENT;
   dev_err(pdev-dev, Failed to get platform mmio resource\n);
 - goto err;
 + return -ENOENT;
   }
  
   jz_battery-mem = devm_request_mem_region(pdev-dev, 
 jz_battery-mem-start,
   resource_size(jz_battery-mem), pdev-name);
   if (!jz_battery-mem) {
 - ret = -EBUSY;
   dev_err(pdev-dev, Failed to request mmio memory region\n);
 - goto err;
 + return -EBUSY;
   }
  
   jz_battery-base = devm_ioremap_nocache(pdev-dev, 
 jz_battery-mem-start,
   resource_size(jz_battery-mem));
   if (!jz_battery-base) {
 - ret = -EBUSY;
   dev_err(pdev-dev, Failed to ioremap mmio memory\n);
 - goto err;
 + return -EBUSY;
   }
  
   battery = jz_battery-battery;
 @@ -311,7 +307,7 @@ static int __devinit jz_battery_probe(struct 
 platform_device *pdev)
   0, pdev-name, jz_battery);
   if (ret) {
   dev_err(pdev-dev, Failed to request irq %d\n, ret);
 - goto err;
 + return ret;
   }
   disable_irq(jz_battery-irq);
  
 @@ -319,7 +315,7 @@ static int __devinit jz_battery_probe(struct 
 platform_device *pdev)
   ret = gpio_request(pdata-gpio_charge, dev_name(pdev-dev));
   if (ret) {
   dev_err(pdev-dev, charger state gpio request 
 failed.\n);
 - goto err;
 + return ret;
   }
   ret = gpio_direction_input(pdata-gpio_charge);
   if (ret) {
 @@ -364,7 +360,6 @@ static int __devinit jz_battery_probe(struct 
 platform_device *pdev)
  err_free_gpio:
   if (gpio_is_valid(pdata-gpio_charge))
   gpio_free(jz_battery-pdata-gpio_charge);
 -err:
   platform_set_drvdata(pdev, NULL);
   return ret;
  }

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 15/16] sched: implement usage tracking

2012-10-19 Thread Vincent Guittot
Hi Paul,

I think that you have forgotten to reset .usage_avg_sum in
__sched_fork, as is already done for .runnable_avg_sum and
.runnable_avg_period.

And it seems that this reset is not corrected in the latest version in
your git repo:
http://git.kernel.org/?p=linux/kernel/git/pjt/sched.git;a=blob;f=kernel/sched/core.c;h=df55e2ecdd2398648c7d01e318070d06b845a5b0;hb=refs/heads/load_tracking#l1535

Regards,
Vincent

On 23 August 2012 16:14,  p...@google.com wrote:
 From: Paul Turner p...@google.com

 With the framework for runnable tracking now fully in place, per-entity
 usage tracking is a simple and low-overhead addition.

 Signed-off-by: Paul Turner p...@google.com
 Reviewed-by: Ben Segall bseg...@google.com
 ---
  include/linux/sched.h |1 +
  kernel/sched/debug.c  |3 +++
  kernel/sched/fair.c   |   33 -
  kernel/sched/sched.h  |4 ++--
  4 files changed, 34 insertions(+), 7 deletions(-)

 diff --git a/include/linux/sched.h b/include/linux/sched.h
 index 93e27c0..2a4be1f 100644
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -1150,6 +1150,7 @@ struct sched_avg {
 u64 last_runnable_update;
 s64 decay_count;
 unsigned long load_avg_contrib;
 +   u32 usage_avg_sum;
  };

  #ifdef CONFIG_SCHEDSTATS
 diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
 index 2cd3c1b..b9d54d0 100644
 --- a/kernel/sched/debug.c
 +++ b/kernel/sched/debug.c
 @@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int 
 cpu, struct task_group
  #ifdef CONFIG_SMP
 P(se-avg.runnable_avg_sum);
 P(se-avg.runnable_avg_period);
 +   P(se-avg.usage_avg_sum);
 P(se-avg.load_avg_contrib);
 P(se-avg.decay_count);
  #endif
 @@ -230,6 +231,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct 
 cfs_rq *cfs_rq)
 cfs_rq-tg_runnable_contrib);
 SEQ_printf(m,   .%-30s: %d\n, tg-runnable_avg,
 atomic_read(cfs_rq-tg-runnable_avg));
 +   SEQ_printf(m,   .%-30s: %d\n, tg-usage_avg,
 +   atomic_read(cfs_rq-tg-usage_avg));
  #endif

 print_cfs_group_stats(m, cpu, cfs_rq-tg);
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
 index b249371..44a9a11 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
 @@ -994,7 +994,8 @@ static u32 __compute_runnable_contrib(u64 n)
   */
  static __always_inline int __update_entity_runnable_avg(u64 now,
 struct sched_avg *sa,
 -   int runnable)
 +   int runnable,
 +   int running)
  {
 u64 delta, periods;
 u32 runnable_contrib;
 @@ -1033,6 +1034,8 @@ static __always_inline int 
 __update_entity_runnable_avg(u64 now,
 delta_w = 1024 - delta_w;
 if (runnable)
 sa-runnable_avg_sum += delta_w;
 +   if (running)
 +   sa-usage_avg_sum += delta_w;
 sa-runnable_avg_period += delta_w;

 delta -= delta_w;
 @@ -1045,17 +1048,22 @@ static __always_inline int 
 __update_entity_runnable_avg(u64 now,
   periods + 1);
 sa-runnable_avg_period = decay_load(sa-runnable_avg_period,
  periods + 1);
 +   sa-usage_avg_sum = decay_load(sa-usage_avg_sum, periods + 
 1);

 /* Efficiently calculate \sum (1..n_period) 1024*y^i */
 runnable_contrib = __compute_runnable_contrib(periods);
 if (runnable)
 sa-runnable_avg_sum += runnable_contrib;
 +   if (running)
 +   sa-usage_avg_sum += runnable_contrib;
 sa-runnable_avg_period += runnable_contrib;
 }

 /* Remainder of delta accrued against u_0` */
 if (runnable)
 sa-runnable_avg_sum += delta;
 +   if (running)
 +   sa-usage_avg_sum += delta;
 sa-runnable_avg_period += delta;

 return decayed;
 @@ -1101,16 +1109,28 @@ static inline void __update_tg_runnable_avg(struct 
 sched_avg *sa,
   struct cfs_rq *cfs_rq)
  {
 struct task_group *tg = cfs_rq-tg;
 -   long contrib;
 +   long contrib, usage_contrib;

 /* The fraction of a cpu used by this cfs_rq */
 contrib = div_u64(sa-runnable_avg_sum  NICE_0_SHIFT,
   sa-runnable_avg_period + 1);
 contrib -= cfs_rq-tg_runnable_contrib;

 -   if (abs(contrib)  cfs_rq-tg_runnable_contrib / 64) {
 +   usage_contrib = div_u64(sa-usage_avg_sum  NICE_0_SHIFT,
 +   sa-runnable_avg_period + 1);
 +   usage_contrib -= 

[RFC] linux/time.h vs. sys/time.h mess (was [PATCH 1/2] [media] remove include/linux/dvb/dmx.h)

2012-10-19 Thread Laurent Pinchart
Hi Mauro,

(CC'ing LKML)

On Friday 19 October 2012 08:21:16 Mauro Carvalho Chehab wrote:
 Em Fri, 19 Oct 2012 07:43:11 -0300
 
 Mauro Carvalho Chehab mche...@redhat.com escreveu:
  -#include linux/time.h
  -#include uapi/linux/dvb/dmx.h
  -
  -#endif /*_DVBDMX_H_*/
 
 Just to not discard a valid comment on IRC, Laurent proposed that we
 should investigate if we can, instead, move:
 
   #include linux/time.h
 
 to both dmx.h and videodev2.h, letting it be included by both userspace
 and kernelspace.
 
 I remember this used to cause compilation breakage in the past, as some
 userspace programs need to include sys/time.h and this used to conflict
 with linux/time.h.
 
 I'm not sure if this got fixed there. If so, Laurent has a point.

It's still not solved, but that's what the proper fix should be.

Several UAPI headers use struct timeval or struct timespec. Kernel code and 
user space code thus need to include the header(s) that define those 
structures, either directly or indirectly.

In kernel space struct timeval and struct timespec are defined in 
include/uapi/linux/time.h. In user space they're defined in sys/time.h. No 
proper conditional compilation exists in those headers to guard against 
multiple definitions, so they can't be included together.

On the kernel side sys/time.h isn't available, so we can include 
linux/time.h in the headers that use the timeval and timespec structures. 
This self-contained headers mechanism avoids forcing all users of those 
headers to explicitly include linux/time.h.

However, this then breaks user space applications that include both 
sys/time.h and a kernel header that includes linux/time.h. The way we've 
dealt with that until now is by including either linux/time.h or 
sys/time.h depending on __KERNEL__

#ifdef __KERNEL__
#include <linux/time.h>
#else
#include <sys/time.h>
#endif

in our user-facing headers. The recent UAPI disintegration patches resulted in 
nearly empty headers in include/linux/ that just #include both linux/time.h 
and the corresponding UAPI header. For instance include/linux/videodev2.h is 
now just

#ifndef __LINUX_VIDEODEV2_H
#define __LINUX_VIDEODEV2_H

#include linux/time.h /* need struct timeval */
#include uapi/linux/videodev2.h

#endif /* __LINUX_VIDEODEV2_H */

Patches have been posted to remove those headers and push the #include 
linux/time.h one level up, which breaks the self-contained headers 
concept.

How could we fix this ? Are there legitimate users of linux/time.h in user 
space ? A quick grep in glibc doesn't reveal anything.

-- 
Regards,

Laurent Pinchart

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] x86/efi: Fix oops caused by incorrect set_memory_uc() usage

2012-10-19 Thread Matt Fleming
From: Matt Fleming matt.flem...@intel.com

Calling __pa() with an ioremap'd address is invalid. If we
encounter an efi_memory_desc_t without EFI_MEMORY_WB set in
->attribute we currently call set_memory_uc(), which in turn
calls __pa() on a potentially ioremap'd address. On
CONFIG_X86_32 this results in the following oops,

  BUG: unable to handle kernel paging request at f7f22280
  IP: [c10257b9] reserve_ram_pages_type+0x89/0x210
  *pdpt = 01978001 *pde = 01ffb067 *pte = 
  Oops:  [#1] PREEMPT SMP
  Modules linked in:

  Pid: 0, comm: swapper Not tainted 3.0.0-acpi-efi-0805 #3
   EIP: 0060:[c10257b9] EFLAGS: 00010202 CPU: 0
   EIP is at reserve_ram_pages_type+0x89/0x210
   EAX: 0070e280 EBX: 38714000 ECX: f7814000 EDX: 
   ESI:  EDI: 38715000 EBP: c189fef0 ESP: c189fea8
   DS: 007b ES: 007b FS: 00d8 GS:  SS: 0068
  Process swapper (pid: 0, ti=c189e000 task=c18bbe60 task.ti=c189e000)
  Stack:
   8200 ff108000  c189ff00 00038714   c189fed0
   c104f8ca 00038714  00038715   00038715 
   0010 38715000 c189ff48 c1025aff 38715000  0010 
  Call Trace:
   [c104f8ca] ? page_is_ram+0x1a/0x40
   [c1025aff] reserve_memtype+0xdf/0x2f0
   [c1024dc9] set_memory_uc+0x49/0xa0
   [c19334d0] efi_enter_virtual_mode+0x1c2/0x3aa
   [c19216d4] start_kernel+0x291/0x2f2
   [c19211c7] ? loglevel+0x1b/0x1b
   [c19210bf] i386_start_kernel+0xbf/0xc8

The only time we can call set_memory_uc() for a memory region is when
it is part of the direct kernel mapping. For the case where we ioremap
a memory region we must leave it alone.

This patch reimplements the fix from e8c7106280a3 (x86, efi: Calling
__pa() with an ioremap()ed address is invalid) which was reverted in
e1ad783b12ec because it caused a regression on some MacBooks (they
hung at boot). The regression was caused because the commit only
marked EFI_RUNTIME_SERVICES_DATA as E820_RESERVED_EFI, when it should
have marked all regions that have the EFI_MEMORY_RUNTIME
attribute.

Despite first impressions, it's not possible to use ioremap_cache() to
map all cached memory regions on CONFIG_X86_64 because of the way that
the memory map might be configured as detailed in the following bug
report,

https://bugzilla.redhat.com/show_bug.cgi?id=748516

e.g. some of the EFI memory regions *need* to be mapped as part of the
direct kernel mapping.

Signed-off-by: Matt Fleming matt.flem...@intel.com
Cc: H. Peter Anvin h...@zytor.com
Cc: Ingo Molnar mi...@kernel.org
Cc: Matthew Garrett m...@redhat.com
Cc: Zhang Rui rui.zh...@intel.com
Cc: Huang Ying huang.ying.cari...@gmail.com
Cc: Keith Packard kei...@keithp.com
---
 arch/x86/include/asm/efi.h |  5 +++--
 arch/x86/platform/efi/efi.c| 29 ++---
 arch/x86/platform/efi/efi_64.c |  7 +--
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ae3bf3b..e56fde0 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -35,7 +35,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)  \
efi_call_virt(f, a1, a2, a3, a4, a5, a6)
 
-#define efi_ioremap(addr, size, type)  ioremap_cache(addr, size)
+#define efi_ioremap(addr, size, type, attr)ioremap_cache(addr, size)
 
 #else /* !CONFIG_X86_32 */
 
@@ -103,7 +103,7 @@ extern void efi_call_virt_epilog(unsigned long);
  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
-u32 type);
+u32 type, u64 attribute);
 
 #endif /* CONFIG_X86_32 */
 
@@ -112,6 +112,7 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool 
executable);
 extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
+extern void efi_memory_uc(u64 addr, unsigned long size);
 
 #ifndef CONFIG_EFI
 /*
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7578344..7679689 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -788,6 +788,16 @@ void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
return NULL;
 }
 
+void efi_memory_uc(u64 addr, unsigned long size)
+{
+   unsigned long page_shift = 1UL  EFI_PAGE_SHIFT;
+   u64 npages;
+
+   npages = round_up(size, page_shift) / page_shift;
+   memrange_efi_to_native(addr, npages);
+   set_memory_uc(addr, npages);
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
@@ -801,7 +811,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
-   u64 end, systab, 

Re: [PATCH 1/3] mm/slob: Drop usage of page-private for storing page-sized allocations

2012-10-19 Thread Ezequiel Garcia
On Thu, Oct 18, 2012 at 7:46 PM, David Rientjes rient...@google.com wrote:
 On Thu, 18 Oct 2012, Ezequiel Garcia wrote:

 This field was being used to store size allocation so it could be
 retrieved by ksize(). However, it is a bad practice to not mark a page
 as a slab page and then use fields for special purposes.
 There is no need to store the allocated size and
 ksize() can simply return PAGE_SIZE << compound_order(page).

 Cc: Pekka Penberg penb...@kernel.org

 Is Pekka Penberg the long distant cousin of Pekka Enberg? :)  You should
 probably cc the author of slob, Matt Mackall m...@selenic.com, on slob
 patches.


Ouch! ;-)

I found another typo so I'll just re-send the whole patchset.

Thanks for the review!

Ezequiel
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/3] mm/slob: Drop usage of page-private for storing page-sized allocations

2012-10-19 Thread Ezequiel Garcia
This field was being used to store size allocation so it could be
retrieved by ksize(). However, it is a bad practice to not mark a page
as a slab page and then use fields for special purposes.
There is no need to store the allocated size and
ksize() can simply return PAGE_SIZE << compound_order(page).

Cc: Pekka Enberg penb...@kernel.org
Cc: Matt Mackall m...@selenic.com
Acked-by: Christoph Lameter c...@linux.com
Signed-off-by: Ezequiel Garcia elezegar...@gmail.com
---
Changes from v1:
 * Fix Pekka's last name and put Matt Mackall in Cc

 mm/slob.c |   24 ++--
 1 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/mm/slob.c b/mm/slob.c
index a08e468..06a5ec7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -28,9 +28,8 @@
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
  * alloc_pages() directly, allocating compound pages so the page order
- * does not have to be separately tracked, and also stores the exact
- * allocation size in page-private so that it can be used to accurately
- * provide ksize(). These objects are detected in kfree() because slob_page()
+ * does not have to be separately tracked.
+ * These objects are detected in kfree() because PageSlab()
  * is false for them.
  *
  * SLAB is emulated on top of SLOB by simply calling constructors and
@@ -455,11 +454,6 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, 
unsigned long caller)
if (likely(order))
gfp |= __GFP_COMP;
ret = slob_new_pages(gfp, order, node);
-   if (ret) {
-   struct page *page;
-   page = virt_to_page(ret);
-   page-private = size;
-   }
 
trace_kmalloc_node(caller, ret,
   size, PAGE_SIZE  order, gfp, node);
@@ -514,18 +508,20 @@ EXPORT_SYMBOL(kfree);
 size_t ksize(const void *block)
 {
struct page *sp;
+   int align;
+   unsigned int *m;
 
BUG_ON(!block);
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
 
sp = virt_to_page(block);
-   if (PageSlab(sp)) {
-   int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
-   unsigned int *m = (unsigned int *)(block - align);
-   return SLOB_UNITS(*m) * SLOB_UNIT;
-   } else
-   return sp-private;
+   if (unlikely(!PageSlab(sp)))
+   return PAGE_SIZE  compound_order(sp);
+
+   align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+   m = (unsigned int *)(block - align);
+   return SLOB_UNITS(*m) * SLOB_UNIT;
 }
 EXPORT_SYMBOL(ksize);
 
-- 
1.7.8.6

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] mm/slob: Use object_size field in kmem_cache_size()

2012-10-19 Thread Ezequiel Garcia
Fields object_size and size are not the same: the latter might include
slab metadata. Return object_size field in kmem_cache_size().
Also, improve trace accuracy by correctly tracing reported size.

Cc: Christoph Lameter c...@linux-foundation.org
Cc: Pekka Enberg penb...@kernel.org
Cc: Matt Mackall m...@selenic.com
Acked-by: David Rientjes rient...@google.com
Signed-off-by: Ezequiel Garcia elezegar...@gmail.com
---
 mm/slob.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/slob.c b/mm/slob.c
index 06a5ec7..287a88a 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -554,12 +554,12 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t 
flags, int node)
 
if (c-size  PAGE_SIZE) {
b = slob_alloc(c-size, flags, c-align, node);
-   trace_kmem_cache_alloc_node(_RET_IP_, b, c-size,
+   trace_kmem_cache_alloc_node(_RET_IP_, b, c-object_size,
SLOB_UNITS(c-size) * SLOB_UNIT,
flags, node);
} else {
b = slob_new_pages(flags, get_order(c-size), node);
-   trace_kmem_cache_alloc_node(_RET_IP_, b, c-size,
+   trace_kmem_cache_alloc_node(_RET_IP_, b, c-object_size,
PAGE_SIZE  get_order(c-size),
flags, node);
}
@@ -606,7 +606,7 @@ EXPORT_SYMBOL(kmem_cache_free);
 
 unsigned int kmem_cache_size(struct kmem_cache *c)
 {
-   return c-size;
+   return c-object_size;
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-- 
1.7.8.6

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/3] mm/sl[aou]b: Move common kmem_cache_size() to slab.h

2012-10-19 Thread Ezequiel Garcia
This function is identically defined in all three allocators
and it's trivial to move it to slab.h

Since it is now a static inline, header-defined function,
this patch also drops the EXPORT_SYMBOL tag.

Cc: Christoph Lameter c...@linux-foundation.org
Cc: Pekka Enberg penb...@kernel.org
Cc: Matt Mackall m...@selenic.com
Signed-off-by: Ezequiel Garcia elezegar...@gmail.com
---
Changes from v1:
 * Declare kmem_cache_size() static inline

 include/linux/slab.h |9 -
 mm/slab.c|6 --
 mm/slob.c|6 --
 mm/slub.c|9 -
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 83d1a14..743a104 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, 
size_t,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, 
int node)
return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+   return s-object_size;
+}
+
 void __init kmem_cache_init_late(void);
 
 #endif /* _LINUX_SLAB_H */
diff --git a/mm/slab.c b/mm/slab.c
index 87c55b0..92a3fec 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3969,12 +3969,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
-   return cachep-object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index 287a88a..fffbc82 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -604,12 +604,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-   return c-object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
/* No way to check for remaining objects */
diff --git a/mm/slub.c b/mm/slub.c
index a0d6984..1f826b0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3127,15 +3127,6 @@ error:
return -EINVAL;
 }
 
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-   return s-object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
const char *text)
 {
-- 
1.7.8.6

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] brw_mutex: big read-write mutex

2012-10-19 Thread Peter Zijlstra
On Thu, 2012-10-18 at 15:28 -0400, Mikulas Patocka wrote:
 
 On Thu, 18 Oct 2012, Oleg Nesterov wrote:
 
  Ooooh. And I just noticed include/linux/percpu-rwsem.h which does
  something similar. Certainly it was not in my tree when I started
  this patch... percpu_down_write() doesn't allow multiple writers,
  but the main problem is that it uses msleep(1). It should not, I think.
 
 synchronize_rcu() can sleep for hundred milliseconds, so msleep(1) is not 
 a big problem.

That code is beyond ugly though.. it should really not have been merged.

There's absolutely no reason for it to use RCU except to make it more
complicated. And as Oleg pointed out that msleep() is very ill
considered.

The very worst part of it seems to be that nobody who's usually involved
with locking primitives was ever consulted (Linus, PaulMck, Oleg, Ingo,
tglx, dhowells and me). It doesn't even have lockdep annotations :/

So the only reason you appear to use RCU is because you don't actually
have a sane way to wait for count==0. And I'm contesting that rcu_sync() is
sane here -- for the very simple reason you still need a while (count)
loop right after it.

So it appears you want a totally reader-biased, sleepable rw-lock-like
thing?

So did you consider keeping the inc/dec on the same per-cpu variable?
Yes, this adds a potential remote access to dec and requires you to use
atomics, but I would not be surprised if the inc/dec were mostly on the
same cpu most of the time -- which might be plenty fast for what you
want.

If you've got coherent per-cpu counts, you can better do the
waitqueue/wake condition for write_down.

It might also make sense to do away with the mutex, there's no point in
serializing the wakeups in the p->locked case of down_read. Furthermore,
p->locked seems a complete duplicate of the mutex state, so removing the
mutex also removes that duplication.

Also, that CONFIG_x86 thing.. *shudder*...
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] arm/dts: AM33XX: Add SPI device tree data

2012-10-19 Thread Benoit Cousson
Hi Matt,

On 10/19/2012 01:30 PM, Matt Porter wrote:
 On Fri, Oct 19, 2012 at 10:24:15AM +0200, Benoit Cousson wrote:
 Hi Avinash,

 This looks good to me except for the: status = disabled.

 The disabled should be reserved for variant that does not contain the IP.
 Is it the case here?
 
 http://comments.gmane.org/gmane.linux.drivers.devicetree/18968 is what
 I've been going by with the DTS support in the EDMA dmaengine series. It
 does make the most sense to only enable what you need in the
 board.dts.

Thanks, I missed that thread. That being said, there is no real rationale
:-)
It seems to be a preference more than anything else.
I'm curious now, why powerpc was not really using that approach?

I'd rather explicitly disable an IP than assume that it is disabled by
default and then enable it in the board file. But again, it is just a
different viewpoint, since in the end it will have the same effect.

If we really want the disabled state to be the default state, why is it
not disabled in the DT fmwk by default?

Regards,
Benoit

 
 -Matt
 
 On 09/18/2012 07:30 AM, Philip, Avinash wrote:
 Add McSPI data node to AM33XX device tree file. The McSPI module (and so
 as the driver) is reused from OMAP4.

 Signed-off-by: Philip, Avinash avinashphi...@ti.com
 Tested-by: Matt Porter mpor...@ti.com
 ---
 Changes since v1:
 - Corrected reg offset in reg DT entry.

 :100644 100644 ff3badb... 065fd54... M  arch/arm/boot/dts/am33xx.dtsi
  arch/arm/boot/dts/am33xx.dtsi |   25 +
  1 files changed, 25 insertions(+), 0 deletions(-)

 diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
 index ff3badb..065fd54 100644
 --- a/arch/arm/boot/dts/am33xx.dtsi
 +++ b/arch/arm/boot/dts/am33xx.dtsi
 @@ -219,5 +219,30 @@
 interrupt-parent = intc;
 interrupts = 91;
 };
 +
 +   spi0: spi@4803 {
 +   compatible = ti,omap4-mcspi;
 +   #address-cells = 1;
 +   #size-cells = 0;
 +   reg = 0x4803 0x400;
 +   interrupt-parent = intc;
 +   interrupt = 65;
 +   ti,spi-num-cs = 2;
 +   ti,hwmods = spi0;
 +   status = disabled;
 +
 +   };
 +
 +   spi1: spi@481a {
 +   compatible = ti,omap4-mcspi;
 +   #address-cells = 1;
 +   #size-cells = 0;
 +   reg = 0x481a 0x400;
 +   interrupt-parent = intc;
 +   interrupt = 125;
 +   ti,spi-num-cs = 2;
 +   ti,hwmods = spi1;
 +   status = disabled;
 +   };
 };
  };


 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/3] lp8788-charger: fix wrong ADC conversion

2012-10-19 Thread Lars-Peter Clausen
On 10/19/2012 02:12 AM, Kim, Milo wrote:
  To get the battery voltage and temperature, IIO ADC functions are used.
  LP8788 ADC driver provides RAW and SCALE channel information.
  This patch fixes wrong ADC result.
 
  Patch v2.
  Use simple iio_read_channel_processed() function rather than
  iio_read_channel_raw() and _scale().
 
  Fix the result type of ADC function as a signed integer.
  Because power_supply_propval.intval and the return value of
  iio_read_channel_processed() are a signed integer,
  'unsigned int' are replaced with 'int'.
 
  Patch v1.
  Fix wrong ADC results using iio_read_channel_raw() and _scale().
 
 Signed-off-by: Milo(Woogyom) Kim milo@ti.com

Looks good to me, fwiw:

Reviewed-by: Lars-Peter Clausen l...@metafoo.de

But there is one issue, but this is not necessarily related to this patch,
more inline.

 ---
  drivers/power/lp8788-charger.c |   26 +++---
  1 file changed, 7 insertions(+), 19 deletions(-)
 
 diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c
 index 02fc9ab..f18ec8f 100644
 --- a/drivers/power/lp8788-charger.c
 +++ b/drivers/power/lp8788-charger.c
 @@ -235,25 +235,14 @@ static int lp8788_get_battery_present(struct 
 lp8788_charger *pchg,
   return 0;
  }
  
[...]
  static int lp8788_get_battery_voltage(struct lp8788_charger *pchg,
 @@ -268,7 +257,7 @@ static int lp8788_get_battery_capacity(struct 
 lp8788_charger *pchg,
   struct lp8788 *lp = pchg-lp;
   struct lp8788_charger_platform_data *pdata = pchg-pdata;
   unsigned int max_vbatt;
 - unsigned int vbatt;
 + int vbatt;
   enum lp8788_charging_state state;
   u8 data;
   int ret;
 @@ -304,19 +293,18 @@ static int lp8788_get_battery_temperature(struct 
 lp8788_charger *pchg,
   union power_supply_propval *val)
  {
   struct iio_channel *channel = pchg-chan[LP8788_BATT_TEMP];
 - int scaleint;
 - int scalepart;
 + int result;
   int ret;
  
   if (!channel)
   return -EINVAL;
  
 - ret = iio_read_channel_scale(channel, scaleint, scalepart);
 - if (ret != IIO_VAL_INT_PLUS_MICRO)
 + ret = iio_read_channel_processed(channel, result);
 + if (ret  0)
   return -EINVAL;
  
   /* unit: 0.1 'C */
 - val-intval = (scaleint + scalepart * 100) / 100;
 + val-intval = result * 10;

IIO reports temperatures in millidegrees Celsius. So it should be multiplied
by 100 to get tenths of a degree like the power supply framework expects. But
this might be an issue in your IIO driver reporting the wrong scale.

- Lars

  
   return 0;
  }

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: MAX_LOCKDEP_ENTRIES too low (called from ioc_release_fn)

2012-10-19 Thread Peter Zijlstra
On Fri, 2012-10-19 at 01:21 -0400, Dave Jones wrote:
   Not sure why you are CC'ing a call site, rather than the maintainers of
   the code. Just looks like lockdep is using too small a static value.
   Though it is pretty darn large...
 
 You're right, it's a huge chunk of memory.
 It looks like I can trigger this from multiple callsites..
 Another different trace below.
 
 Not sure why this suddenly got a lot worse in 3.7 

Did we add a static array of structures with locks in somewhere? Doing
that is a great way of blowing up the number of lock classes and the
resulting amount of lock dependency chains.

From Documentation/lockdep-design.txt; it talks about overflowing
MAX_LOCKDEP_KEYS, but I suppose it's a good start for overflowing the
dependency entries too; more classes means more dependencies after all.

---
Of course, if you do run out of lock classes, the next thing to do is
to find the offending lock classes.  First, the following command gives
you the number of lock classes currently in use along with the maximum:

grep lock-classes /proc/lockdep_stats

This command produces the following output on a modest system:

 lock-classes:  748 [max: 8191]

If the number allocated (748 above) increases continually over time,
then there is likely a leak.  The following command can be used to
identify the leaking lock classes:

grep BD /proc/lockdep

Run the command and save the output, then compare against the output from
a later run of this command to identify the leakers.  This same output
can also help you find situations where runtime lock initialization has
been omitted.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] pinctrl: reserve pins when states are activated

2012-10-19 Thread Linus Walleij
On Fri, Oct 19, 2012 at 1:33 AM, Stephen Warren swar...@wwwdotorg.org wrote:
 On 10/18/2012 04:01 AM, Linus Walleij wrote:
 This switches the way that pins are reserved for multiplexing:

 The conceptual change makes perfect sense to me.

Thanks...

 diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c

  void pinmux_free_setting(struct pinctrl_setting const *setting)
 ...
 + /*
 +  * If a setting is active, disable it so that all pins are released
 +  */
 + pinmux_disable_setting(setting);

 *If* a setting is active ... implies there should be a C if statement
 here to check if the setting is actually active before trying to disable it?

Spot on.

Actually in core.c we're already checking this:

case PIN_MAP_TYPE_MUX_GROUP:
if (state == p-state)
pinmux_disable_setting(setting);
pinmux_free_setting(setting);
break;

So this is just buggy, driving the refcount negative.
(I think Jean-Nicolas noticed this phenomenon...)

I should just delete pinmux_free_setting().

Yours,
Linus Walleij

Yours,
Linus Walleij
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RESEND PATCH 2/4] asm-generic: io: don't perform swab during {in,out} string functions

2012-10-19 Thread Will Deacon
On Thu, Oct 18, 2012 at 06:48:16AM +0100, Geert Uytterhoeven wrote:
 On Thu, Oct 18, 2012 at 2:04 AM, Benjamin Herrenschmidt
 b...@kernel.crashing.org wrote:
  The short story is that endianness is not a property of the IO port but
  of the information that transits through it. If you're just going to copy
  it into memory, you want to preserve its format and so do not byteswap.
 
  The byteswap we do on standard accessors is a helper because we assume
  that underneath those IO ports are registers that are Little Endian. But
  when using one as a window to a byte stream, we must not arbitrarily
  swap the byte stream. We copy it as-is to memory, and then one can work
  at interpreting the various fields that might or might not be present in
  that stream with the appropriate accessors for memory accesses.
 
 So assume you have the bytestream Hello, world!\n in memory on the
 PCI device.I.e.
 
   48 65 6c 6c 6f 2c 20 77  6f 72 6c 64 21 0a|Hello, world!.|
 
 You want to copy it to system RAM using readsl(), which does:
 
u32 *buf = buffer;
do {
u32 x = __raw_readl(addr + PCI_IOBASE);
*buf++ = x;
 } while (--count);
 
 On little endian, the first __raw_readl() should return 0x6c6c6548, so
 it is stored correctly by *buf = x .
 On big endian, the first __raw_readl() should return 0x48656c6c instead,
 else it's stored incorrectly by *buf = x .

So far so good...

 But the PCI bus is little endian, so I expect __raw_readl() would return
 0x6c6c6548, and thus needs swapping?

I think this would only happen if your busses are wired swapped, in which
case you'll have to handle this in your arch code because reading from a
device and then writing to memory will end up with the data in the wrong
order (the data stream won't be affected by passing through the CPU).

Will
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] Input: matrix-keypad - Add device tree support

2012-10-19 Thread Rob Herring
On 10/19/2012 02:06 AM, AnilKumar Ch wrote:
 Add device tree support to matrix keypad driver and usage details
 are added to device tree documentation. Driver was tested on AM335x
 EVM.
 
 Signed-off-by: AnilKumar Ch anilku...@ti.com
 ---
  .../devicetree/bindings/input/matrix-keypad.txt|   52 ++
  drivers/input/keyboard/matrix_keypad.c |  104 
 +++-
  2 files changed, 155 insertions(+), 1 deletion(-)
  create mode 100644 Documentation/devicetree/bindings/input/matrix-keypad.txt
 
 diff --git a/Documentation/devicetree/bindings/input/matrix-keypad.txt 
 b/Documentation/devicetree/bindings/input/matrix-keypad.txt
 new file mode 100644
 index 000..50aaa6e
 --- /dev/null
 +++ b/Documentation/devicetree/bindings/input/matrix-keypad.txt
 @@ -0,0 +1,52 @@
 +* GPIO driven matrix keypad device tree bindings
 +
 +GPIO driven matrix keypad is used to interface a SoC with a matrix keypad.
 +The matrix keypad supports multiple row and column lines, a key can be
 +placed at each intersection of a unique row and a unique column. The matrix
 +keypad can sense a key-press and key-release by means of GPIO lines and
 +report the event using GPIO interrupts to the cpu.
 +
 +Required Properties:
 +- compatible:Should be matix-keypad

How about gpio-matrix-keypad?

 +- keypad,num-row-gpios:  Number of row lines connected to the keypad
 + controller.
 +- keypad,num-col-gpios:  Number of column lines connected to the keypad
 + controller.

Isn't the number of gpios just the count of gpios listed below? So you
don't need these props.

 +- row-gpios: List of gpios used as row lines. The gpio specifier
 + for this property depends on the gpio controller to
 + which these row lines are connected.
 +- col-gpios: List of gpios used as column lines. The gpio specifier
 + for this property depends on the gpio controller to
 + which these column lines are connected.
 +
 +Optional Properties:
 +- linux,no-autorepeat:   do no enable autorepeat feature.
 +- linux,wakeup:  use any event on keypad as wakeup event.
 +- debounce-delay-ms: debounce interval in milliseconds
 +- col-scan-delay-us: delay, measured in microseconds, that is needed
 + before we can scan keypad after activating column gpio
 +- clustered-irq: have clustered irq number
 +- clustered-irq-flags:   have clustered irq flags

It's not clear what clustered means here. If I have to go read Linux
code to understand, you are doing it wrong. Describe the h/w, not Linux
data structs.

 +
 +Example:
 + matrix-keypad {
 + compatible = matrix-keypad;
 + keypad,num-row-gpios = 3;
 + keypad,num-col-gpios = 2;
 + debounce-delay-ms = 5;
 + col-scan-delay-us = 2;
 +
 + row-gpios = gpio2 25 0
 +  gpio2 26 0
 +  gpio2 27 0;
 +
 + col-gpios = gpio2 21 0
 +  gpio2 22 0;
 +
 + linux,keymap = 0x008B
 + 0x019E
 + 0x0269
 + 0x0001006A
 + 0x0101001C
 + 0x0201006C;
 + };
 diff --git a/drivers/input/keyboard/matrix_keypad.c 
 b/drivers/input/keyboard/matrix_keypad.c
 index 18b7237..39e480d 100644
 --- a/drivers/input/keyboard/matrix_keypad.c
 +++ b/drivers/input/keyboard/matrix_keypad.c
 @@ -23,6 +23,9 @@
  #include linux/gpio.h
  #include linux/input/matrix_keypad.h
  #include linux/slab.h
 +#include linux/of.h
 +#include linux/of_gpio.h
 +#include linux/of_platform.h
  
  struct matrix_keypad {
   const struct matrix_keypad_platform_data *pdata;
 @@ -394,6 +397,91 @@ static void matrix_keypad_free_gpio(struct matrix_keypad 
 *keypad)
   gpio_free(pdata-col_gpios[i]);
  }
  
 +#ifdef CONFIG_OF
 +static
 +struct matrix_keypad_platform_data *matrix_keypad_parse_dt(struct device 
 *dev)
 +{
 + struct matrix_keypad_platform_data *pdata;
 + struct matrix_keymap_data *keymap_data;
 + struct device_node *np = dev-of_node;
 + struct property *prop;
 + int key_count = 0, length, row, col;
 + uint32_t *keymap;
 +
 + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 + if (!pdata) {
 + dev_err(dev, could not allocate memory for platform data\n);
 + return NULL;
 + }
 +
 + of_property_read_u32(np, keypad,num-row-gpios, pdata-num_row_gpios);
 + of_property_read_u32(np, keypad,num-col-gpios, pdata-num_col_gpios);
 + if (!pdata-num_row_gpios || !pdata-num_col_gpios) {
 + dev_err(dev, number of keypad rows/columns not specified\n);
 + return NULL;
 + }
 +
 + keymap_data = devm_kzalloc(dev, sizeof(*keymap_data), GFP_KERNEL);
 + 

nfsd bugfixes for 3.7

2012-10-19 Thread J. Bruce Fields
Please pull nfsd bugfixes for 3.7 from the for-3.7 branch at:

  git://linux-nfs.org/~bfields/linux.git for-3.7

--b.


Sasha Levin (1):
  SUNRPC: Prevent kernel stack corruption on long values of flush

Trond Myklebust (1):
  NLM: nlm_lookup_file() may return NLMv4-specific error codes

 fs/lockd/clntxdr.c |2 +-
 fs/lockd/svcproc.c |3 ++-
 net/sunrpc/cache.c |4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] epoll: Support for disabling items, and a self-test app.

2012-10-19 Thread Paolo Bonzini
Il 18/10/2012 20:05, Andy Lutomirski ha scritto:
 
 Unless something is rather buggy in kernel land (and I don't think it
 is), once EPOLL_CTL_DEL has returned, no call to epoll_wait that starts
 *after* EPOLL_CTL_DEL finishes will return that object.  This suggests
 an RCU-like approach: once EPOLL_CTL_DEL has returned and every thread
 has returned from an epoll_wait call that started after the
 EPOLL_CTL_DEL returns, then the data structure can be safely freed.
 
 In pseudocode:
 
 delete(fd, pdata) {
   pdata-dead = true;
   EPOLL_CTL_DEL(fd);
   rcu_call(delete pdata);
 }
 
 wait() {
   epoll_wait;
   for each event pdata {
 if (pdata-gone) continue;
 process the event;
   }
 
   rcu_this_is_a_grace_period();
 }
 
 Of course, these are not normal grace periods and would need to be
 tracked separately.  (The optimal data structure to do this without
 killing scalability is not obvious.  urcu presumably implements such a
 thing.)
 
 Am I right?

Equip each thread with a) an id or something else that lets each thread
refer to the next thread; b) a list of items waiting to be deleted.
 Then the deleting thread adds the item being deleted to the first
thread's list.  Before executing epoll_wait, thread K empties its list
and passes the buck, appending the old contents of its list to that of
thread K+1.  This is an O(1) operation no matter how many items are
being deleted; only Thread N, being the last thread, actually has to go
through the list and delete the items.

The lists need to be protected by a mutex, but contention should really
be rare since there are just two writers.  Note that each thread only
needs to hold one mutex at a time, and the deletion loop does not need
to happen with the mutex held at all, so there's no worries of
cascading waits on the mutexes.

Compared to http://thread.gmane.org/gmane.linux.kernel/1311457, you get
rid of the per-item mutex and the operations that have to be done with
the (now per-thread) mutex held remain pretty trivial.

Paolo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/3] virtio: 9p: correctly pass physical address to userspace for high pages

2012-10-19 Thread Will Deacon
When using a virtio transport, the 9p net device may pass the physical
address of a kernel buffer to userspace via a scatterlist inside a
virtqueue. If the kernel buffer is mapped outside of the linear mapping
(e.g. highmem), then virt_to_page will return a bogus value and we will
populate the scatterlist with junk.

This patch uses kmap_to_page when populating the page array for a kernel
buffer.

Cc: Rusty Russell ru...@rustcorp.com.au
Cc: Sasha Levin levinsasha...@gmail.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 net/9p/trans_virtio.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 35b8911..fd05c81 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -39,6 +39,7 @@
 #include linux/inet.h
 #include linux/idr.h
 #include linux/file.h
+#include linux/highmem.h
 #include linux/slab.h
 #include net/9p/9p.h
 #include linux/parser.h
@@ -325,7 +326,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
-   pages[index++] = virt_to_page(data);
+   pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
-- 
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/3] mm: highmem: export kmap_to_page for modules

2012-10-19 Thread Will Deacon
Some virtio device drivers (9p) need to translate high virtual addresses
to physical addresses, which are inserted into the virtqueue for
processing by userspace.

This patch exports the kmap_to_page symbol, so that the affected drivers
can be compiled as modules.

Signed-off-by: Will Deacon will.dea...@arm.com
---
 mm/highmem.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/mm/highmem.c b/mm/highmem.c
index d517cd1..2a07f97 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -105,6 +105,7 @@ struct page *kmap_to_page(void *vaddr)
 
return virt_to_page(addr);
 }
+EXPORT_SYMBOL(kmap_to_page);
 
 static void flush_all_zero_pkmaps(void)
 {
-- 
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/3] virtio: force vring descriptors to be allocated from lowmem

2012-10-19 Thread Will Deacon
Virtio devices may attempt to add descriptors to a virtqueue from atomic
context using GFP_ATOMIC allocation. This is problematic because such
allocations can fall outside of the lowmem mapping, causing virt_to_phys
to report bogus physical addresses which are subsequently passed to
userspace via the buffers for the virtual device.

This patch masks out __GFP_HIGH and __GFP_HIGHMEM from the requested
flags when allocating descriptors for a virtqueue. If an atomic
allocation is requested and later fails, we will return -ENOSPC which
will be handled by the driver.

Cc: Rusty Russell ru...@rustcorp.com.au
Cc: Sasha Levin levinsasha...@gmail.com
Signed-off-by: Will Deacon will.dea...@arm.com
---
 drivers/virtio/virtio_ring.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e639584..286c30c 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -135,6 +135,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
unsigned head;
int i;
 
+   /*
+* We require lowmem mappings for the descriptors because
+* otherwise virt_to_phys will give us bogus addresses in the
+* virtqueue.
+*/
+   gfp = ~(__GFP_HIGHMEM | __GFP_HIGH);
+
desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
if (!desc)
return -ENOMEM;
-- 
1.7.4.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] pinctrl: reserve pins when states are activated

2012-10-19 Thread Linus Walleij
From: Linus Walleij linus.wall...@linaro.org

This switches the way that pins are reserved for multiplexing:

We used to do this when the map was parsed, at the creation of
the settings inside the pinctrl handle, in pinmux_map_to_setting().

However this does not work for us, because we want to use the
same set of pins with different devices at different times: the
current code assumes that the pin groups in a pinmux state will
only be used with one single device, albeit different groups can
be active at different times. For example if a single I2C driver
block is used to drive two different busses located on two
pin groups A and B, then the pins for all possible states of a
function are reserved when fetching the pinctrl handle: the
I2C bus can choose either set A or set B by a mux state at
runtime, but all pins in both group A and B (the superset) are
effectively reserved for that I2C function and mapped to the
device. Another device can never get in and use the pins in
group A, even if the device/function is using group B at the
moment.

Instead: let us reserve the pins when the state is activated
and drop them when the state is disabled, i.e. when we move to
another state. This way different devices/functions can use the
same pins at different times.

We know that this is an odd way of doing things, but we really
need to switch e.g. an SD-card slot to become a tracing output
sink at runtime: we plug in a special tracing card then mux
the pins that used to be an SD slot around to the tracing
unit and push out tracing data there instead of SD-card
traffic.

As a side effect pinmux_free_setting() is unused and gets
deleted.

Cc: Patrice Chotard patrice.chot...@st.com
Cc: Jean Nicolas Graux jean-nicolas.gr...@stericsson.com
Cc: Loic Pallardy loic.palla...@st.com
Signed-off-by: Linus Walleij linus.wall...@linaro.org
---
ChangeLog v1-v2:
- The code was already accounting for the case where the setting
  was not active and called pinmux_disable_setting()
  from the core, so skip this and delete the now empty
  pinmux_free_setting() altogether.
---
 Documentation/pinctrl.txt |  4 ++-
 drivers/pinctrl/core.c|  3 +-
 drivers/pinctrl/core.h|  2 ++
 drivers/pinctrl/pinmux.c  | 70 ++-
 drivers/pinctrl/pinmux.h  |  5 
 5 files changed, 28 insertions(+), 56 deletions(-)

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 3b4ee53..a1cd2f9 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -1193,4 +1193,6 @@ foo_switch()
...
 }
 
-The above has to be done from process context.
+The above has to be done from process context. The reservation of the pins
+will be done when the state is activated, so in effect one specific pin
+can be used by different functions at different times on a running system.
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 0f1ec9e..bbd930e 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -563,6 +563,8 @@ static int add_setting(struct pinctrl *p, struct 
pinctrl_map const *map)
return -EPROBE_DEFER;
}
 
+   setting-dev_name = map-dev_name;
+
switch (map-type) {
case PIN_MAP_TYPE_MUX_GROUP:
ret = pinmux_map_to_setting(map, setting);
@@ -689,7 +691,6 @@ static void pinctrl_put_locked(struct pinctrl *p, bool 
inlist)
case PIN_MAP_TYPE_MUX_GROUP:
if (state == p-state)
pinmux_disable_setting(setting);
-   pinmux_free_setting(setting);
break;
case PIN_MAP_TYPE_CONFIGS_PIN:
case PIN_MAP_TYPE_CONFIGS_GROUP:
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 1f40ff6..12f5694 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -105,12 +105,14 @@ struct pinctrl_setting_configs {
  * @type: the type of setting
  * @pctldev: pin control device handling to be programmed. Not used for
  *   PIN_MAP_TYPE_DUMMY_STATE.
+ * @dev_name: the name of the device using this state
  * @data: Data specific to the setting type
  */
 struct pinctrl_setting {
struct list_head node;
enum pinctrl_map_type type;
struct pinctrl_dev *pctldev;
+   const char *dev_name;
union {
struct pinctrl_setting_mux mux;
struct pinctrl_setting_configs configs;
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 9301a7a..0ecdf54 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -314,14 +314,11 @@ int pinmux_map_to_setting(struct pinctrl_map const *map,
 {
struct pinctrl_dev *pctldev = setting-pctldev;
const struct pinmux_ops *pmxops = pctldev-desc-pmxops;
-   const struct pinctrl_ops *pctlops = pctldev-desc-pctlops;
char const * const *groups;
unsigned num_groups;
 

[RFC PATCH] init: Use the stringify operator for the __define_initcall macro

2012-10-19 Thread Matthew Leach
Currently the __define_initcall macro takes three arguments, fn, id
and level. The level argument is exactly the same as the id argument
but wrapped in quotes. To overcome this need to specify three
arguments to the __define_initcall macro, where one argument is the
stringification of another, we can just use the stringification macro
instead.

Signed-off-by: Matthew Leach matt...@mattleach.net
---
 include/linux/init.h | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index e59041e..eaa3851 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -186,16 +186,16 @@ extern bool initcall_debug;
  * can point at the same handler without causing duplicate-symbol build errors.
  */
 
-#define __define_initcall(level,fn,id) \
+#define __define_initcall(fn,id) \
static initcall_t __initcall_##fn##id __used \
-   __attribute__((__section__(.initcall level .init))) = fn
+   __attribute__((__section__(.initcall #id .init))) = fn
 
 /*
  * Early initcalls run before initializing SMP.
  *
  * Only for built-in code, not modules.
  */
-#define early_initcall(fn) __define_initcall(early,fn,early)
+#define early_initcall(fn) __define_initcall(fn,early)
 
 /*
  * A pure initcall has no dependencies on anything else, and purely
@@ -204,23 +204,23 @@ extern bool initcall_debug;
  * This only exists for built-in code, not for modules.
  * Keep main.c:initcall_level_names[] in sync.
  */
-#define pure_initcall(fn)  __define_initcall(0,fn,0)
-
-#define core_initcall(fn)  __define_initcall(1,fn,1)
-#define core_initcall_sync(fn) __define_initcall(1s,fn,1s)
-#define postcore_initcall(fn)  __define_initcall(2,fn,2)
-#define postcore_initcall_sync(fn) __define_initcall(2s,fn,2s)
-#define arch_initcall(fn)  __define_initcall(3,fn,3)
-#define arch_initcall_sync(fn) __define_initcall(3s,fn,3s)
-#define subsys_initcall(fn)__define_initcall(4,fn,4)
-#define subsys_initcall_sync(fn)   __define_initcall(4s,fn,4s)
-#define fs_initcall(fn)__define_initcall(5,fn,5)
-#define fs_initcall_sync(fn)   __define_initcall(5s,fn,5s)
-#define rootfs_initcall(fn)__define_initcall(rootfs,fn,rootfs)
-#define device_initcall(fn)__define_initcall(6,fn,6)
-#define device_initcall_sync(fn)   __define_initcall(6s,fn,6s)
-#define late_initcall(fn)  __define_initcall(7,fn,7)
-#define late_initcall_sync(fn) __define_initcall(7s,fn,7s)
+#define pure_initcall(fn)  __define_initcall(fn,0)
+
+#define core_initcall(fn)  __define_initcall(fn,1)
+#define core_initcall_sync(fn) __define_initcall(fn,1s)
+#define postcore_initcall(fn)  __define_initcall(fn,2)
+#define postcore_initcall_sync(fn) __define_initcall(fn,2s)
+#define arch_initcall(fn)  __define_initcall(fn,3)
+#define arch_initcall_sync(fn) __define_initcall(fn,3s)
+#define subsys_initcall(fn)__define_initcall(fn,4)
+#define subsys_initcall_sync(fn)   __define_initcall(fn,4s)
+#define fs_initcall(fn)__define_initcall(fn,5)
+#define fs_initcall_sync(fn)   __define_initcall(fn,5s)
+#define rootfs_initcall(fn)__define_initcall(fn,rootfs)
+#define device_initcall(fn)__define_initcall(fn,6)
+#define device_initcall_sync(fn)   __define_initcall(fn,6s)
+#define late_initcall(fn)  __define_initcall(fn,7)
+#define late_initcall_sync(fn) __define_initcall(fn,7s)
 
 #define __initcall(fn) device_initcall(fn)
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3/5] xen-pcifront: handle backend CLOSED without CLOSING

2012-10-19 Thread Konrad Rzeszutek Wilk
On Thu, Oct 18, 2012 at 11:03:36AM +0100, David Vrabel wrote:
 From: David Vrabel david.vra...@citrix.com
 
 Backend drivers shouldn't transition to CLOSED unless the frontend is
 CLOSED.  If a backend does transition to CLOSED too soon then the
 frontend may not see the CLOSING state and will not properly shutdown.
 
 So, treat an unexpected backend CLOSED state the same as CLOSING.
 
 Signed-off-by: David Vrabel david.vra...@citrix.com
 Acked-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 ---
 Cc: linux-...@vger.kernel.org
 Cc: Bjorn Helgaas bhelg...@google.com

Bjorn, do you want me to prep a git pull with this patch
or can I have your Ack to put it my tree and have it part of my
git pull to Linus?

Thx.
 ---
  drivers/pci/xen-pcifront.c |5 -
  1 files changed, 4 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
 index 0aab85a..a0c7312 100644
 --- a/drivers/pci/xen-pcifront.c
 +++ b/drivers/pci/xen-pcifront.c
 @@ -1068,13 +1068,16 @@ static void __init_refok 
 pcifront_backend_changed(struct xenbus_device *xdev,
   case XenbusStateInitialising:
   case XenbusStateInitWait:
   case XenbusStateInitialised:
 - case XenbusStateClosed:
   break;
  
   case XenbusStateConnected:
   pcifront_try_connect(pdev);
   break;
  
 + case XenbusStateClosed:
 + if (xdev-state == XenbusStateClosed)
 + break;
 + /* Missed the backend's CLOSING state -- fallthrough */
   case XenbusStateClosing:
   dev_warn(xdev-dev, backend going away!\n);
   pcifront_try_disconnect(pdev);
 -- 
 1.7.2.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/5] xen-fbfront: handle backend CLOSED without CLOSING

2012-10-19 Thread Konrad Rzeszutek Wilk
On Thu, Oct 18, 2012 at 11:03:37AM +0100, David Vrabel wrote:
 From: David Vrabel david.vra...@citrix.com
 
 Backend drivers shouldn't transition to CLOSED unless the frontend is
 CLOSED.  If a backend does transition to CLOSED too soon then the
 frontend may not see the CLOSING state and will not properly shutdown.
 
 So, treat an unexpected backend CLOSED state the same as CLOSING.
 
 Signed-off-by: David Vrabel david.vra...@citrix.com
 Acked-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 ---
 Cc: linux-fb...@vger.kernel.org
 Cc: Florian Tobias Schandinat florianschandi...@gmx.de

Hey Florian,

Should I prep a git pull for you with this or would it be OK
if I just have your Ack to put this in my git pull for Linus?

Thanks!
 ---
  drivers/video/xen-fbfront.c |5 -
  1 files changed, 4 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
 index b7f5173..917bb56 100644
 --- a/drivers/video/xen-fbfront.c
 +++ b/drivers/video/xen-fbfront.c
 @@ -641,7 +641,6 @@ static void xenfb_backend_changed(struct xenbus_device 
 *dev,
   case XenbusStateReconfiguring:
   case XenbusStateReconfigured:
   case XenbusStateUnknown:
 - case XenbusStateClosed:
   break;
  
   case XenbusStateInitWait:
 @@ -670,6 +669,10 @@ InitWait:
   info-feature_resize = val;
   break;
  
 + case XenbusStateClosed:
 + if (dev-state == XenbusStateClosed)
 + break;
 + /* Missed the backend's CLOSING state -- fallthrough */
   case XenbusStateClosing:
   xenbus_frontend_closed(dev);
   break;
 -- 
 1.7.2.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/5] xen-kbdfront: handle backend CLOSED without CLOSING

2012-10-19 Thread Konrad Rzeszutek Wilk
On Thu, Oct 18, 2012 at 11:03:38AM +0100, David Vrabel wrote:
 From: David Vrabel david.vra...@citrix.com
 
 Backend drivers shouldn't transition to CLOSED unless the frontend is
 CLOSED.  If a backend does transition to CLOSED too soon then the
 frontend may not see the CLOSING state and will not properly shutdown.
 
 So, treat an unexpected backend CLOSED state the same as CLOSING.
 
 Signed-off-by: David Vrabel david.vra...@citrix.com
 Acked-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 ---
 Cc: linux-in...@vger.kernel.org
 Cc: Dmitry Torokhov dmitry.torok...@gmail.com

Hey Dmitry,

Should I prep a git pull for you for this or are you OK giving
an Ack for me to put this patch in my git pull for Linus?

Thx.
 ---
  drivers/input/misc/xen-kbdfront.c |5 -
  1 files changed, 4 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/input/misc/xen-kbdfront.c 
 b/drivers/input/misc/xen-kbdfront.c
 index 02ca868..6f7d990 100644
 --- a/drivers/input/misc/xen-kbdfront.c
 +++ b/drivers/input/misc/xen-kbdfront.c
 @@ -311,7 +311,6 @@ static void xenkbd_backend_changed(struct xenbus_device 
 *dev,
   case XenbusStateReconfiguring:
   case XenbusStateReconfigured:
   case XenbusStateUnknown:
 - case XenbusStateClosed:
   break;
  
   case XenbusStateInitWait:
 @@ -350,6 +349,10 @@ InitWait:
  
   break;
  
 + case XenbusStateClosed:
 + if (dev-state == XenbusStateClosed)
 + break;
 + /* Missed the backend's CLOSING state -- fallthrough */
   case XenbusStateClosing:
   xenbus_frontend_closed(dev);
   break;
 -- 
 1.7.2.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] unicore32: switch to generic kernel_thread()/kernel_execve()

2012-10-19 Thread Al Viro
On Fri, Oct 19, 2012 at 04:43:09PM +0800, Guan Xuetao wrote:
 From: Al Viro v...@zeniv.linux.org.uk
 
 Signed-off-by: Al Viro v...@zeniv.linux.org.uk
 Acked-and-Tested-by: Guan Xuetao g...@mprc.pku.edu.cn

You mean, it worked modulo obvious missing ')'?  Wow... OK, merged
into signal.git#for-next, obviously in no-rebase mode (as a matter
of fact, I'm dropping the local branch completely).
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3 1/6]: PVH: basic and header changes, elfnote changes, ...

2012-10-19 Thread Konrad Rzeszutek Wilk
On Wed, Oct 17, 2012 at 05:26:42PM -0700, Mukesh Rathor wrote:
 [PATCH 1/6] PVH: is a PV linux guest that has extended capabilities. This 
 patch allows it to be configured and enabled. Also, basic header file changes 
 to add new subcalls to physmap hypercall. Lastly, mfn_to_local_pfn must 
 return mfn for paging mode translate.

Usually one splits that description. So you have:

(for title)
xen/pvh: Introduce ParaVirtualized Hardware support.

And then in the body do (and split it in 80 lines for
easier readability):

ParaVirtualized Hardware (PVH) support allows a PV linux guest that has 
extended capabilities.

[Q:Like what kind of extended capabilities? Can you explain
what they are?]

This patch allows it to be configured and enabled. Also, basic header file 
changes to
add new subcalls to physmap hypercall. 

Lastly, mfn_to_local_pfn must return mfn for paging mode translate.

[Q: You should explain why. There is nothing in this description
saying why we do not need the PV MMU anymore]

[note: the verb tense is wrong since I meshed your description
with mine, so it would have to be fixed]
 
 Signed-off-by: Mukesh Rathor mukesh.rat...@oracle.com
 ---
  arch/x86/include/asm/xen/page.h |3 +++
  arch/x86/xen/Kconfig|   10 ++
  arch/x86/xen/xen-head.S |   11 ++-
  include/xen/interface/memory.h  |   24 +++-
  include/xen/interface/physdev.h |   10 ++
  5 files changed, 56 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
 index 472b9b7..6af440d 100644
 --- a/arch/x86/include/asm/xen/page.h
 +++ b/arch/x86/include/asm/xen/page.h
 @@ -159,6 +159,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
  static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
  {
   unsigned long pfn = mfn_to_pfn(mfn);
 +
 + if (xen_feature(XENFEAT_auto_translated_physmap))
 + return mfn;
   if (get_phys_to_machine(pfn) != mfn)
   return -1; /* force !pfn_valid() */
   return pfn;
 diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
 index fdce49c..822c5a0 100644
 --- a/arch/x86/xen/Kconfig
 +++ b/arch/x86/xen/Kconfig
 @@ -50,3 +50,13 @@ config XEN_DEBUG_FS
 Enable statistics output and various tuning options in debugfs.
 Enabling this option may incur a significant performance overhead.
  
 +config XEN_X86_PVH
 + bool Support for running as a PVH guest (EXPERIMENTAL)
 + depends on X86_64 && XEN && EXPERIMENTAL
 + default n
 + help
 +This option enables support for running as a PVH guest (PV guest
 +using hardware extensions) under a suitably capable hypervisor.
 +This option is EXPERIMENTAL because the hypervisor interfaces
 +which it uses are not yet considered stable therefore backwards and
 +forwards compatibility is not yet guaranteed.  If unsure, say N.
 diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
 index 7faed58..1a6bca1 100644
 --- a/arch/x86/xen/xen-head.S
 +++ b/arch/x86/xen/xen-head.S
 @@ -13,6 +13,15 @@
  #include xen/interface/elfnote.h
  #include asm/xen/interface.h
  
 +#ifdef CONFIG_XEN_X86_PVH
 +#define FEATURES_PVH |writable_descriptor_tables \
 +  |auto_translated_physmap \
 +  |supervisor_mode_kernel \
 +  |hvm_callback_vector
 +#else
 +#define FEATURES_PVH /* Not supported */
 +#endif
 +
   __INIT
  ENTRY(startup_xen)
   cld
 @@ -95,7 +104,7 @@ NEXT_HYPERCALL(arch_6)
  #endif
   ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,  _ASM_PTR startup_xen)
   ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
 - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,   .asciz 
 !writable_page_tables|pae_pgdir_above_4gb)
 + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,   .asciz 
 !writable_page_tables|pae_pgdir_above_4gbFEATURES_PVH)
   ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,   .asciz yes)
   ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz generic)
   ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
 index d8e33a9..425911f 100644
 --- a/include/xen/interface/memory.h
 +++ b/include/xen/interface/memory.h
 @@ -169,7 +169,13 @@ struct xen_add_to_physmap {
  /* Source mapping space. */
  #define XENMAPSPACE_shared_info 0 /* shared info page */
  #define XENMAPSPACE_grant_table 1 /* grant table page */
 -unsigned int space;
 +#define XENMAPSPACE_gmfn2 /* GMFN */
 +#define XENMAPSPACE_gmfn_range  3 /* GMFN range */
 +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
 +uint16_t space;
 +domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
 +
 +#define XENMAPIDX_grant_table_status 0x8000
  
  /* Index into source mapping space. */
  unsigned long idx;
 @@ -237,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
   * during a driver critical region.
  

Re: [PATCH 3/6] memcg: Simplify mem_cgroup_force_empty_list error handling

2012-10-19 Thread Michal Hocko
On Thu 18-10-12 15:16:54, Tejun Heo wrote:
 Hello, Michal.
 
 On Wed, Oct 17, 2012 at 03:30:45PM +0200, Michal Hocko wrote:
  mem_cgroup_force_empty_list currently tries to remove all pages from
  the given LRU. To guard against temporary failures (EBUSY returned by
  mem_cgroup_move_parent) it uses a margin to the current LRU pages and
  returns true if there are still some pages left on the list.
  
  If we consider that mem_cgroup_move_parent fails only when we are racing
  with somebody else removing the page (resp. uncharging it) or when the
  page is migrated then it is obvious that all those failures are only
  temporal and so we can safely retry later.
  Let's get rid of the safety margin and make the loop really wait for the
  empty LRU. The caller should still make sure that all charges have been
  removed from the res_counter because mem_cgroup_replace_page_cache might
  add a page to the LRU after the check (it doesn't touch res_counter
  though).
  This catches most of the cases except for shmem which might call
  mem_cgroup_replace_page_cache with a page which is not charged and on
  the LRU yet but this was the case also without this patch. In order to
  fix this we need a guarantee that try_get_mem_cgroup_from_page falls
  back to the current mm's cgroup so it needs css_tryget to fail. This
  will be fixed up in a later patch because it needs help from the cgroup
  core.
  
  Signed-off-by: Michal Hocko mho...@suse.cz
 
 In the sense that I looked at it and nothing seemed too scary.
 
  Reviewed-by: Tejun Heo t...@kernel.org

Thanks

 
 Some nitpicks below.
 
   /*
  - * move charges to its parent.
  + * move charges to its parent or the root cgroup if the group
  + * has no parent (aka use_hierarchy==0).
  + * Although this might fail the failure is always temporary and it
  + * signals a race with a page removal/uncharge or migration. In the
  + * first case the page will vanish from the LRU on the next attempt
  + * and the call should be retried later.
*/
  -
 
 Maybe convert to proper /** function comment while at it?  

these are internal functions and we usually do not create kerneldoc for
them. But I can surely change it - it would deserve a bigger clean up
then.

 I also think it would be helpful to actually comment on each possible
 failure case explaining why the failure condition is temporal.

What about:

 * Although this might fail (get_page_unless_zero, isolate_lru_page or
 * mem_cgroup_move_account fails) the failure is always temporary and
 * it signals a race with a page removal/uncharge or migration. In the
 * first case the page is on the way out and it will vanish from the LRU
 * on the next attempt and the call should be retried later.
 * Isolation from the LRU fails only if page has been isolated from
 * the LRU since we looked at it and that usually means either global
 * reclaim or migration going on. The page will either get back to the
 * LRU or vanish.
 * Finally, mem_cgroup_move_account fails only if the page got uncharged
 * (!PageCgroupUsed) or moved to a different group. The page will
 * disappear in the next attempt.


Better? Or should it rather be in the changelog?

 
   /*
* Traverse a specified page_cgroup list and try to drop them all.  This 
  doesn't
  - * reclaim the pages page themselves - it just removes the page_cgroups.
  - * Returns true if some page_cgroups were not freed, indicating that the 
  caller
  - * must retry this operation.
  + * reclaim the pages page themselves - pages are moved to the parent (or 
  root)
  + * group.
*/
 
 Ditto.
 
  -static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
  +static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
  int node, int zid, enum lru_list lru)
   {
  struct mem_cgroup_per_zone *mz;
  -   unsigned long flags, loop;
  +   unsigned long flags;
  struct list_head *list;
  struct page *busy;
  struct zone *zone;
  @@ -3696,11 +3701,8 @@ static bool mem_cgroup_force_empty_list(struct 
  mem_cgroup *memcg,
  mz = mem_cgroup_zoneinfo(memcg, node, zid);
  list = mz-lruvec.lists[lru];
   
  -   loop = mz-lru_size[lru];
  -   /* give some margin against EBUSY etc...*/
  -   loop += 256;
  busy = NULL;
  -   while (loop--) {
  +   do {
  struct page_cgroup *pc;
  struct page *page;
   
  @@ -3726,8 +3728,7 @@ static bool mem_cgroup_force_empty_list(struct 
  mem_cgroup *memcg,
  cond_resched();
  } else
  busy = NULL;
  -   }
  -   return !list_empty(list);
  +   } while (!list_empty(list));
   }
 
 Is there anything which can keep failing until migration to another
 cgroup is complete?  

This is not about migration to another cgroup. Remember there are no
tasks in the group so we have no origin for the migration. I was talking
about migrate_pages.

 I think there is, e.g., if mmap_sem is busy or memcg is co-mounted
 with other 

Re: [PATCH 1/2] unicore32: switch to generic kernel_thread()/kernel_execve()

2012-10-19 Thread guanxuetao
 On Fri, Oct 19, 2012 at 04:43:09PM +0800, Guan Xuetao wrote:
 From: Al Viro v...@zeniv.linux.org.uk

 Signed-off-by: Al Viro v...@zeniv.linux.org.uk
 Acked-and-Tested-by: Guan Xuetao g...@mprc.pku.edu.cn

 You mean, it worked modulo obvious missing ')'?  Wow... OK, merged
 into signal.git#for-next, obviously in no-rebase mode (as the matter
 of fact, I'm dropping the local branch completely).

I've added ')' in the patch. Then it works.
In addition, I pushed these two patches into my tree.

Thanks & regards,
Guan Xuetao
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3 1/6]: PVH: basic and header changes, elfnote changes, ...

2012-10-19 Thread Konrad Rzeszutek Wilk
  +config XEN_X86_PVH
  +   bool Support for running as a PVH guest (EXPERIMENTAL)
  +   depends on X86_64 && XEN && EXPERIMENTAL
  +   default n
  +   help
  +  This option enables support for running as a PVH guest (PV guest
  +  using hardware extensions) under a suitably capable hypervisor.
  +  This option is EXPERIMENTAL because the hypervisor interfaces
  +  which it uses are not yet considered stable therefore backwards and
  +  forwards compatibility is not yet guaranteed.  If unsure, say N.
 
 Do we really need the kconfig symbol? Why can't we have it always

Yes for right now. That is to make sure that we can test for regressions
PV guests on a hypervisor without PVH extensions - or vice-versa:
PVH hypervisors with a normal PV guest.

Until most bugs and the other work is completed this is a bit of a safety
valve, in case we mess up.

 enabled?

You know what Linus thinks about 'y' by default. He usually rips
one's behind for that - especially for this which is still in its infancy
period. Later on when we get bugs and kinks worked out then we can
re-evaluate.

 
 
  diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
  index 7faed58..1a6bca1 100644
  --- a/arch/x86/xen/xen-head.S
  +++ b/arch/x86/xen/xen-head.S
  @@ -13,6 +13,15 @@
   #include xen/interface/elfnote.h
   #include asm/xen/interface.h
   
  +#ifdef CONFIG_XEN_X86_PVH
  +#define FEATURES_PVH |writable_descriptor_tables \
  +|auto_translated_physmap \
  +|supervisor_mode_kernel \
  +|hvm_callback_vector
  +#else
  +#define FEATURES_PVH /* Not supported */
  +#endif
  +
  __INIT
   ENTRY(startup_xen)
  cld
  @@ -95,7 +104,7 @@ NEXT_HYPERCALL(arch_6)
   #endif
  ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,  _ASM_PTR startup_xen)
  ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
  -   ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,   .asciz 
  !writable_page_tables|pae_pgdir_above_4gb)
  +   ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,   .asciz 
  !writable_page_tables|pae_pgdir_above_4gbFEATURES_PVH)
  ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,   .asciz yes)
  ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz generic)
  ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
 
 Considering that the PVH capability ends up in an ELF note, the kconfig
 symbol is actually useful at least for debugging: it is the only way to
 disable it from the guest side. However I would imaging that Xen would
 always provide an option to disable PVH features in a VM or dom0.

Right.
 
 
  diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
  index d8e33a9..425911f 100644
  --- a/include/xen/interface/memory.h
  +++ b/include/xen/interface/memory.h
  @@ -169,7 +169,13 @@ struct xen_add_to_physmap {
   /* Source mapping space. */
   #define XENMAPSPACE_shared_info 0 /* shared info page */
   #define XENMAPSPACE_grant_table 1 /* grant table page */
  -unsigned int space;
  +#define XENMAPSPACE_gmfn2 /* GMFN */
  +#define XENMAPSPACE_gmfn_range  3 /* GMFN range */
  +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
  +uint16_t space;
  +domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
  +
  +#define XENMAPIDX_grant_table_status 0x8000
   
   /* Index into source mapping space. */
   unsigned long idx;
  @@ -237,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
* during a driver critical region.
*/
   extern spinlock_t xen_reservation_lock;
  +
  +/*
  + * Unmaps the page appearing at a particular GPFN from the specified 
  guest's
  + * pseudophysical address space.
  + * arg == addr of xen_remove_from_physmap_t.
  + */
  +#define XENMEM_remove_from_physmap  15
  +struct xen_remove_from_physmap {
  +/* Which domain to change the mapping for. */
  +domid_t domid;
  +
  +/* GPFN of the current mapping of the page. */
  +xen_pfn_t gpfn;
  +};
  +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
  +
   #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
 these bits have been submitted separately by Ian, if I am not mistaken.

I can take care of doing the merge/conflict resolution.

 
 
  diff --git a/include/xen/interface/physdev.h 
  b/include/xen/interface/physdev.h
  index 9ce788d..3b9d5b6 100644
  --- a/include/xen/interface/physdev.h
  +++ b/include/xen/interface/physdev.h
  @@ -258,6 +258,16 @@ struct physdev_pci_device {
   uint8_t devfn;
   };
   
  +#define PHYSDEVOP_pvh_map_iomem30
 
 I would just call this PHYSDEVOP_map_iomem, we might use it on non-PVH
 guests as well one day.

I completely lost track of the naming now :-( Isn't the ARM version
called range something?
 
 
  +struct physdev_map_iomem {
  +/* IN */
  +uint64_t first_gfn;
  +uint64_t first_mfn;
  +uint32_t nr_mfns;
  +uint32_t add_mapping;/* 1 == add mapping;  0 == unmap */
  +
  +};
  +
   /*
* Notify that some PIRQ-bound event channels 

[PATCH 1/2] ASoC: Ux500: Fixup compile errors due to merge

2012-10-19 Thread Ulf Hansson
From: Ulf Hansson ulf.hans...@linaro.org

A merge of the commits below likely ended up breaking compilation:
ASoC: Ux500: Enable ux500 MSP driver for Device Tree
ASoC: ux500_msp_i2s: better use devm functions and fix error return code

Signed-off-by: Ulf Hansson ulf.hans...@linaro.org
---
 sound/soc/ux500/ux500_msp_i2s.c |   18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/sound/soc/ux500/ux500_msp_i2s.c b/sound/soc/ux500/ux500_msp_i2s.c
index b7c996e..e6ff328 100644
--- a/sound/soc/ux500/ux500_msp_i2s.c
+++ b/sound/soc/ux500/ux500_msp_i2s.c
@@ -692,19 +692,15 @@ int ux500_msp_i2s_init_msp(struct platform_device *pdev,
if (!msp)
return -ENOMEM;
 
-   if (np) {
-   if (!platform_data) {
-   platform_data = devm_kzalloc(pdev-dev,
-   sizeof(struct msp_i2s_platform_data), 
GFP_KERNEL);
-   if (!platform_data)
-   ret = -ENOMEM;
-   }
-   } else
+   if (np && !platform_data) {
+   platform_data = devm_kzalloc(pdev-dev,
+   sizeof(struct msp_i2s_platform_data), GFP_KERNEL);
if (!platform_data)
-   ret = -EINVAL;
+   return -ENOMEM;
+   }
 
-   if (ret)
-   goto err_res;
+   if (!platform_data)
+   return -EINVAL;
 
dev_dbg(pdev-dev, %s: Enter (name: %s, id: %d).\n, __func__,
pdev-name, platform_data-id);
-- 
1.7.10

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] ASoC: Ux500: Fixup compile error

2012-10-19 Thread Ulf Hansson
From: Ulf Hansson ulf.hans...@linaro.org

The below commit introduced a compile error for a missing include file.
ASoC: ux500_msp_i2s: better use devm functions and fix error return code

Signed-off-by: Ulf Hansson ulf.hans...@linaro.org
---
 sound/soc/ux500/ux500_msp_i2s.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/ux500/ux500_msp_i2s.c b/sound/soc/ux500/ux500_msp_i2s.c
index e6ff328..ba15351 100644
--- a/sound/soc/ux500/ux500_msp_i2s.c
+++ b/sound/soc/ux500/ux500_msp_i2s.c
@@ -18,6 +18,7 @@
 #include linux/pinctrl/consumer.h
 #include linux/delay.h
 #include linux/slab.h
+#include linux/io.h
 #include linux/of.h
 
 #include mach/hardware.h
-- 
1.7.10

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH RFC 1/2] kvm: Handle undercommitted guest case in PLE handler

2012-10-19 Thread Andrew Theurer
On Fri, 2012-10-19 at 14:00 +0530, Raghavendra K T wrote:
 On 10/15/2012 08:04 PM, Andrew Theurer wrote:
  On Mon, 2012-10-15 at 17:40 +0530, Raghavendra K T wrote:
  On 10/11/2012 01:06 AM, Andrew Theurer wrote:
  On Wed, 2012-10-10 at 23:24 +0530, Raghavendra K T wrote:
  On 10/10/2012 08:29 AM, Andrew Theurer wrote:
  On Wed, 2012-10-10 at 00:21 +0530, Raghavendra K T wrote:
  * Avi Kivity a...@redhat.com [2012-10-04 17:00:28]:
 
  On 10/04/2012 03:07 PM, Peter Zijlstra wrote:
  On Thu, 2012-10-04 at 14:41 +0200, Avi Kivity wrote:
 
  [...]
  A big concern I have (if this is 1x overcommit) for ebizzy is that it
  has just terrible scalability to begin with.  I do not think we should
  try to optimize such a bad workload.
 
 
  I think my way of running dbench has some flaw, so I went to ebizzy.
  Could you let me know how you generally run dbench?
 
  I mount a tmpfs and then specify that mount for dbench to run on.  This
  eliminates all IO.  I use a 300 second run time and number of threads is
  equal to number of vcpus.  All of the VMs of course need to have a
  synchronized start.
 
  I would also make sure you are using a recent kernel for dbench, where
  the dcache scalability is much improved.  Without any lock-holder
  preemption, the time in spin_lock should be very low:
 
 
21.54%  78016 dbench  [kernel.kallsyms]   [k] 
  copy_user_generic_unrolled
 3.51%  12723 dbench  libc-2.12.so[.] 
  __strchr_sse42
 2.81%  10176 dbench  dbench  [.] child_run
 2.54%   9203 dbench  [kernel.kallsyms]   [k] 
  _raw_spin_lock
 2.33%   8423 dbench  dbench  [.] 
  next_token
 2.02%   7335 dbench  [kernel.kallsyms]   [k] 
  __d_lookup_rcu
 1.89%   6850 dbench  libc-2.12.so[.] 
  __strstr_sse42
 1.53%   5537 dbench  libc-2.12.so[.] 
  __memset_sse2
 1.47%   5337 dbench  [kernel.kallsyms]   [k] 
  link_path_walk
 1.40%   5084 dbench  [kernel.kallsyms]   [k] 
  kmem_cache_alloc
 1.38%   5009 dbench  libc-2.12.so[.] memmove
 1.24%   4496 dbench  libc-2.12.so[.] vfprintf
 1.15%   4169 dbench  [kernel.kallsyms]   [k] 
  __audit_syscall_exit
 
 
  Hi Andrew,
  I ran the test with dbench with tmpfs. I do not see any improvements in
  dbench for 16k ple window.
 
  So it seems apart from ebizzy no workload benefited by that. and I
  agree that, it may not be good to optimize for ebizzy.
  I shall drop changing to 16k default window and continue with other
  original patch series. Need to experiment with latest kernel.
 
  Thanks for running this again.  I do believe there are some workloads,
  when run at 1x overcommit, would benefit from a larger ple_window [with
  the current ple handling code], but I also do not want to potentially
  degrade 1x with a larger window.  I do, however, think there may be
  another option.  I have not fully worked this out, but I think I am on
  to something.
 
  I decided to revert back to just a yield() instead of a yield_to().  My
  motivation was that yield_to() [for large VMs] is like a dog chasing its
  tail, round and round we go   Just yield(), in particular a yield()
  which results in yielding to something -other- than the current VM's
  vcpus, helps synchronize the execution of sibling vcpus by deferring
  them until the lock holder vcpu is running again.  The more we can do to
  get all vcpus running at the same time, the far less we deal with the
  preemption problem.  The other benefit is that yield() is far, far lower
  overhead than yield_to()
 
  This does assume that vcpus from same VM do not share same runqueues.
  Yielding to a sibling vcpu with yield() is not productive for larger VMs
  in the same way that yield_to() is not.  My recent results include
  restricting vcpu placement so that sibling vcpus do not get to run on
  the same runqueue.  I do believe we could implement a initial placement
  and load balance policy to strive for this restriction (making it purely
  optional, but I bet could also help user apps which use spin locks).
 
  For 1x VMs which still vm_exit due to PLE, I believe we could probably
  just leave the ple_window alone, as long as we mostly use yield()
  instead of yield_to().  The problem with the unneeded exits in this case
  has been the overhead in routines leading up to yield_to() and the
  yield_to() itself.  If we use yield() most of the time, this overhead
  will go away.
 
  Here is a comparison of yield_to() and yield():
 
  dbench with 20-way VMs, 8 of them on 80-way host:
 
  no PLE426 +/- 11.03%
  no PLE w/ gangsched 32001 +/- .37%
  PLE with yield()29207 +/- .28%
  PLE with yield_to()  8175 +/- 1.37%
 
  Yield() is far and away better than yield_to() here and almost approaches
  

Re: [PATCH V3 5/6]: PVH:balloon and grant changes

2012-10-19 Thread Konrad Rzeszutek Wilk
On Thu, Oct 18, 2012 at 12:44:16PM +0100, Stefano Stabellini wrote:
 On Thu, 18 Oct 2012, Mukesh Rathor wrote:
  PVH: balloon and grant changes. For balloon changes we skip setting of 
  local p2m as it's updated in xen. For grant, the shared grant frame is the 
  pfn and not mfn, hence its mapped via the same code path as HVM
  
  Signed-off-by: Mukesh Rathor mukesh.rat...@oracle.com
 
 this patch looks good

All right! We got an ACK!! Five more to go :-)

 
 
   drivers/xen/balloon.c |   15 +--
   drivers/xen/gntdev.c  |3 ++-
   drivers/xen/grant-table.c |   26 ++
   3 files changed, 33 insertions(+), 11 deletions(-)
  
  diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
  index 31ab82f..c825b63 100644
  --- a/drivers/xen/balloon.c
  +++ b/drivers/xen/balloon.c
  @@ -361,7 +361,9 @@ static enum bp_state increase_reservation(unsigned long 
  nr_pages)
  set_phys_to_machine(pfn, frame_list[i]);
   
  /* Link back into the page tables if not highmem. */
  -   if (xen_pv_domain()  !PageHighMem(page)) {
  +   if (xen_pv_domain()  !PageHighMem(page) 
  +   !xen_feature(XENFEAT_auto_translated_physmap)) {
  +
  int ret;
  ret = HYPERVISOR_update_va_mapping(
  (unsigned long)__va(pfn  PAGE_SHIFT),
  @@ -418,12 +420,13 @@ static enum bp_state decrease_reservation(unsigned 
  long nr_pages, gfp_t gfp)
  scrub_page(page);
   
  if (xen_pv_domain()  !PageHighMem(page)) {
  -   ret = HYPERVISOR_update_va_mapping(
  -   (unsigned long)__va(pfn  PAGE_SHIFT),
  -   __pte_ma(0), 0);
  -   BUG_ON(ret);
  +   if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  +   ret = HYPERVISOR_update_va_mapping(
  +   (unsigned long)__va(pfn  PAGE_SHIFT),
  +   __pte_ma(0), 0);
  +   BUG_ON(ret);
  +   }
  }
  -
  }
   
  /* Ensure that ballooned highmem pages don't have kmaps. */
  diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
  index 5df9fd8..36ec380 100644
  --- a/drivers/xen/gntdev.c
  +++ b/drivers/xen/gntdev.c
  @@ -803,7 +803,8 @@ static int __init gntdev_init(void)
  if (!xen_domain())
  return -ENODEV;
   
  -   use_ptemod = xen_pv_domain();
  +   use_ptemod = xen_pv_domain() 
  +!xen_feature(XENFEAT_auto_translated_physmap);
   
  err = misc_register(gntdev_miscdev);
  if (err != 0) {
  diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
  index f37faf6..1b851fa 100644
  --- a/drivers/xen/grant-table.c
  +++ b/drivers/xen/grant-table.c
  @@ -976,14 +976,19 @@ static void gnttab_unmap_frames_v2(void)
   static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
   {
  struct gnttab_setup_table setup;
  -   unsigned long *frames;
  +   unsigned long *frames, start_gpfn;
  unsigned int nr_gframes = end_idx + 1;
  int rc;
   
  -   if (xen_hvm_domain()) {
  +   if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap)) {
  struct xen_add_to_physmap xatp;
  unsigned int i = end_idx;
  rc = 0;
  +
  +   if (xen_hvm_domain())
  +   start_gpfn = xen_hvm_resume_frames  PAGE_SHIFT;
  +   else
  +   start_gpfn = virt_to_pfn(gnttab_shared.addr);
  /*
   * Loop backwards, so that the first hypercall has the largest
   * index, ensuring that the table will grow only once.
  @@ -992,7 +997,7 @@ static int gnttab_map(unsigned int start_idx, unsigned 
  int end_idx)
  xatp.domid = DOMID_SELF;
  xatp.idx = i;
  xatp.space = XENMAPSPACE_grant_table;
  -   xatp.gpfn = (xen_hvm_resume_frames  PAGE_SHIFT) + i;
  +   xatp.gpfn = start_gpfn + i;
  rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, xatp);
  if (rc != 0) {
  printk(KERN_WARNING
  @@ -1055,7 +1060,7 @@ static void gnttab_request_version(void)
  int rc;
  struct gnttab_set_version gsv;
   
  -   if (xen_hvm_domain())
  +   if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap))
  gsv.version = 1;
  else
  gsv.version = 2;
  @@ -1083,12 +1088,25 @@ static void gnttab_request_version(void)
   int gnttab_resume(void)
   {
  unsigned int max_nr_gframes;
  +   char *kmsg = Failed to kmalloc pages for pv in hvm grant frames\n;
   
  gnttab_request_version();
  max_nr_gframes = gnttab_max_grant_frames();
  if (max_nr_gframes  nr_grant_frames)
  return -ENOSYS;
   
  +   /* PVH note: xen will free existing 

Re: [PATCH 4/6] cgroups: forbid pre_destroy callback to fail

2012-10-19 Thread Michal Hocko
On Thu 18-10-12 15:41:48, Tejun Heo wrote:
 Hello, Michal.
 
 On Wed, Oct 17, 2012 at 03:30:46PM +0200, Michal Hocko wrote:
  Now that mem_cgroup_pre_destroy callback doesn't fail finally we can
  safely move on and forbid all the callbacks to fail. The last missing
  piece is moving cgroup_call_pre_destroy after cgroup_clear_css_refs so
  that css_tryget fails so no new charges for the memcg can happen.
  The callbacks are also called from within cgroup_lock to guarantee that
  no new tasks show up. We could theoretically call them outside of the
  lock but then we have to move after CGRP_REMOVED flag is set.
  
  Signed-off-by: Michal Hocko mho...@suse.cz
 
 So, the plan is to do something like the following once memcg is
 ready.
 
   http://thread.gmane.org/gmane.linux.kernel.containers/22559/focus=75251
 
 Note that the patch is broken in a couple places but it does show the
 general direction.  I'd prefer if patch #3 simply makes pre_destroy()
 return 0 and drop __DEPRECATED_clear_css_refs from mem_cgroup_subsys.

We can still fail in #3 without this patch because there is no
guarantee that a new task is attached to the group. And I wanted to keep
memcg and generic cgroup parts separated.
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 4/6] cgroups: forbid pre_destroy callback to fail

2012-10-19 Thread Michal Hocko
On Thu 18-10-12 15:46:06, Tejun Heo wrote:
 On Thu, Oct 18, 2012 at 03:41:48PM -0700, Tejun Heo wrote:
  Note that the patch is broken in a couple places but it does show the
  general direction.  I'd prefer if patch #3 simply makes pre_destroy()
  return 0 and drop __DEPRECATED_clear_css_refs from mem_cgroup_subsys.
  Then, I can pull the branch in and drop all the unnecessary cruft.
 
 But you need the locking change for further memcg cleanup.  To avoid
 interlocked pulls from both sides, I think it's okay to push this one
 with the rest of memcg changes.  I can do the cleanup on top of this
 whole series, but please do drop .__DEPRECATED_clear_css_refs from
 memcg.

OK I will drop that one.

  Acked-by: Tejun Heo t...@kernel.org

Do you still agree with the v2 based on Li's feedback?

Thanks
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] perf: Fix UAPI fallout

2012-10-19 Thread David Howells
Ingo Molnar mi...@kernel.org wrote:

 What we want in .c files are not ../.. inclusions but the 
 'seamless' linux/abc.h inclusions. Which is the overwhelming 
 majority, gladly. Do we want to make that the 100% majority?

I think this is going to be necessary for when x86 gets merged.  x86's
asm/unistd.h #includes uapi/asm/unistd.h, so you can't manually specify the
header without also specifying a -I flag.

I've been having a prod at it, and this seems to partially work:

-BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util 
-I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+$(info XXX $(srctree))
+
+BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include 
-I../../arch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) 
-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE

However, I'm trying to work out what will happen if this is run in a separate
output dir, but if I do:

make tools/perf O=build_dir

from the bottom directory, I get:

scripts/Makefile.include:2: *** O=build_dir does not exist.  Stop.

The problem is that the bottom-level Makefile does this:

tools/: FORCE
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/
tools/%: FORCE
$(Q)$(MAKE) LDFLAGS= MAKEFLAGS= -C $(src)/tools/ $*

which changes the directory, rendering a relative O= that would be good for
building the normal kernel useless for building a tool.  Should these rules
respecify the O= flag here, or should we give an error if someone tries it?

David
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] RAS fix for tip/x86/urgent

2012-10-19 Thread Borislav Petkov
Hi guys,

below is a RAS fix which reverts the addition of a sysfs attribute
which we agreed is not needed, post-factum. And this should go in now
because that sysfs attribute is going to end up in 3.7 otherwise and
thus exposed to userspace; removing it then would be a lot harder.

Thanks.

The following changes since commit c9623de4fc2f8320fe94316b46171683be3b1d59:

  Merge branch 'v4l_for_linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (2012-10-18 
16:10:42 -0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git tags/cmci_threshold

for you to fetch changes up to 5bc66170dc486556a1e36fd384463536573f4b82:

  x86, MCE: Remove bios_cmci_threshold sysfs attribute (2012-10-19 15:22:29 
+0200)


Remove a useless sysfs attribute added during this merge window before
it becomes a public interface.


Borislav Petkov (1):
  x86, MCE: Remove bios_cmci_threshold sysfs attribute

 arch/x86/kernel/cpu/mcheck/mce.c | 6 --
 1 file changed, 6 deletions(-)

---
Actual commit:

commit 5bc66170dc486556a1e36fd384463536573f4b82
Author: Borislav Petkov borislav.pet...@amd.com
Date:   Thu Oct 18 15:10:56 2012 +0200

x86, MCE: Remove bios_cmci_threshold sysfs attribute

450cc201038f3 (x86/mce: Provide boot argument to honour bios-set CMCI
threshold) added the bios_cmci_threshold sysfs attribute which was
supposed to communicate to userspace tools that BIOS CMCI threshold has
been honoured.

However, this info is not of any importance to userspace - it should
rather get the actual error count it has been thresholded already from
MCi_STATUS[38:52].

So drop this before it becomes a used interface (good thing we caught
this early in 3.7-rc1, right after the merge window closed).

Cc: Naveen N. Rao naveen.n@linux.vnet.ibm.com
Acked-by: Tony Luck tony.l...@intel.com
Link: http://lkml.kernel.org/r/20121017105940.ga14...@x1.osrc.amd.com
Signed-off-by: Borislav Petkov borislav.pet...@amd.com

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29e87d3b2843..46cbf8689692 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2209,11 +2209,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = 
{
mce_cmci_disabled
 };
 
-static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
-   __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
-   mce_bios_cmci_threshold
-};
-
 static struct device_attribute *mce_device_attrs[] = {
dev_attr_tolerant.attr,
dev_attr_check_interval.attr,
@@ -,7 +2217,6 @@ static struct device_attribute *mce_device_attrs[] = {
dev_attr_dont_log_ce.attr,
dev_attr_ignore_ce.attr,
dev_attr_cmci_disabled.attr,
-   dev_attr_bios_cmci_threshold.attr,
NULL
 };

-- 
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
GM: Alberto Bozzo
Reg: Dornach, Landkreis Muenchen
HRB Nr. 43632 WEEE Registernr: 129 19551
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] epoll: Support for disabling items, and a self-test app.

2012-10-19 Thread Paul Holland
On 10/19/12 6:03 AM, Paolo Bonzini pbonz...@redhat.com wrote:

Il 18/10/2012 20:05, Andy Lutomirski ha scritto:
 
 Unless something is rather buggy in kernel land (and I don't think it
 is), once EPOLL_CTL_DEL has returned, no call to epoll_wait that starts
 *after* EPOLL_CTL_DEL finishes will return that object.  This suggests
 an RCU-like approach: once EPOLL_CTL_DEL has returned and every thread
 has returned from an epoll_wait call that started after the
 EPOLL_CTL_DEL returns, then the data structure can be safely freed.
 
 In pseudocode:
 
 delete(fd, pdata) {
   pdata-dead = true;
   EPOLL_CTL_DEL(fd);
   rcu_call(delete pdata);
 }
 
 wait() {
   epoll_wait;
   for each event pdata {
 if (pdata-gone) continue;
 process the event;
   }
 
   rcu_this_is_a_grace_period();
 }
 
 Of course, these are not normal grace periods and would need to be
 tracked separately.  (The optimal data structure to do this without
 killing scalability is not obvious.  urcu presumably implements such a
 thing.)
 
 Am I right?

Equip each thread with a) an id or something else that lets each thread
refer to the next thread; b) a lists of items waiting to be deleted.
 Then the deleting thread adds the item being deleted to the first
thread's list.  Before executing epoll_wait, thread K empties its list
and passes the buck, appending the old contents of its list to that of
thread K+1.  This is an O(1) operation no matter how many items are
being deleted; only Thread N, being the last thread, actually has to go
through the list and delete the items.

The lists need to be protected by a mutex, but contention should really
be rare since there are just two writers.  Note that each thread only
needs to hold one mutex at a time, and the deletion loop does not need
to happen with the mutex held at all, so there's no worries of
cascading waits on the mutexes.

Compared to http://thread.gmane.org/gmane.linux.kernel/1311457, you get
rid of the per-item mutex and the operations that have to be done with
the (now per-thread) mutex held remain pretty trivial.

Paolo

A disadvantage of solutions in this direction, which was not present in
Paton's patch, is that all calls to epoll_wait would need to specify some
timeout value (!= -1) to guarantee that they each come out of epoll_wait
and execute the pass the buck or grace_period logic.  So you would
then have contention between designs that want highly responsive delete
operations (those would require very short timeout values to epoll_wait)
and those that want low execution overhead (those would want larger
timeout values).




--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] epoll: Support for disabling items, and a self-test app.

2012-10-19 Thread Paolo Bonzini
Il 19/10/2012 15:29, Paul Holland ha scritto:
 A disadvantage of solutions in this direction, which was not present in
 Paton's patch, is that all calls to epoll_wait would need to specify some
 timeout value (!= -1) to guarantee that they each come out of epoll_wait
 and execute the pass the buck or grace_period logic.  So you would
 then have contention between designs that want highly responsive delete
 operations (those would require very short timeout values to epoll_wait)
 and those that want low execution overhead (those would want larger
 timeout values).

Is this really a problem?  If your thread pool risks getting oversized,
you might need some kind of timeout anyway to expire threads.  If your
thread pool is busy, the timeout will never be reached.

I'm not against EPOLL_CTL_DISABLE, just couldn't resist replying to The
optimal data structure to do this without killing scalability is not
obvious. :)

Paolo
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] x86/dt: use linear irq domain for ioapic(s).

2012-10-19 Thread Florian Fainelli
On Friday 19 October 2012 11:36:25  Fainelli wrote:
 Sebastian Andrzej Siewior bigeasy at linutronix.de writes:
  
  No. You do have a compatible entry. It first appeared on the ce4100
  CPU. If it happens to also work on the n450 then it seems to be
  compatible with that one. This is documented somewhere…
  Usually you add 'compatible = your cpu, generic binding' in case
  you need a fixup / errata whatever for your cpu. Even if you compare
  all hpets from Intel there is the one or other difference / errata.
 
 Can we make sure that this hits the future 3.6 stable releases? We had to merge
 this back to your 3.6 kernel tree in order to have a functional CE4100 
 system.
 
 Thank you!

Adding Thomas, Ingo and Sebastian in CC.
--
Florian
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] sched, autogroup: fix kernel crashes caused by runtime disable autogroup

2012-10-19 Thread Peter Zijlstra
Always try and CC people who wrote the code..

On Fri, 2012-10-19 at 16:36 +0800, Xiaotian Feng wrote:
 There's a regression from commit 800d4d30, in autogroup_move_group()
 
   p-signal-autogroup = autogroup_kref_get(ag);
 
   if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
   goto out;
   ...
 out:
   autogroup_kref_put(prev);
 
 So kernel changed p's autogroup to ag, but never sched_move_task(p).
 Then previous autogroup of p is released, which may release task_group
 related with p. After commit 8323f26ce, p-sched_task_group might point
 to this stale value, and thus caused kernel crashes.
 
 This is very easy to reproduce, add kernel.sched_autogroup_enabled = 0
 to your /etc/sysctl.conf, your system will never boot up. It is not reasonable
 to put the sysctl enabled check in autogroup_move_group(), kernel should check
 it before autogroup_create in sched_autogroup_create_attach().
 
 Reported-by: cwillu cwi...@cwillu.com
 Reported-by: Luis Henriques luis.henriq...@canonical.com
 Signed-off-by: Xiaotian Feng dannyf...@tencent.com
 Cc: Ingo Molnar mi...@redhat.com
 Cc: Peter Zijlstra pet...@infradead.org
 ---
  kernel/sched/auto_group.c |   10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
 index 0984a21..ac62415 100644
 --- a/kernel/sched/auto_group.c
 +++ b/kernel/sched/auto_group.c
 @@ -143,15 +143,11 @@ autogroup_move_group(struct task_struct *p, struct 
 autogroup *ag)
  
   p-signal-autogroup = autogroup_kref_get(ag);
  
 - if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
 - goto out;
 -
   t = p;
   do {
   sched_move_task(t);
   } while_each_thread(p, t);
  
 -out:
   unlock_task_sighand(p, flags);
   autogroup_kref_put(prev);
  }

So I've looked at this for all of 1 minute, but why isn't moving that
check up one line to be above the p-signal-autogroup assignment
enough?

 @@ -159,8 +155,12 @@ out:
  /* Allocates GFP_KERNEL, cannot be called under any spinlock */
  void sched_autogroup_create_attach(struct task_struct *p)
  {
 - struct autogroup *ag = autogroup_create();
 + struct autogroup *ag;
 +
 + if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
 + return;
  
 + ag = autogroup_create();
   autogroup_move_group(p, ag);
   /* drop extra reference added by autogroup_create() */
   autogroup_kref_put(ag);

Man,.. so on memory allocation fail we'll put the group in
autogroup_default, which I think ends up being the root cgroup.

But what happens when sysctl_sched_autogroup_enabled is false?

It looks like sched_autogroup_fork() is effective in that case, which
would mean we'll stay in whatever group our parent is in, which is not
the same as being disabled.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/5] memory-hotplug: update mce_bad_pages when removing the memory

2012-10-19 Thread Dave Hansen
On 10/18/2012 03:20 PM, Andrew Morton wrote:
 On Wed, 17 Oct 2012 08:09:55 -0700
 Dave Hansen d...@linux.vnet.ibm.com wrote:
 +#ifdef CONFIG_MEMORY_FAILURE
 +static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 +{
 +   int i;
 +
 +   if (!memmap)
 +   return;

 I guess free_section_usemap() does the same thing.
 
 What does this observation mean?

sparse_remove_one_section() has an if(ms-section_mem_map) statement.
Inside that if() block is the only place in the function where 'memmap'
can get set.

Currently, sparse_remove_one_section() calls in to free_section_usemap()
outside of that if() block.  With this patch, the new call to
clear_hwpoisoned_pages() is done in the same place, both passing 'memmap'.

However, both free_section_usemap() and clear_hwpoisoned_pages() check
'memmap' for NULL and immediately return if so.  That's a bit silly
since it could hide garbage coming back from sparse_decode_mem_map().
Seems like we should just call them both inside that if() block, or
reorganize sparse_remove_one_section(), maybe like this:

void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
{
struct page *memmap = NULL;
unsigned long *usemap = NULL;

if (!ms-section_mem_map)
return;

usemap = ms-pageblock_flags;
memmap = sparse_decode_mem_map(ms-section_mem_map,
__section_nr(ms));
ms-section_mem_map = 0;
ms-pageblock_flags = NULL;

free_section_usemap(memmap, usemap);
clear_hwpoisoned_pages(usemap, ...);
}

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2] PWM: Add SPEAr PWM chip driver support

2012-10-19 Thread Viresh Kumar
On 19 October 2012 15:45, Shiraz Hashim shiraz.has...@st.com wrote:
 diff --git a/Documentation/devicetree/bindings/pwm/spear-pwm.txt 
 b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
 +pwm: pwm@a800 {
 +compatible =st,spear320-pwm;
 +reg = 0xa800 0x1000;
 +#pwm-cells = 2;
 +status = disabled;

Must remove disabled from here. Isn't it?

 diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
 +#include linux/clk.h
 +#include linux/err.h
 +#include linux/io.h
 +#include linux/ioport.h
 +#include linux/kernel.h
 +#include linux/math64.h
 +#include linux/module.h
 +#include linux/of.h
 +#include linux/platform_device.h
 +#include linux/pwm.h
 +#include linux/slab.h
 +#include linux/types.h
 +
 +#define NUM_PWM4
 +
 +/* PWM registers and bits definitions */
 +#define PWMCR  0x00/* Control Register */
 +#define PWMCR_PWM_ENABLE   0x1
 +#define PWMCR_PRESCALE_SHIFT   2
 +#define PWMCR_MIN_PRESCALE 0x00
 +#define PWMCR_MAX_PRESCALE 0x3FFF

I would do it as to make it more readable, your call:

#define PWMCR  0x00/* Control Register */
#define PWMCR_PWM_ENABLE   0x1
#define PWMCR_PRESCALE_SHIFT   2
#define PWMCR_MIN_PRESCALE 0x00
#define PWMCR_MAX_PRESCALE 0x3FFF

 +static int spear_pwm_remove(struct platform_device *pdev)
 +{
 +   struct spear_pwm_chip *pc = platform_get_drvdata(pdev);
 +   int i;
 +
 +   if (WARN_ON(!pc))
 +   return -ENODEV;

Sorry for not asking earlier, how can this be true anytime?

--
viresh
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


remoteproc open issues (was [RFC 1/4] remoteproc: Bugfix assign device address to carveout (noiommu)

2012-10-19 Thread Sjur Brændeland
Hi,

On Fri, Aug 10, 2012 at 5:30 PM, Ohad Ben-Cohen o...@wizery.com wrote:
 The general direction I have in mind is to put the resource table in
 its final location while we do the first pass of fw parsing.

 This will solve all sort of open issues we have (or going to have soon):

 1. dynamically-allocated address of the vrings can be communicated
 2. vdev statuses can be communicated
 3. virtio config space will finally become bi-directional as it should
 4. dynamically probed rproc-to-rproc IPC could then take place

 It's the real deal :)

 The only problem with this approach is that the resource table isn't
 reloaded throughout cycles of power up/down, and that is insecure.
 We'll have to manually reload it somewhere after the rproc is powered
 down (or before it is powered up again).

 This change will break existing firmwares, but it looks required and 
 inevitable.

Has anyone started looking into any of the open issues mentioned above?

Thanks,
Sjur
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 5/6] memcg: make mem_cgroup_reparent_charges non failing

2012-10-19 Thread Michal Hocko
This is an updated version of the patch. I have dropped
.__DEPRECATED_clear_css_refs in this one as it makes the best sense to
me. I didn't add Tejun's Reviewed-by because of this change. Could you
recheck, please?
---
From 6c1f2e76e254e7638ad8cc87f319e3492ac80c5b Mon Sep 17 00:00:00 2001
From: Michal Hocko mho...@suse.cz
Date: Wed, 17 Oct 2012 14:15:09 +0200
Subject: [PATCH] memcg: make mem_cgroup_reparent_charges non failing

Now that pre_destroy callbacks are called from within cgroup_lock and
the cgroup has been checked to be empty without any children then there
is no other way to fail.
mem_cgroup_pre_destroy doesn't have to take a reference to memcg's css
because all css' are marked dead already.

mem_cgroup_subsys.__DEPRECATED_clear_css_refs can be dropped as
mem_cgroup_pre_destroy cannot fail now.

Changes since v1
- drop __DEPRECATED_clear_css_refs

Signed-off-by: Michal Hocko mho...@suse.cz
---
 mm/memcontrol.c |   19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f57ba4c..b4d854e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3738,14 +3738,12 @@ static void mem_cgroup_force_empty_list(struct 
mem_cgroup *memcg,
  *
  * Caller is responsible for holding css reference on the memcg.
  */
-static int mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
+static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
 {
struct cgroup *cgrp = memcg-css.cgroup;
int node, zid;
 
do {
-   if (cgroup_task_count(cgrp) || !list_empty(cgrp-children))
-   return -EBUSY;
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
drain_all_stock_sync(memcg);
@@ -3771,8 +3769,6 @@ static int mem_cgroup_reparent_charges(struct mem_cgroup 
*memcg)
 * charge before adding to the LRU.
 */
} while (res_counter_read_u64(memcg-res, RES_USAGE)  0);
-
-   return 0;
 }
 
 /*
@@ -3809,7 +3805,9 @@ static int mem_cgroup_force_empty(struct mem_cgroup 
*memcg)
 
}
lru_add_drain();
-   return mem_cgroup_reparent_charges(memcg);
+   mem_cgroup_reparent_charges(memcg);
+
+   return 0;
 }
 
 static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int 
event)
@@ -5013,13 +5011,9 @@ free_out:
 static int mem_cgroup_pre_destroy(struct cgroup *cont)
 {
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-   int ret;
 
-   css_get(memcg-css);
-   ret = mem_cgroup_reparent_charges(memcg);
-   css_put(memcg-css);
-
-   return ret;
+   mem_cgroup_reparent_charges(memcg);
+   return 0;
 }
 
 static void mem_cgroup_destroy(struct cgroup *cont)
@@ -5621,7 +5615,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
.base_cftypes = mem_cgroup_files,
.early_init = 0,
.use_id = 1,
-   .__DEPRECATED_clear_css_refs = true,
 };
 
 #ifdef CONFIG_MEMCG_SWAP
-- 
1.7.10.4

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] arm/dts: AM33XX: Add SPI device tree data

2012-10-19 Thread Matt Porter
On Fri, Oct 19, 2012 at 02:40:58PM +0200, Benoit Cousson wrote:
 Hi Matt,
 
 On 10/19/2012 01:30 PM, Matt Porter wrote:
  On Fri, Oct 19, 2012 at 10:24:15AM +0200, Benoit Cousson wrote:
  Hi Avinash,
 
  This look good to me except the: status = disabled.
 
  The disabled should be reserved for variant that does not contain the IP.
  Is it the case here?
  
  http://comments.gmane.org/gmane.linux.drivers.devicetree/18968 is what
  I've been going by with the DTS support in the EDMA dmaengine series. It
  does make the most sense to only enable what you need in the
  board.dts.
 
 Thanks, I missed that thread. That being said, there is no real rationale
 :-)
 It seems to be a preference more than anything else.

Ok, agreed. Here's an example to attempt to illustrate why the one true
preference is to default to disabled. :) Let's say I'm a system developer
with a custom board in the default enabled model. My board makes use of,
say, mmc1 and uart1 on am33xx (oh yes, we must fix the numbering that
doesn't match the TRM) only. In that case, I develop a board.dts
with:

uart2: serial@foo {
status = disabled;
};
.
.
.
uart6: serial@bar {
status = disabled;
};

mmc1 {
vmmc-supply = ldo3_reg;
};

mmc2 {
status = disabled;
};

mmc3 {
status = disabled;
};

In the positive logic case I would have a board.dts with:

uart1: serial@foo {
status = okay;
};

mmc1 {
status = okay;
vmmc-supply = ldo3_reg;
};

In the first case, the fact that there's a board specific property
present for mmc1 can almost get lost in the sea of disabled peripherals.
Extend that out to all the other devices that would be disabled in a
full board.dts and it becomes clear (at least to me) as to how ugly the
board.dts will become. My concern is that this hardware description will
quickly become unreadable to a human.

You might say it's unnecessary to disable everything, but in a
production system the developer is not going to want all these
unused devices to be instantiated. They may have a common family of
boards with a common kernel image, all with a separate dtb so they
can precisely control which items are enabled such that unused drivers
are not loaded.

So then look at the second case. At least for me, I can look at that in
the board.dts and I know that this board uses uart1 and mmc1...and
nothing else. That's a clear and concise hardware description at a
board level, but I understand that can be just considered a
preference.

 I'm curious now, why powerpc was not really using that approach?

Good question.

 I'd rather explicitly disable an IP than assuming that it is disabled by
 default and then enabling it in the board file. But again it is just a
 different view point, since at the end it will have the same effect.

Well, there's functionality then there's production usability. I'm
pointing out that there are many cases where the readability (and
therefore usability) for someone creating a new board is reduced when
devices default to enabled.

 If we really want the disabled state to be the default state, why is it
 not disabled in the DT fmwk by default?

I think it's simply answered by the fact that powerpc always took the
negative logic approach.

-Matt

  On 09/18/2012 07:30 AM, Philip, Avinash wrote:
  Add McSPI data node to AM33XX device tree file. The McSPI module (and so
  as the driver) is reused from OMAP4.
 
  Signed-off-by: Philip, Avinash avinashphi...@ti.com
  Tested-by: Matt Porter mpor...@ti.com
  ---
  Changes since v1:
  - Corrected reg offset in reg DT entry.
 
  :100644 100644 ff3badb... 065fd54... March/arm/boot/dts/am33xx.dtsi
   arch/arm/boot/dts/am33xx.dtsi |   25 +
   1 files changed, 25 insertions(+), 0 deletions(-)
 
  diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
  index ff3badb..065fd54 100644
  --- a/arch/arm/boot/dts/am33xx.dtsi
  +++ b/arch/arm/boot/dts/am33xx.dtsi
  @@ -219,5 +219,30 @@
interrupt-parent = intc;
interrupts = 91;
};
  +
  + spi0: spi@4803 {
  + compatible = ti,omap4-mcspi;
  + #address-cells = 1;
  + #size-cells = 0;
  + reg = 0x4803 0x400;
  + interrupt-parent = intc;
  + interrupt = 65;
  + ti,spi-num-cs = 2;
  + ti,hwmods = spi0;
  + status = disabled;
  +
  + };
  +
  + spi1: spi@481a {
  + compatible = ti,omap4-mcspi;
  + #address-cells = 1;
  + #size-cells = 0;
  + reg = 0x481a 0x400;
  + interrupt-parent = intc;
  + 

Re: [tip:numa/core] sched/numa/mm: Improve migration

2012-10-19 Thread Johannes Weiner
On Thu, Oct 18, 2012 at 10:05:39AM -0700, tip-bot for Peter Zijlstra wrote:
 Commit-ID:  713f937655c4b15131b5a0eae4610918a4febe17
 Gitweb: http://git.kernel.org/tip/713f937655c4b15131b5a0eae4610918a4febe17
 Author: Peter Zijlstra a.p.zijls...@chello.nl
 AuthorDate: Fri, 12 Oct 2012 19:30:14 +0200
 Committer:  Ingo Molnar mi...@kernel.org
 CommitDate: Mon, 15 Oct 2012 14:18:40 +0200
 
 sched/numa/mm: Improve migration
 
 Add THP migration. Extend task_numa_fault() to absorb THP faults.
 
 [ Would be nice if the gents on Cc: expressed their opinion about
   this change. A missing detail might be cgroup page accounting,
   plus the fact that some architectures might cache PMD_NONE pmds
   in their TLBs, needing some extra TLB magic beyond what we already
   do here? ]

Looks good to me, the cgroup fixup should be easy enough as well
(added the calls inline below).

Of course I'm banging my head into a wall for not seeing earlier
through the existing migration path how easy this could be.  It would
be great for compaction to have this fastpath in the traditional
migration code too.

Right now, unlike the traditional migration path, this breaks COW for
every migration, but maybe you don't care about shared pages in the
first place.  And fixing that should be nothing more than grabbing the
anon_vma lock and using rmap to switch more than one pmd over, right?

It won't work for pages in swap, which is only a future problem.

It's slightly ugly that migrate_page_copy() actually modifies the
existing page (deactivation, munlock) when you end up having to revert
back to it.

The new page needs to be PageUptodate.

 + task_numa_placement();
 +
 + new_page = alloc_pages_node(node,
 + (GFP_TRANSHUGE | GFP_THISNODE)  ~(__GFP_NO_KSWAPD | __GFP_WAIT),
 + HPAGE_PMD_ORDER);
 +
 + WARN_ON(PageLRU(new_page));
 +
 + if (!new_page)
 + goto alloc_fail;

mem_cgroup_prepare_migration(page, new_page, memcg);

 + lru = PageLRU(page);
 +
 + if (lru  isolate_lru_page(page)) /* does an implicit get_page() */
 + goto alloc_fail;
 +
 + if (!trylock_page(new_page))
 + BUG();
 +
 + /* anon mapping, we can simply copy page-mapping to the new page: */
 + new_page-mapping = page-mapping;
 + new_page-index = page-index;
 +
 + migrate_page_copy(new_page, page);
 +
 + WARN_ON(PageLRU(new_page));
  
 -do_fixup:
   spin_lock(mm-page_table_lock);
 - if (unlikely(!pmd_same(*pmd, entry)))
 - goto out_unlock;
 -#endif
 + if (unlikely(!pmd_same(*pmd, entry))) {
 + spin_unlock(mm-page_table_lock);
 + if (lru)
 + putback_lru_page(page);
  
 - /* change back to regular protection */
 - entry = pmd_modify(entry, vma-vm_page_prot);
 - if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1))
 - update_mmu_cache(vma, address, entry);
 + unlock_page(new_page);
 + ClearPageActive(new_page);  /* Set by migrate_page_copy() */
 + new_page-mapping = NULL;
 + put_page(new_page); /* Free it */
  
 -out_unlock:
 + unlock_page(page);
 + put_page(page); /* Drop the local reference */
 +
 + return;
 + }
 +
 + entry = mk_pmd(new_page, vma-vm_page_prot);
 + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 + entry = pmd_mkhuge(entry);
 +
 + page_add_new_anon_rmap(new_page, vma, haddr);
 +
 + set_pmd_at(mm, haddr, pmd, entry);
 + update_mmu_cache(vma, address, entry);
 + page_remove_rmap(page);
   spin_unlock(mm-page_table_lock);
 - if (page)
 +
 + put_page(page); /* Drop the rmap reference */
 +
 + task_numa_fault(node, HPAGE_PMD_NR);
 +
 + if (lru)
 + put_page(page); /* drop the LRU isolation reference */
 +
 + unlock_page(new_page);

mem_cgroup_end_migration(memcg, page, new_page, true);

 + unlock_page(page);
 + put_page(page); /* Drop the local reference */
 +
 + return;
 +
 +alloc_fail:
 + if (new_page)
 + put_page(new_page);
mem_cgroup_end_migration(memcg, page, new_page, false);
}

 + task_numa_fault(page_to_nid(page), HPAGE_PMD_NR);
 + unlock_page(page);
 +
 + spin_lock(mm-page_table_lock);
 + if (unlikely(!pmd_same(*pmd, entry))) {
   put_page(page);
 + page = NULL;
 + goto unlock;
 + }
 + goto fixup;
  }
  
  int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm: Simplify for_each_populated_zone()

2012-10-19 Thread Johannes Weiner
On Fri, Oct 19, 2012 at 04:25:47PM +0530, Srivatsa S. Bhat wrote:
 Move the check for populated_zone() to the control statement of the
 'for' loop and get rid of the odd looking if/else block.
 
 Signed-off-by: Srivatsa S. Bhat srivatsa.b...@linux.vnet.ibm.com
 ---
 
  include/linux/mmzone.h |7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)
 
 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
 index 50aaca8..5bdf02e 100644
 --- a/include/linux/mmzone.h
 +++ b/include/linux/mmzone.h
 @@ -913,11 +913,8 @@ extern struct zone *next_zone(struct zone *zone);
  
  #define for_each_populated_zone(zone)\
   for (zone = (first_online_pgdat())-node_zones; \
 -  zone;  \
 -  zone = next_zone(zone))\
 - if (!populated_zone(zone))  \
 - ; /* do nothing */  \
 - else
 +  zone  populated_zone(zone);  \
 +  zone = next_zone(zone))

I don't think we want to /abort/ the loop when encountering an
unpopulated zone.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] mmc: core: bus.c: re-aligned the line over 80 chars

2012-10-19 Thread Sangho Yi
I made a correction on a line which has > 80 characters;
also, aligned the consecutive line to meet the --strict rule.

Signed-off-by: Sangho Yi antir...@gmail.com
---
 drivers/mmc/core/bus.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 420cb67..66bac9f 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -269,8 +269,8 @@ int mmc_add_card(struct mmc_card *card)
[UHS_DDR50_BUS_SPEED] = DDR50 ,
};
 
-
-   dev_set_name(card-dev, %s:%04x, mmc_hostname(card-host), 
card-rca);
+   dev_set_name(card-dev, %s:%04x, mmc_hostname(card-host),
+card-rca);
 
switch (card-type) {
case MMC_TYPE_MMC:
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] ARM: multi_v7_defconfig: Add ARCH_MXC

2012-10-19 Thread Rob Herring
Adding Arnd and Olof.

On 10/18/2012 07:04 PM, Fabio Estevam wrote:
 From: Fabio Estevam fabio.este...@freescale.com
 
 Let ARCH_MXC be covered by multi_v7_defconfig.
 
 Allow booting mx6 via NFS.

Now we can start debating what should or shouldn't be in shared
defconfigs. :)

My intent with this defconfig was to only enable required options and
all drivers for platforms and leave features to their defaults. I don't
feel that strongly about it. We should have some general guideline here
so it's not a free for all. You can still have a defconfig for your
platform as well.

Rob

 
 Signed-off-by: Fabio Estevam fabio.este...@freescale.com
 ---
 Changes since v1:
 - Provide a more complete config that allows booting mx6 via NFS
 
  arch/arm/configs/multi_v7_defconfig |   13 +
  1 file changed, 13 insertions(+)
 
 diff --git a/arch/arm/configs/multi_v7_defconfig 
 b/arch/arm/configs/multi_v7_defconfig
 index 159f75f..df1e563 100644
 --- a/arch/arm/configs/multi_v7_defconfig
 +++ b/arch/arm/configs/multi_v7_defconfig
 @@ -5,6 +5,8 @@ CONFIG_ARCH_MVEBU=y
  CONFIG_MACH_ARMADA_370=y
  CONFIG_MACH_ARMADA_XP=y
  CONFIG_ARCH_HIGHBANK=y
 +CONFIG_ARCH_MXC=y
 +CONFIG_SOC_IMX6Q=y
  CONFIG_ARCH_SOCFPGA=y
  # CONFIG_ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA is not set
  CONFIG_ARM_ERRATA_754322=y
 @@ -17,18 +19,29 @@ CONFIG_ARM_APPENDED_DTB=y
  CONFIG_VFP=y
  CONFIG_NEON=y
  CONFIG_NET=y
 +CONFIG_PACKET=y
 +CONFIG_UNIX=y
 +CONFIG_INET=y
 +CONFIG_IP_PNP=y
 +CONFIG_IP_PNP_DHCP=y
  CONFIG_ATA=y
  CONFIG_SATA_HIGHBANK=y
  CONFIG_NETDEVICES=y
  CONFIG_NET_CALXEDA_XGMAC=y
  CONFIG_SMSC911X=y
  CONFIG_STMMAC_ETH=y
 +CONFIG_NFS_FS=y
 +CONFIG_NFS_V3_ACL=y
 +CONFIG_NFS_V4=y
 +CONFIG_ROOT_NFS=y
  CONFIG_SERIO_AMBAKMI=y
  CONFIG_SERIAL_8250=y
  CONFIG_SERIAL_8250_CONSOLE=y
  CONFIG_SERIAL_8250_DW=y
  CONFIG_SERIAL_AMBA_PL011=y
  CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
 +CONFIG_SERIAL_IMX=y
 +CONFIG_SERIAL_IMX_CONSOLE=y
  CONFIG_SERIAL_OF_PLATFORM=y
  CONFIG_IPMI_HANDLER=y
  CONFIG_IPMI_SI=y
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] Replace if statement with WARN_ON_ONCE() in cmci_rediscover().

2012-10-19 Thread Greg KH
On Fri, Oct 19, 2012 at 01:45:27PM +0800, Tang Chen wrote:
 cmci_rediscover() is only called by the CPU_POST_DEAD event handler,
 which means the corresponding cpu has already dead. As a result, it
 won't be accessed in the for_each_online_cpu loop.
 So, we could change the if(cpu == dying) statement into a WARN_ON_ONCE().
 
 Signed-off-by: Tang Chen tangc...@cn.fujitsu.com
 ---
  arch/x86/kernel/cpu/mcheck/mce_intel.c |4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)

<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read Documentation/stable_kernel_rules.txt
for how to do this properly.

</formletter>
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] rename NUMA fault handling functions

2012-10-19 Thread Rik van Riel

On 10/19/2012 07:41 AM, Peter Zijlstra wrote:

On Thu, 2012-10-18 at 17:20 -0400, Rik van Riel wrote:

Having the function name indicate what the function is used
for makes the code a little easier to read.  Furthermore,
the fault handling code largely consists of do__page
functions.


I don't much care either way, but I was thinking walken might want to
use something similar to do WSS estimation, in which case the NUMA name
is just as wrong.


That's a good point. I had not considered other uses of the
same code.

--
All rights reversed
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: perf: p6 PMU working by accident, should we fix it and KNC?

2012-10-19 Thread Peter Zijlstra
On Wed, 2012-10-17 at 11:35 -0400, Vince Weaver wrote:
 
 This is by accident; it looks like the code does 
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 in p6_pmu_disable_event() so that events are never truly disabled
 (is this a bug?  should it be &=~ instead?).

I think that's on purpose.. from what I can remember p6 only has a
single EN bit (on PMC0) that acts for both counters. So what I did was
treat that as a global enable/disable (which it is) and did the local
enable/disable by using the NOP events.

There really might be bugs in there; it's not like I use this class of
hardware very frequently (nor does anybody much, it seems).


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/3] mm/sl[aou]b: Move common kmem_cache_size() to slab.h

2012-10-19 Thread Christoph Lameter
On Fri, 19 Oct 2012, Ezequiel Garcia wrote:

 This function is identically defined in all three allocators
 and it's trivial to move it to slab.h

 Since now it's static, inline, header-defined function
 this patch also drops the EXPORT_SYMBOL tag.

Acked-by: Christoph Lameter c...@linux.com
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/3] mm/slob: Use object_size field in kmem_cache_size()

2012-10-19 Thread Christoph Lameter
On Fri, 19 Oct 2012, Ezequiel Garcia wrote:

 Fields object_size and size are not the same: the latter might include
 slab metadata. Return object_size field in kmem_cache_size().
 Also, improve trace accuracy by correctly tracing reported size.

Acked-by: Christoph Lameter c...@linux.com
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/10] memory-hotplug : check whether memory is offline or not when removing memory

2012-10-19 Thread Wen Congyang

At 2012/10/19 18:44, Wen Congyang Wrote:

At 10/06/2012 03:27 AM, KOSAKI Motohiro Wrote:

On Thu, Oct 4, 2012 at 10:25 PM, Yasuaki Ishimatsu
isimatu.yasu...@jp.fujitsu.com  wrote:

When calling remove_memory(), the memory should be offline. If the function
is used to online memory, kernel panic may occur.

So the patch checks whether memory is offline or not.


You don't explain WHY we need the check.


This patch is not necessary now, because the newest kernel already checks
it.


I thought about it again and found that this check is necessary, because we only
lock memory hotplug when offlining pages. Here are the steps to offline and
remove memory:

1. lock memory hotplug
2. offline a memory section
3. unlock memory hotplug
4. repeat 1-3 to offline all memory sections
5. lock memory hotplug
6. remove memory
7. unlock memory hotplug

All memory sections must be offlined before removing memory. But we
don't hold the lock for the whole operation, so we should check whether all
memory sections are offlined before step 6.



Thanks
Wen Congyang





CC: David Rientjesrient...@google.com
CC: Jiang Liuliu...@gmail.com
CC: Len Brownlen.br...@intel.com
CC: Christoph Lameterc...@linux.com
Cc: Minchan Kimminchan@gmail.com
CC: Andrew Mortona...@linux-foundation.org
CC: KOSAKI Motohirokosaki.motoh...@jp.fujitsu.com
Signed-off-by: Wen Congyangwe...@cn.fujitsu.com
Signed-off-by: Yasuaki Ishimatsuisimatu.yasu...@jp.fujitsu.com

---
  drivers/base/memory.c  |   39 +++
  include/linux/memory.h |5 +
  mm/memory_hotplug.c|   17 +++--
  3 files changed, 59 insertions(+), 2 deletions(-)

Index: linux-3.6/drivers/base/memory.c
===
--- linux-3.6.orig/drivers/base/memory.c2012-10-04 14:22:57.0 
+0900
+++ linux-3.6/drivers/base/memory.c 2012-10-04 14:45:46.653585860 +0900
@@ -70,6 +70,45 @@ void unregister_memory_isolate_notifier(
  }
  EXPORT_SYMBOL(unregister_memory_isolate_notifier);

+bool is_memblk_offline(unsigned long start, unsigned long size)


Don't use memblk. Usually memblk means struct numa_meminfo for x86/numa.
Maybe memory_range_offlined() is better.

Also, this function doesn't take a struct memory_block, so this file may not be
a good place for it.

And you need to write a function comment.



+{
+   struct memory_block *mem = NULL;
+   struct mem_section *section;
+   unsigned long start_pfn, end_pfn;
+   unsigned long pfn, section_nr;
+
+   start_pfn = PFN_DOWN(start);
+   end_pfn = PFN_UP(start + size);
+
+   for (pfn = start_pfn; pfn  end_pfn; pfn += PAGES_PER_SECTION) {
+   section_nr = pfn_to_section_nr(pfn);
+   if (!present_section_nr(section_nr))
+   continue;
+
+   section = __nr_to_section(section_nr);
+   /* same memblock? */
+   if (mem)
+   if ((section_nr= mem-start_section_nr)
+   (section_nr= mem-end_section_nr))
+   continue;
+
+   mem = find_memory_block_hinted(section, mem);
+   if (!mem)
+   continue;
+   if (mem-state == MEM_OFFLINE)
+   continue;
+
+   kobject_put(mem-dev.kobj);
+   return false;
+   }
+
+   if (mem)
+   kobject_put(mem-dev.kobj);
+
+   return true;
+}
+EXPORT_SYMBOL(is_memblk_offline);
+
  /*
   * register_memory - Setup a sysfs device for a memory block
   */
Index: linux-3.6/include/linux/memory.h
===
--- linux-3.6.orig/include/linux/memory.h   2012-10-02 18:00:22.0 
+0900
+++ linux-3.6/include/linux/memory.h2012-10-04 14:44:40.902581028 +0900
@@ -106,6 +106,10 @@ static inline int memory_isolate_notify(
  {
 return 0;
  }
+static inline bool is_memblk_offline(unsigned long start, unsigned long size)
+{
+   return false;
+}
  #else
  extern int register_memory_notifier(struct notifier_block *nb);
  extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigne
  extern struct memory_block *find_memory_block_hinted(struct mem_section *,
 struct memory_block *);
  extern struct memory_block *find_memory_block(struct mem_section *);
+extern bool is_memblk_offline(unsigned long start, unsigned long size);
  #define CONFIG_MEM_BLOCK_SIZE  (PAGES_PER_SECTIONPAGE_SHIFT)
  enum mem_add_context { BOOT, HOTPLUG };
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
Index: linux-3.6/mm/memory_hotplug.c
===
--- linux-3.6.orig/mm/memory_hotplug.c  2012-10-04 14:31:08.0 +0900
+++ linux-3.6/mm/memory_hotplug.c   2012-10-04 14:58:22.449687986 +0900
@@ -1045,8 +1045,21 @@ int 

Re: remoteproc open issues (was [RFC 1/4] remoteproc: Bugfix assign device address to carveout (noiommu)

2012-10-19 Thread Ohad Ben-Cohen
On Fri, Oct 19, 2012 at 3:45 PM, Sjur Brændeland sjurb...@gmail.com wrote:
 Has anyone started looking into any of the open issues mentioned above?

No - feel free to take a stab at it.

Thanks,
Ohad.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 00/18] slab accounting for memcg

2012-10-19 Thread Glauber Costa
Note: This is basically the same as v4. During this week, I made some changes
to this series in advance based on the feedback I had in the kmemcg-stack
last submission. Although the last series was not yet extensively reviewed,
I opted for sending this out so you guys have the most up2date code to review.
So please review this one instead.

This is a followup to the previous kmem series. I divided them logically
so it gets easier for reviewers. But I believe they are ready to be merged
together (although we can do a two-pass merge if people would prefer)

Throwaway git tree found at:

git://git.kernel.org/pub/scm/linux/kernel/git/glommer/memcg.git 
kmemcg-slab

v5:
* code reorganization, name changes, etc.
v4:
* no more messing with the cache name after destruction: aggregated figures
  are shown in /proc/slabinfo.
* memory.kmem.slabinfo file with memcg-specific cache information during its
  lifespan.
* full slub attribute propagation.
* reusing the standard workqueue mechanism.
* cache-side indexing, instead of memcg-side indexing. The memcg css_id serves
  as an index, and we don't need extra indexes for that.
* struct memcg_cache_params no longer bundled in struct kmem_cache: We now will
  have only a pointer in the struct, allowing memory consumption when disabled to
  fall even further.

Patches need to be adjusted to cope with those changes, but other than that,
look the same - just a lot simpler.

I also put quite some effort to overcome my writing disability and get some
decent changelogs in place.

For a detailed explanation about this whole effort, please refer to my previous
post (https://lkml.org/lkml/2012/10/8/119)

Glauber Costa (18):
  move slabinfo processing to slab_common.c
  move print_slabinfo_header to slab_common.c
  sl[au]b: process slabinfo_show in common code
  slab: don't preemptively remove element from list in cache destroy
  slab/slub: struct memcg_params
  consider a memcg parameter in kmem_create_cache
  Allocate memory for memcg caches whenever a new memcg appears
  memcg: infrastructure to match an allocation to the right cache
  memcg: skip memcg kmem allocations in specified code regions
  sl[au]b: always get the cache from its page in kfree
  sl[au]b: Allocate objects from memcg cache
  memcg: destroy memcg caches
  memcg/sl[au]b Track all the memcg children of a kmem_cache.
  memcg/sl[au]b: shrink dead caches
  Aggregate memcg cache values in slabinfo
  slab: propagate tunables values
  slub: slub-specific propagation changes.
  Add slab-specific documentation about the kmem controller

 Documentation/cgroups/memory.txt |   7 +
 include/linux/memcontrol.h   |  86 ++
 include/linux/sched.h|   1 +
 include/linux/slab.h |  46 +++
 include/linux/slab_def.h |   3 +
 include/linux/slub_def.h |  19 +-
 init/Kconfig |   2 +-
 mm/memcontrol.c  | 613 +--
 mm/slab.c| 188 ++--
 mm/slab.h| 132 -
 mm/slab_common.c | 217 +-
 mm/slub.c| 195 -
 12 files changed, 1294 insertions(+), 215 deletions(-)

-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 10/18] sl[au]b: always get the cache from its page in kfree

2012-10-19 Thread Glauber Costa
struct page already has this information. If we start chaining
caches, this information will always be more trustworthy than
whatever is passed into the function.

A parent pointer is added to the slub structure, so we can make sure
the freeing comes from either the right slab, or from its rightful
parent.

[ v3: added parent testing with VM_BUG_ON ]
[ v4: make it faster when kmemcg not in use ]

Signed-off-by: Glauber Costa glom...@parallels.com
CC: Christoph Lameter c...@linux.com
CC: Pekka Enberg penb...@cs.helsinki.fi
CC: Christoph Lameter c...@linux.com
CC: Pekka Enberg penb...@cs.helsinki.fi
CC: Michal Hocko mho...@suse.cz
CC: Kamezawa Hiroyuki kamezawa.hir...@jp.fujitsu.com
CC: Johannes Weiner han...@cmpxchg.org
CC: Suleiman Souhlal sulei...@google.com
CC: Tejun Heo t...@kernel.org
---
 include/linux/memcontrol.h |  4 
 mm/slab.c  | 17 -
 mm/slab.h  | 13 +
 mm/slub.c  | 14 --
 4 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 92fc47a..f1ecb4f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -534,6 +534,10 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 
+static inline bool memcg_kmem_enabled(void)
+{
+   return false;
+}
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
diff --git a/mm/slab.c b/mm/slab.c
index 98b3460..6f22067 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3911,9 +3911,24 @@ EXPORT_SYMBOL(__kmalloc);
  * Free an object which was previously allocated from this
  * cache.
  */
-void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+void kmem_cache_free(struct kmem_cache *s, void *objp)
 {
unsigned long flags;
+   struct kmem_cache *cachep;
+
+   /*
+* When kmemcg is not being used, both assignments should return the
+* same value. but we don't want to pay the assignment price in that
+* case. If it is not compiled in, the compiler should be smart enough
+* to not do even the assignment. In that case, slab_equal_or_root
+* will also be a constant.
+*/
+   if (memcg_kmem_enabled()) {
+   cachep = virt_to_cache(objp);
+   VM_BUG_ON(!slab_equal_or_root(cachep, s));
+   } else
+   cachep = s;
+
 
local_irq_save(flags);
debug_check_no_locks_freed(objp, cachep-object_size);
diff --git a/mm/slab.h b/mm/slab.h
index c35ecce..b9b5f1f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -108,6 +108,13 @@ static inline bool cache_match_memcg(struct kmem_cache 
*cachep,
return (is_root_cache(cachep)  !memcg) ||
(cachep-memcg_params-memcg == memcg);
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+   struct kmem_cache *p)
+{
+   return (p == s) ||
+   (s-memcg_params  (p == s-memcg_params-root_cache));
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -119,5 +126,11 @@ static inline bool cache_match_memcg(struct kmem_cache 
*cachep,
 {
return true;
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+   struct kmem_cache *p)
+{
+   return true;
+}
 #endif
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 05aefe2..6e1a90f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2609,9 +2609,19 @@ redo:
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-   struct page *page;
+   struct page *page = virt_to_head_page(x);
 
-   page = virt_to_head_page(x);
+   /*
+* When kmemcg is not being used, both assignments should return the
+* same value. but we don't want to pay the assignment price in that
+* case. If it is not compiled in, the compiler should be smart enough
+* to not do even the assignment. In that case, slab_equal_or_root
+* will also be a constant.
+*/
+   if (memcg_kmem_enabled()) {
+   VM_BUG_ON(!slab_equal_or_root(page-slab, s));
+   s = page-slab;
+   }
 
if (kmem_cache_debug(s)  page-slab != s) {
pr_err(kmem_cache_free: Wrong slab cache. %s but object
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 05/18] slab/slub: struct memcg_params

2012-10-19 Thread Glauber Costa
For the kmem slab controller, we need to record some extra
information in the kmem_cache structure.

Signed-off-by: Glauber Costa glom...@parallels.com
Signed-off-by: Suleiman Souhlal sulei...@google.com
CC: Christoph Lameter c...@linux.com
CC: Pekka Enberg penb...@cs.helsinki.fi
CC: Michal Hocko mho...@suse.cz
CC: Kamezawa Hiroyuki kamezawa.hir...@jp.fujitsu.com
CC: Johannes Weiner han...@cmpxchg.org
CC: Tejun Heo t...@kernel.org
---
 include/linux/slab.h | 25 +
 include/linux/slab_def.h |  3 +++
 include/linux/slub_def.h |  3 +++
 mm/slab.h| 13 +
 4 files changed, 44 insertions(+)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0dd2dfa..e4ea48a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -177,6 +177,31 @@ unsigned int kmem_cache_size(struct kmem_cache *);
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
 
+#include <linux/workqueue.h>
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is 1 pointer. The runtime cost while enabled gets bigger than it
+ * would be if this were bundled in kmem_cache: we'll need an
+ * extra pointer chase. But the trade-off clearly lies in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for their operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ */
+struct memcg_cache_params {
+   bool is_root_cache;
+   union {
+   struct kmem_cache *memcg_caches[0];
+   struct mem_cgroup *memcg;
+   };
+};
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 36d7031..665afa4 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
 */
int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+   struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448ad..961e72e 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,9 @@ struct kmem_cache {
 #ifdef CONFIG_SYSFS
struct kobject kobj;/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+   struct memcg_cache_params *memcg_params;
+#endif
 
 #ifdef CONFIG_NUMA
/*
diff --git a/mm/slab.h b/mm/slab.h
index 66a62d3..5ee1851 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -92,4 +92,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
   size_t count, loff_t *ppos);
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+   return !s->memcg_params || s->memcg_params->is_root_cache;
+}
+#else
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+   return true;
+}
+
+#endif
 #endif
-- 
1.7.11.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


<    3   4   5   6   7   8   9   10   11   12   >