date:20160407

Move comment from caller to pci_need_to_release(), as we will have one new
caller for alt_size support.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 27 +++
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 1571245..b4eb37d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -414,6 +414,20 @@ static unsigned long pci_fail_res_type_mask(struct 
list_head *fail_head)
 
 static bool pci_need_to_release(unsigned long mask, struct resource *res)
 {
+   /*
+* Separate three resource type checking if we need to release
+* assigned resource.
+*  1. if there is io port assign fail, will release assigned
+* io port.
+*  2. if there is pref mmio assign fail, release assigned
+* pref mmio.
+* if assigned pref mmio's parent is non-pref mmio and there
+* is non-pref mmio assign fail, will release that assigned
+* pref mmio.
+*  3. if there is non-pref mmio assign fail or pref mmio
+* assigned fail, will release assigned non-pref mmio.
+*/
+
if (res->flags & IORESOURCE_IO)
return !!(mask & IORESOURCE_IO);
 
@@ -470,19 +484,8 @@ static void __assign_resources_sorted(struct list_head 
*head,
 *  if could do that, could get out early.
 *  if could not do that, we still try to assign requested at first,
 *then try to reassign add_size for some resources.
-*
-* Separate three resource type checking if we need to release
-* assigned resource after requested + add_size try.
-*  1. if there is io port assign fail, will release assigned
-* io port.
-*  2. if there is pref mmio assign fail, release assigned
-* pref mmio.
-* if assigned pref mmio's parent is non-pref mmio and there
-* is non-pref mmio assign fail, will release that assigned
-* pref mmio.
-*  3. if there is non-pref mmio assign fail or pref mmio
-* assigned fail, will release assigned non-pref mmio.
 */
+
LIST_HEAD(save_head);
LIST_HEAD(local_fail_head);
struct pci_dev_resource *save_res;
-- 
1.8.4.5

[PATCH v11 35/60] PCI: Simplify res reference using in __assign_resources_sorted()

There are a couple of dev_res->res references; to make the code more readable,
use a local res variable instead of dev_res->res directly.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index b22eb5f..7865e44 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -457,6 +457,7 @@ static void __assign_resources_sorted(struct list_head 
*head,
struct pci_dev_resource *dev_res, *tmp_res;
unsigned long fail_type;
resource_size_t add_align;
+   struct resource *res;
 
/* Check if optional add_size is there */
if (!realloc_head || list_empty(realloc_head))
@@ -472,8 +473,8 @@ static void __assign_resources_sorted(struct list_head 
*head,
 
/* Update res in head list with add_size in realloc_head list */
list_for_each_entry(dev_res, head, list) {
-   dev_res->res->end += get_res_add_size(realloc_head,
-   dev_res->res);
+   res = dev_res->res;
+   res->end += get_res_add_size(realloc_head, res);
 
/*
 * There are two kinds of additional resources in the list:
@@ -482,16 +483,16 @@ static void __assign_resources_sorted(struct list_head 
*head,
 * 2. resource with IORESOURCE_SIZEALIGN
 *update size above already change alignment.
 */
-   if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+   if (!(res->flags & IORESOURCE_STARTALIGN))
continue;
 
-   add_align = get_res_add_align(realloc_head, dev_res->res);
+   add_align = get_res_add_align(realloc_head, res);
 
if (add_align) {
-   resource_size_t r_size = resource_size(dev_res->res);
+   resource_size_t r_size = resource_size(res);
 
-   dev_res->res->start = add_align;
-   dev_res->res->end = add_align + r_size - 1;
+   res->start = add_align;
+   res->end = add_align + r_size - 1;
}
}
 
@@ -513,21 +514,21 @@ static void __assign_resources_sorted(struct list_head 
*head,
/* check failed type */
fail_type = pci_fail_res_type_mask(_fail_head);
/* remove not need to be released assigned res from head list etc */
-   list_for_each_entry_safe(dev_res, tmp_res, head, list)
-   if (dev_res->res->parent &&
-   !pci_need_to_release(fail_type, dev_res->res)) {
+   list_for_each_entry_safe(dev_res, tmp_res, head, list) {
+   res = dev_res->res;
+   if (res->parent && !pci_need_to_release(fail_type, res)) {
/* remove it from realloc_head list */
-   remove_from_list(realloc_head, dev_res->res);
-   remove_from_list(_head, dev_res->res);
+   remove_from_list(realloc_head, res);
+   remove_from_list(_head, res);
list_del(_res->list);
kfree(dev_res);
}
+   }
 
free_list(_fail_head);
/* Release assigned resource */
list_for_each_entry(dev_res, head, list) {
-   struct resource *res = dev_res->res;
-
+   res = dev_res->res;
if (res->parent) {
dev_printk(KERN_DEBUG, _res->dev->dev,
   "BAR %d: released %pR\n",
@@ -538,8 +539,7 @@ static void __assign_resources_sorted(struct list_head 
*head,
}
/* Restore start/end/flags from saved list */
list_for_each_entry(save_res, _head, list) {
-   struct resource *res = save_res->res;
-
+   res = save_res->res;
res->start = save_res->start;
res->end = save_res->end;
res->flags = save_res->flags;
-- 
1.8.4.5

[PATCH v11 51/60] PCI: Allow bridge optional only io port resource required size to be 0

When there is no child device under a non-hotplug bridge,
we can use 0 as the required size instead of using the old size as the
required size.

That will save some io port range for other bridges: the BIOS may have done
only a partial assignment, and we want to use the io port ranges it left unused.

When there is a child device, size will not be 0.
When the bridge supports hotplug, min_size will not be 0.
So in both cases the old size is still honored as the required size.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index b071035..28dfd8e 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1229,8 +1229,9 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
 
size = size_aligned_for_isa(size);
size += size1;
-   size0 = calculate_size(size, min_size,
-   resource_size(b_res), min_align);
+   if (size || min_size)
+   size0 = calculate_size(size, min_size,
+   resource_size(b_res), min_align);
sum_add_size = size_aligned_for_isa(sum_add_size);
sum_add_size += sum_add_size1;
if (sum_add_size < min_sum_size)
@@ -1246,7 +1247,7 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
return;
}
 
-   b_res->start = min_align;
+   b_res->start = size0 ? min_align : 0;
b_res->end = b_res->start + size0 - 1;
b_res->flags |= IORESOURCE_STARTALIGN;
if (size1 > size0 && realloc_head) {
-- 
1.8.4.5

[PATCH v11 55/60] PCI, x86: Allocate from high in available window for MMIO

The current code just uses the aligned start of the available window, which
can waste the large alignment available at the start.

We can instead align to the end of the available window, which keeps the
start with its large alignment free for other resources, e.g. for the second
try for pref mmio after the first try has already assigned non-pref.

pci tree:
-[:00]-+-00.0
   +-1c.0-[01-10]--+-00.0-[02-10]--+-01.0-[03]00.0  PLX Technology, 
Inc. Device 87b1
   |   |   
+-02.0-[04-09]--+-00.0-[05-09]--+-01.0-[06]00.0  PLX Technology, Inc. 
Device 87b1
   |   |   |   |   
+-02.0-[07]00.0  Broadcom Corporation Device 8650
   |   |   |   |   
+-03.0-[08]--
   |   |   |   |   
\-04.0-[09]00.0  Altera Corporation Device 0201
   |   |   |   +-00.1  PLX 
Technology, Inc. Device 87d0
   |   |   |   +-00.2  PLX 
Technology, Inc. Device 87d0
   |   |   |   +-00.3  PLX 
Technology, Inc. Device 87d0
   |   |   |   \-00.4  PLX 
Technology, Inc. Device 87d0
   |   |   
+-03.0-[0a-0f]--+-00.0-[0b-0f]--+-01.0-[0c]00.0  PLX Technology, Inc. 
Device 87b1
   |   |   |   |   
+-02.0-[0d]00.0  Broadcom Corporation Device 8650
   |   |   |   |   
+-03.0-[0e]--
   |   |   |   |   
\-04.0-[0f]00.0  Altera Corporation Device 0201
   |   |   |   +-00.1  PLX 
Technology, Inc. Device 87d0
   |   |   |   +-00.2  PLX 
Technology, Inc. Device 87d0
   |   |   |   +-00.3  PLX 
Technology, Inc. Device 87d0
   |   |   |   \-00.4  PLX 
Technology, Inc. Device 87d0
   |   |   \-04.0-[10]--
   |   +-00.1  PLX Technology, Inc. Device 87d0
   |   +-00.2  PLX Technology, Inc. Device 87d0
   |   +-00.3  PLX Technology, Inc. Device 87d0
   |   \-00.4  PLX Technology, Inc. Device 87d0
   +-1c.3-[11]00.0

hotplug device under :02:03.0

before the patch:

pci :0a:00.0: BAR 9: no space for [mem size 0x0300 64bit pref]
pci :0a:00.0: BAR 9: failed to assign [mem size 0x0300 64bit pref]
pci :0a:00.0: BAR 8: assigned [mem 0xb000-0xb01f]  **
pci :0a:00.0: BAR 0: assigned [mem 0xb020-0xb023]
pci :0a:00.1: BAR 0: assigned [mem 0xb024-0xb0241fff]
pci :0a:00.2: BAR 0: assigned [mem 0xb0242000-0xb0243fff]
pci :0a:00.3: BAR 0: assigned [mem 0xb0244000-0xb0245fff]
pci :0a:00.4: BAR 0: assigned [mem 0xb0246000-0xb0247fff]
pci :0b:04.0: BAR 9: no space for [mem size 0x0300 64bit pref]
pci :0b:04.0: BAR 9: failed to assign [mem size 0x0300 64bit pref]
pci :0b:01.0: BAR 8: assigned [mem 0xb000-0xb00f]
pci :0b:02.0: BAR 8: assigned [mem 0xb010-0xb01f]
pci :0c:00.0: BAR 0: assigned [mem 0xb000-0xb003]
pci :0b:01.0: PCI bridge to [bus 0c]
pci :0b:01.0:   bridge window [mem 0xb000-0xb00f]
pci :0d:00.0: BAR 0: assigned [mem 0xb010-0xb013 64bit]
pci :0b:02.0: PCI bridge to [bus 0d]
pci :0b:02.0:   bridge window [mem 0xb010-0xb01f]
pci :0b:03.0: PCI bridge to [bus 0e]
pci :0f:00.0: BAR 0: no space for [mem size 0x0200 64bit pref]
pci :0f:00.0: BAR 0: failed to assign [mem size 0x0200 64bit pref]
pci :0f:00.0: BAR 2: no space for [mem size 0x0001 64bit pref]
pci :0f:00.0: BAR 2: failed to assign [mem size 0x0001 64bit pref]
pci :0b:04.0: PCI bridge to [bus 0f]
pci :0a:00.0: PCI bridge to [bus 0b-0f]
pci :0a:00.0:   bridge window [mem 0xb000-0xb01f]
pcieport :02:03.0: PCI bridge to [bus 0a-0f]
pcieport :02:03.0:   bridge window [io  0x2000-0x2fff]
pcieport :02:03.0:   bridge window [mem 0xb000-0xb24f]
pcieport :02:03.0:   bridge window [mem 0x8020-0x803f 64bit pref]
PCI: No. 2 try to assign unassigned res
pcieport :02:03.0: resource 9 [mem 0x8020-0x803f 64bit pref] 
released
pcieport :02:03.0: PCI bridge to [bus 0a-0f]
pcieport :02:03.0: BAR 9: no space for [mem size 0x0300 64bit pref]
pcieport :02:03.0: BAR 9: failed to assign [mem size 0x0300 64bit pref]
pcieport :02:03.0: BAR 9: no space for [mem size 0x0210 64bit pref]
pcieport :02:03.0: BAR 9: failed to assign [mem size 0x0210 64bit pref]
pci :0a:00.0: BAR 9: no space for [mem size 0x0300

[PATCH v11 16/60] PCI: Restore pref MMIO allocation logic for host bridge without mmio64

>From 5b2854155 (PCI: Restrict 64-bit prefetchable bridge windows to 64-bit
resources), we change the logic for pref mmio allocation:
When bridge pref support mmio64, we will only put children pref
that support mmio64 into it, and will put children pref mmio32
into bridge's non-pref mmio32.

That could leave the bridge pref BAR unused when that pref BAR is mmio64
and the children's resources are only mmio32.
It could also cause allocation failures when the non-pref mmio32 window is
not big enough for those children's pref mmio32 resources.

That is not rational when the host bridge does not have 64bit mmio
above 4g at all.

This patch restores the old logic:
when the host bridge does not have has_mem64 set, put children's pref mmio64
and pref mmio32 all under the bridge's pref BARs.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 drivers/pci/bus.c   |  4 +++-
 drivers/pci/setup-bus.c | 13 +
 drivers/pci/setup-res.c |  9 ++---
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 6c9f546..200fdac 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -204,8 +204,10 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct 
resource *res,
 {
 #ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
int rc;
+   unsigned long mmio64 = pci_find_host_bridge(bus)->has_mem64 ?
+   IORESOURCE_MEM_64 : 0;
 
-   if (res->flags & IORESOURCE_MEM_64) {
+   if (res->flags & mmio64) {
rc = pci_bus_alloc_from_region(bus, res, size, align, min,
   type_mask, alignf, alignf_data,
   _high);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 9404032..0845a57 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1311,7 +1311,8 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
b_res = >self->resource[PCI_BRIDGE_RESOURCES];
mask = IORESOURCE_MEM;
prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-   if (b_res[2].flags & IORESOURCE_MEM_64) {
+   if ((b_res[2].flags & IORESOURCE_MEM_64) &&
+   pci_find_host_bridge(bus)->has_mem64) {
prefmask |= IORESOURCE_MEM_64;
ret = pbus_size_mem(bus, prefmask, prefmask,
  prefmask, prefmask,
@@ -1513,17 +1514,21 @@ static void pci_bridge_release_resources(struct pci_bus 
*bus,
 *io port.
 * 2. if there is non pref mmio assign fail, release bridge
 *nonpref mmio.
-* 3. if there is 64bit pref mmio assign fail, and bridge pref
+* 3. if there is pref mmio assign fail, and host bridge does
+*not have 64bit mmio, release bridge pref mmio.
+* 4. if there is 64bit pref mmio assign fail, and bridge pref
 *is 64bit, release bridge pref mmio.
-* 4. if there is pref mmio assign fail, and bridge pref is
+* 5. if there is pref mmio assign fail, and bridge pref is
 *32bit mmio, release bridge pref mmio
-* 5. if there is pref mmio assign fail, and bridge pref is not
+* 6. if there is pref mmio assign fail, and bridge pref is not
 *assigned, release bridge nonpref mmio.
 */
if (type & IORESOURCE_IO)
idx = 0;
else if (!(type & IORESOURCE_PREFETCH))
idx = 1;
+   else if (!pci_find_host_bridge(bus)->has_mem64)
+   idx = 2;
else if ((type & IORESOURCE_MEM_64) &&
 (b_res[2].flags & IORESOURCE_MEM_64))
idx = 2;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index f741fed..59271ee 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -212,6 +212,8 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
struct resource *res = dev->resource + resno;
resource_size_t min;
int ret;
+   unsigned long mmio64 = pci_find_host_bridge(bus)->has_mem64 ?
+   IORESOURCE_MEM_64 : 0;
 
min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
@@ -223,7 +225,7 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
 * things differently than they were sized, not everything will fit.
 */
ret = pci_bus_alloc_resource(bus, res, size, align, min,
-IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
+IORESOURCE_PREFETCH | mmio64,
 pcibios_align_resource, dev);
if (ret == 0)
return 0;
@@ -232,7 +234,8 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
 * If the prefetchable

[PATCH v11 16/60] PCI: Restore pref MMIO allocation logic for host bridge without mmio64

>From 5b2854155 (PCI: Restrict 64-bit prefetchable bridge windows to 64-bit
resources), we change the logic for pref mmio allocation:
When bridge pref support mmio64, we will only put children pref
that support mmio64 into it, and will put children pref mmio32
into bridge's non-pref mmio32.

That could leave the bridge pref BAR unused when that pref BAR is mmio64
and the children's resources are only mmio32.
It could also cause allocation failures when the non-pref mmio32 window is
not big enough for those children's pref mmio32 resources.

That is not rational when the host bridge does not have 64bit mmio
above 4g at all.

This patch restores the old logic:
when the host bridge does not have has_mem64 set, put children's pref mmio64
and pref mmio32 all under the bridge's pref BARs.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 drivers/pci/bus.c   |  4 +++-
 drivers/pci/setup-bus.c | 13 +
 drivers/pci/setup-res.c |  9 ++---
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 6c9f546..200fdac 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -204,8 +204,10 @@ int pci_bus_alloc_resource(struct pci_bus *bus, struct 
resource *res,
 {
 #ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
int rc;
+   unsigned long mmio64 = pci_find_host_bridge(bus)->has_mem64 ?
+   IORESOURCE_MEM_64 : 0;
 
-   if (res->flags & IORESOURCE_MEM_64) {
+   if (res->flags & mmio64) {
rc = pci_bus_alloc_from_region(bus, res, size, align, min,
   type_mask, alignf, alignf_data,
   _high);
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 9404032..0845a57 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1311,7 +1311,8 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
b_res = >self->resource[PCI_BRIDGE_RESOURCES];
mask = IORESOURCE_MEM;
prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-   if (b_res[2].flags & IORESOURCE_MEM_64) {
+   if ((b_res[2].flags & IORESOURCE_MEM_64) &&
+   pci_find_host_bridge(bus)->has_mem64) {
prefmask |= IORESOURCE_MEM_64;
ret = pbus_size_mem(bus, prefmask, prefmask,
  prefmask, prefmask,
@@ -1513,17 +1514,21 @@ static void pci_bridge_release_resources(struct pci_bus 
*bus,
 *io port.
 * 2. if there is non pref mmio assign fail, release bridge
 *nonpref mmio.
-* 3. if there is 64bit pref mmio assign fail, and bridge pref
+* 3. if there is pref mmio assign fail, and host bridge does
+*not have 64bit mmio, release bridge pref mmio.
+* 4. if there is 64bit pref mmio assign fail, and bridge pref
 *is 64bit, release bridge pref mmio.
-* 4. if there is pref mmio assign fail, and bridge pref is
+* 5. if there is pref mmio assign fail, and bridge pref is
 *32bit mmio, release bridge pref mmio
-* 5. if there is pref mmio assign fail, and bridge pref is not
+* 6. if there is pref mmio assign fail, and bridge pref is not
 *assigned, release bridge nonpref mmio.
 */
if (type & IORESOURCE_IO)
idx = 0;
else if (!(type & IORESOURCE_PREFETCH))
idx = 1;
+   else if (!pci_find_host_bridge(bus)->has_mem64)
+   idx = 2;
else if ((type & IORESOURCE_MEM_64) &&
 (b_res[2].flags & IORESOURCE_MEM_64))
idx = 2;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index f741fed..59271ee 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -212,6 +212,8 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
struct resource *res = dev->resource + resno;
resource_size_t min;
int ret;
+   unsigned long mmio64 = pci_find_host_bridge(bus)->has_mem64 ?
+   IORESOURCE_MEM_64 : 0;
 
min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
@@ -223,7 +225,7 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
 * things differently than they were sized, not everything will fit.
 */
ret = pci_bus_alloc_resource(bus, res, size, align, min,
-IORESOURCE_PREFETCH | IORESOURCE_MEM_64,
+IORESOURCE_PREFETCH | mmio64,
 pcibios_align_resource, dev);
if (ret == 0)
return 0;
@@ -232,7 +234,8 @@ static int __pci_assign_resource(struct pci_bus *bus, 
struct pci_dev *dev,
 * If the prefetchable window is only 32 bits wide, we can put

[PATCH v11 60/60] PCI: Only try to assign io port only for root bus that support it

The PCI subsystem always assumes that I/O is supported on root bus and
tries to assign an I/O window to each child bus even if that is not the
case.

The use case is an Intel 8-socket system that has 8 root buses, where the
last two root buses do not have io port resources from _CRS.

Check whether the root bus supports I/O and, later during sizing and
assigning, check that flag and skip those resources.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/probe.c | 6 ++
 drivers/pci/setup-bus.c | 9 +
 include/linux/pci.h | 1 +
 3 files changed, 16 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 52ddc45..6f0488c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -345,6 +345,9 @@ static void pci_read_bridge_io(struct pci_bus *child)
struct pci_bus_region region;
struct resource *res;
 
+   if (!pci_find_host_bridge(child)->has_ioport)
+   return;
+
io_mask = PCI_IO_RANGE_MASK;
io_granularity = 0x1000;
if (dev->io_window_1k) {
@@ -2231,6 +2234,9 @@ struct pci_bus *pci_create_root_bus(struct device 
*parent, int bus,
bus_addr[0] = '\0';
dev_info(>dev, "root bus resource %pR%s\n", res, bus_addr);
 
+   if (resource_type(res) == IORESOURCE_IO)
+   bridge->has_ioport = 1;
+
if (resource_type(res) == IORESOURCE_MEM) {
if ((res->end - offset) > 0x)
bridge->has_mem64 = 1;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d07ba87..076b5db 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -225,6 +225,10 @@ static void pdev_assign_resources_prepare(struct pci_dev 
*dev,
if (resource_disabled(r) || r->parent)
continue;
 
+   if ((r->flags & IORESOURCE_IO) &&
+   !pci_find_host_bridge(dev->bus)->has_ioport)
+   continue;
+
r_align = __pci_resource_alignment(dev, r, realloc_head);
if (!r_align) {
dev_warn(>dev, "BAR %d: %pR has bogus alignment\n",
@@ -1188,6 +1192,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
min_size = 0;
}
 
+   if (!pci_find_host_bridge(bus)->has_ioport) {
+   b_res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
+   return;
+   }
+
min_align = window_alignment(bus, IORESOURCE_IO);
list_for_each_entry(dev, >devices, bus_list) {
int i;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 41d06ce..463094a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -409,6 +409,7 @@ struct pci_host_bridge {
void *release_data;
unsigned int ignore_reset_delay:1;  /* for entire hierarchy */
unsigned int has_mem64:1;
+   unsigned int has_ioport:1;
/* Resource alignment requirements */
resource_size_t (*align_resource)(struct pci_dev *dev,
const struct resource *res,
-- 
1.8.4.5

[PATCH v11 21/60] PCI: Treat optional as required in first try for bridge rescan

When rescanning a bridge/bus whose children were removed earlier, we should
treat optional as required, just as is done for the root bus at boot time in
19aa7ee432ce (PCI: make re-allocation try harder by reassigning ranges higher
in the heirarchy).

The reason: the "allocate required, then expand to optional" path does not
put failed resources on the fail list, so the required info is lost before
the next try.

So we are using following way:
1. First and following try before last try:
   We don't keep realloc list so treat every optional as required.
   allocate for required+optional and put failed in the fail list.
   then size info (including must-have and optional separately) will be kept
   for next try.
2. last try:
   a: try to allocate required+optional to see if all get allocated.
   b: try to allocate required then expand to optional.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7d58f3f..3dc4ac9 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1845,25 +1845,34 @@ void __init pci_assign_unassigned_resources(void)
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
 {
struct pci_bus *parent = bridge->subordinate;
-   LIST_HEAD(add_list); /* list of resources that
+   LIST_HEAD(realloc_head); /* list of resources that
want additional resources */
+   struct list_head *add_list = NULL;
int tried_times = 0;
LIST_HEAD(fail_head);
struct pci_dev_resource *fail_res;
int retval;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+   int pci_try_num = 2;
 
 again:
-   __pci_bus_size_bridges(parent, _list);
-   __pci_bridge_assign_resources(bridge, _list, _head);
-   pci_bus_check_realloc(_list);
+   /*
+* last try will use add_list, otherwise will try good to have as
+* must have, so can realloc parent bridge resource
+*/
+   if (tried_times + 1 == pci_try_num)
+   add_list = _head;
+   __pci_bus_size_bridges(parent, add_list);
+   __pci_bridge_assign_resources(bridge, add_list, _head);
+   if (add_list)
+   pci_bus_check_realloc(add_list);
tried_times++;
 
if (list_empty(_head))
goto enable_all;
 
-   if (tried_times >= 2) {
+   if (tried_times >= pci_try_num) {
/* still fail, don't need to try more */
free_list(_head);
goto enable_all;
-- 
1.8.4.5

[PATCH v11 60/60] PCI: Only try to assign io port only for root bus that support it

The PCI subsystem always assumes that I/O is supported on root bus and
tries to assign an I/O window to each child bus even if that is not the
case.

The use case is an Intel 8-socket system that has 8 root buses, where the
last two root buses do not have io port resources from _CRS.

Check whether the root bus supports I/O and, later during sizing and
assigning, check that flag and skip those resources.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/probe.c | 6 ++
 drivers/pci/setup-bus.c | 9 +
 include/linux/pci.h | 1 +
 3 files changed, 16 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 52ddc45..6f0488c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -345,6 +345,9 @@ static void pci_read_bridge_io(struct pci_bus *child)
struct pci_bus_region region;
struct resource *res;
 
+   if (!pci_find_host_bridge(child)->has_ioport)
+   return;
+
io_mask = PCI_IO_RANGE_MASK;
io_granularity = 0x1000;
if (dev->io_window_1k) {
@@ -2231,6 +2234,9 @@ struct pci_bus *pci_create_root_bus(struct device 
*parent, int bus,
bus_addr[0] = '\0';
dev_info(>dev, "root bus resource %pR%s\n", res, bus_addr);
 
+   if (resource_type(res) == IORESOURCE_IO)
+   bridge->has_ioport = 1;
+
if (resource_type(res) == IORESOURCE_MEM) {
if ((res->end - offset) > 0x)
bridge->has_mem64 = 1;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d07ba87..076b5db 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -225,6 +225,10 @@ static void pdev_assign_resources_prepare(struct pci_dev 
*dev,
if (resource_disabled(r) || r->parent)
continue;
 
+   if ((r->flags & IORESOURCE_IO) &&
+   !pci_find_host_bridge(dev->bus)->has_ioport)
+   continue;
+
r_align = __pci_resource_alignment(dev, r, realloc_head);
if (!r_align) {
dev_warn(>dev, "BAR %d: %pR has bogus alignment\n",
@@ -1188,6 +1192,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
min_size = 0;
}
 
+   if (!pci_find_host_bridge(bus)->has_ioport) {
+   b_res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
+   return;
+   }
+
min_align = window_alignment(bus, IORESOURCE_IO);
list_for_each_entry(dev, >devices, bus_list) {
int i;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 41d06ce..463094a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -409,6 +409,7 @@ struct pci_host_bridge {
void *release_data;
unsigned int ignore_reset_delay:1;  /* for entire hierarchy */
unsigned int has_mem64:1;
+   unsigned int has_ioport:1;
/* Resource alignment requirements */
resource_size_t (*align_resource)(struct pci_dev *dev,
const struct resource *res,
-- 
1.8.4.5

[PATCH v11 21/60] PCI: Treat optional as required in first try for bridge rescan

When rescanning a bridge/bus whose children were removed earlier, we should
treat optional as required, just as is done for the root bus at boot time in
19aa7ee432ce (PCI: make re-allocation try harder by reassigning ranges higher
in the heirarchy).

The reason: the "allocate required, then expand to optional" path does not
put failed resources on the fail list, so the required info is lost before
the next try.

So we are using following way:
1. First and following try before last try:
   We don't keep realloc list so treat every optional as required.
   allocate for required+optional and put failed in the fail list.
   then size info (including must-have and optional separately) will be kept
   for next try.
2. last try:
   a: try to allocate required+optional to see if all get allocated.
   b: try to allocate required then expand to optional.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7d58f3f..3dc4ac9 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1845,25 +1845,34 @@ void __init pci_assign_unassigned_resources(void)
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
 {
struct pci_bus *parent = bridge->subordinate;
-   LIST_HEAD(add_list); /* list of resources that
+   LIST_HEAD(realloc_head); /* list of resources that
want additional resources */
+   struct list_head *add_list = NULL;
int tried_times = 0;
LIST_HEAD(fail_head);
struct pci_dev_resource *fail_res;
int retval;
unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
  IORESOURCE_PREFETCH | IORESOURCE_MEM_64;
+   int pci_try_num = 2;
 
 again:
-   __pci_bus_size_bridges(parent, _list);
-   __pci_bridge_assign_resources(bridge, _list, _head);
-   pci_bus_check_realloc(_list);
+   /*
+* last try will use add_list, otherwise will try good to have as
+* must have, so can realloc parent bridge resource
+*/
+   if (tried_times + 1 == pci_try_num)
+   add_list = _head;
+   __pci_bus_size_bridges(parent, add_list);
+   __pci_bridge_assign_resources(bridge, add_list, _head);
+   if (add_list)
+   pci_bus_check_realloc(add_list);
tried_times++;
 
if (list_empty(_head))
goto enable_all;
 
-   if (tried_times >= 2) {
+   if (tried_times >= pci_try_num) {
/* still fail, don't need to try more */
free_list(_head);
goto enable_all;
-- 
1.8.4.5

[PATCH v11 47/60] PCI: Don't add too much optional size for hotplug bridge io

Same as patch for MMIO (PCI: Don't add too much optional size for hotplug
bridge MMIO), and this one is for io port.

It will compare required+optional with min_sum_size to get smaller
optional size.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d2712d8..11a4c1d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1176,7 +1176,6 @@ static resource_size_t window_alignment(struct pci_bus 
*bus,
  *
  * @bus : the bus
  * @min_size : the minimum io window that must to be allocated
- * @add_size : additional optional io window
  * @realloc_head : track the additional io window on this list
  *
  * Sizing the IO windows of the PCI-PCI bridge is trivial,
@@ -1185,9 +1184,11 @@ static resource_size_t window_alignment(struct pci_bus 
*bus,
  * We must be careful with the ISA aliasing though.
  */
 static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
-   resource_size_t add_size, struct list_head *realloc_head)
+struct list_head *realloc_head)
 {
struct pci_dev *dev;
+   resource_size_t min_sum_size = 0;
+   resource_size_t sum_add_size;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
IORESOURCE_IO);
resource_size_t size = 0, size0 = 0, size1 = 0;
@@ -1197,6 +1198,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
if (!b_res)
return;
 
+   if (realloc_head) {
+   min_sum_size = min_size;
+   min_size = 0;
+   }
+
min_align = window_alignment(bus, IORESOURCE_IO);
list_for_each_entry(dev, >devices, bus_list) {
int i;
@@ -1226,10 +1232,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
 
size0 = calculate_iosize(size, min_size, size1,
resource_size(b_res), min_align);
-   if (children_add_size > add_size)
-   add_size = children_add_size;
-   size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
-   calculate_iosize(size, min_size, add_size + size1,
+   sum_add_size = children_add_size + size + size1;
+   if (sum_add_size < min_sum_size)
+   sum_add_size = min_sum_size;
+   size1 = !realloc_head ? size0 :
+   calculate_iosize(size, min_size, sum_add_size - size,
resource_size(b_res), min_align);
if (!size0 && !size1) {
if (b_res->start || b_res->end)
@@ -1757,7 +1764,7 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
 {
struct pci_dev *dev;
unsigned long mask, prefmask, type2 = 0, type3 = 0;
-   resource_size_t min_mem_size = 0, additional_io_size = 0;
+   resource_size_t min_mem_size = 0, min_io_size = 0;
struct resource *b_res;
int ret;
 
@@ -1793,13 +1800,12 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
 
case PCI_CLASS_BRIDGE_PCI:
if (bus->self->is_hotplug_bridge) {
-   additional_io_size  = pci_hotplug_io_size;
+   min_io_size  = pci_hotplug_io_size;
min_mem_size = pci_hotplug_mem_size;
}
/* Fall through */
default:
-   pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
-additional_io_size, realloc_head);
+   pbus_size_io(bus, min_io_size, realloc_head);
 
/*
 * If there's a 64-bit prefetchable MMIO window, compute
-- 
1.8.4.5

Re: [PATCH] mtd: nand: s3c2410: fix bug in s3c2410_nand_correct_data()

Hi Zeng,

On Fri,  8 Apr 2016 00:48:17 +0800
zengzhao...@163.com wrote:

> From: Zeng Zhaoxiu 
> 
> If there is only one bit difference in the ECC, the function should return 1.
> The result of "diff0 & ~(1<<fls(diff0))" is never zero for a non-zero diff0,
> so the function actually returns -1.
> 
> Here, we can use the simple expression "(diff0 & (diff0 - 1)) == 0" to 
> determine
> whether the diff0 has only one 1-bit.

Missing Signed-off-by here.

> ---
>  drivers/mtd/nand/s3c2410.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
> index 9c9397b..c9698cf 100644
> --- a/drivers/mtd/nand/s3c2410.c
> +++ b/drivers/mtd/nand/s3c2410.c
> @@ -542,7 +542,7 @@ static int s3c2410_nand_correct_data(struct mtd_info 
> *mtd, u_char *dat,
>   diff0 |= (diff1 << 8);
>   diff0 |= (diff2 << 16);
>  
> - if ((diff0 & ~(1<<fls(diff0))) == 0)
> + if ((diff0 & (diff0 - 1)) == 0)

Or just

if (hweight_long((unsigned long)diff0) == 1)

which is doing exactly what the comment says.

BTW, I don't understand why the current code is wrong? To me, it seems
it's correctly detecting the case where only a single bit is different.
What are you trying to fix exactly?

Best Regards,

Boris

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

[PATCH v11 10/60] powerpc/PCI: Add IORESOURCE_MEM_64 for 64-bit resource in OF parsing

When setting the PREF bit on a device resource under a bridge 64-bit pref
resource, we need to make sure PREF is only set for a 64-bit resource.

This patch sets IORESOURCE_MEM_64 for 64-bit resources during OF device
resource flags parsing.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=96261
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96241
Signed-off-by: Yinghai Lu 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Gavin Shan 
Cc: Yijing Wang 
Cc: Anton Blanchard 
Cc: linuxppc-...@lists.ozlabs.org
---
 arch/powerpc/kernel/pci_of_scan.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci_of_scan.c 
b/arch/powerpc/kernel/pci_of_scan.c
index 719f225..476b8ac5 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -44,8 +44,10 @@ static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
 
if (addr0 & 0x0200) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
-   flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+   if (addr0 & 0x0100)
+   flags |= IORESOURCE_MEM_64
+| PCI_BASE_ADDRESS_MEM_TYPE_64;
if (addr0 & 0x4000)
flags |= IORESOURCE_PREFETCH
 | PCI_BASE_ADDRESS_MEM_PREFETCH;
-- 
1.8.4.5

[PATCH v11 47/60] PCI: Don't add too much optional size for hotplug bridge io

Same as the patch for MMIO (PCI: Don't add too much optional size for hotplug
bridge MMIO), but this one is for I/O ports.

It compares required+optional with min_sum_size to get a smaller
optional size.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d2712d8..11a4c1d 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1176,7 +1176,6 @@ static resource_size_t window_alignment(struct pci_bus 
*bus,
  *
  * @bus : the bus
  * @min_size : the minimum io window that must to be allocated
- * @add_size : additional optional io window
  * @realloc_head : track the additional io window on this list
  *
  * Sizing the IO windows of the PCI-PCI bridge is trivial,
@@ -1185,9 +1184,11 @@ static resource_size_t window_alignment(struct pci_bus 
*bus,
  * We must be careful with the ISA aliasing though.
  */
 static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
-   resource_size_t add_size, struct list_head *realloc_head)
+struct list_head *realloc_head)
 {
struct pci_dev *dev;
+   resource_size_t min_sum_size = 0;
+   resource_size_t sum_add_size;
struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO,
IORESOURCE_IO);
resource_size_t size = 0, size0 = 0, size1 = 0;
@@ -1197,6 +1198,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
if (!b_res)
return;
 
+   if (realloc_head) {
+   min_sum_size = min_size;
+   min_size = 0;
+   }
+
min_align = window_alignment(bus, IORESOURCE_IO);
list_for_each_entry(dev, >devices, bus_list) {
int i;
@@ -1226,10 +1232,11 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
 
size0 = calculate_iosize(size, min_size, size1,
resource_size(b_res), min_align);
-   if (children_add_size > add_size)
-   add_size = children_add_size;
-   size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
-   calculate_iosize(size, min_size, add_size + size1,
+   sum_add_size = children_add_size + size + size1;
+   if (sum_add_size < min_sum_size)
+   sum_add_size = min_sum_size;
+   size1 = !realloc_head ? size0 :
+   calculate_iosize(size, min_size, sum_add_size - size,
resource_size(b_res), min_align);
if (!size0 && !size1) {
if (b_res->start || b_res->end)
@@ -1757,7 +1764,7 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
 {
struct pci_dev *dev;
unsigned long mask, prefmask, type2 = 0, type3 = 0;
-   resource_size_t min_mem_size = 0, additional_io_size = 0;
+   resource_size_t min_mem_size = 0, min_io_size = 0;
struct resource *b_res;
int ret;
 
@@ -1793,13 +1800,12 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct 
list_head *realloc_head)
 
case PCI_CLASS_BRIDGE_PCI:
if (bus->self->is_hotplug_bridge) {
-   additional_io_size  = pci_hotplug_io_size;
+   min_io_size  = pci_hotplug_io_size;
min_mem_size = pci_hotplug_mem_size;
}
/* Fall through */
default:
-   pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
-additional_io_size, realloc_head);
+   pbus_size_io(bus, min_io_size, realloc_head);
 
/*
 * If there's a 64-bit prefetchable MMIO window, compute
-- 
1.8.4.5

Re: [PATCH] mtd: nand: s3c2410: fix bug in s3c2410_nand_correct_data()

Hi Zeng,

On Fri,  8 Apr 2016 00:48:17 +0800
zengzhao...@163.com wrote:

> From: Zeng Zhaoxiu 
> 
> If there is only one bit difference in the ECC, the function should return 1.
> The result of "diff0 & ~(1<<fls(diff0))" is never zero for a non-zero diff0,
> so the function actually returns -1.
> 
> Here, we can use the simple expression "(diff0 & (diff0 - 1)) == 0" to 
> determine
> whether the diff0 has only one 1-bit.

Missing Signed-off-by here.

> ---
>  drivers/mtd/nand/s3c2410.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
> index 9c9397b..c9698cf 100644
> --- a/drivers/mtd/nand/s3c2410.c
> +++ b/drivers/mtd/nand/s3c2410.c
> @@ -542,7 +542,7 @@ static int s3c2410_nand_correct_data(struct mtd_info 
> *mtd, u_char *dat,
>   diff0 |= (diff1 << 8);
>   diff0 |= (diff2 << 16);
>  
> - if ((diff0 & ~(1<<fls(diff0))) == 0)
> + if ((diff0 & (diff0 - 1)) == 0)

Or just

if (hweight_long((unsigned long)diff0) == 1)

which is doing exactly what the comment says.

BTW, I don't understand why the current code is wrong? To me, it seems
it's correctly detecting the case where only a single bit is different.
What are you trying to fix exactly?

Best Regards,

Boris

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

[PATCH v11 10/60] powerpc/PCI: Add IORESOURCE_MEM_64 for 64-bit resource in OF parsing

When setting the PREF bit on a device resource under a bridge 64-bit pref
resource, we need to make sure PREF is only set for a 64-bit resource.

This patch sets IORESOURCE_MEM_64 for 64-bit resources during OF device
resource flags parsing.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=96261
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96241
Signed-off-by: Yinghai Lu 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Gavin Shan 
Cc: Yijing Wang 
Cc: Anton Blanchard 
Cc: linuxppc-...@lists.ozlabs.org
---
 arch/powerpc/kernel/pci_of_scan.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/pci_of_scan.c 
b/arch/powerpc/kernel/pci_of_scan.c
index 719f225..476b8ac5 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -44,8 +44,10 @@ static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
 
if (addr0 & 0x0200) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
-   flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+   if (addr0 & 0x0100)
+   flags |= IORESOURCE_MEM_64
+| PCI_BASE_ADDRESS_MEM_TYPE_64;
if (addr0 & 0x4000)
flags |= IORESOURCE_PREFETCH
 | PCI_BASE_ADDRESS_MEM_PREFETCH;
-- 
1.8.4.5

[PATCH v11 15/60] PCI: Only treat non-pref mmio64 as pref if host bridge has mmio64

If the host bridge does not have mmio64 above 4G, we don't need to
treat device non-pref mmio64 as pref mmio64.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 drivers/pci/setup-bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ffb1941..9404032 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -741,7 +741,7 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i)
 static bool pci_up_path_over_pref_mem64(struct pci_bus *bus)
 {
if (pci_is_root_bus(bus))
-   return true;
+   return to_pci_host_bridge(bus->bridge)->has_mem64;
 
if (bus->self) {
int i;
-- 
1.8.4.5

[PATCH v11 11/60] OF/PCI: Add IORESOURCE_MEM_64 for 64-bit resource

When setting the PREF bit on a device resource under a bridge 64-bit pref
resource, we need to make sure PREF is only set for a 64-bit resource.

This patch sets IORESOURCE_MEM_64 for 64-bit resources during OF device
resource flags parsing.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=96261
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96241
Signed-off-by: Yinghai Lu 
Cc: Grant Likely 
Cc: Rob Herring 
Cc: devicet...@vger.kernel.org
Tested-by: Khalid Aziz 
---
 drivers/of/address.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 91a469d..3b09261 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -128,9 +128,11 @@ static unsigned int of_bus_pci_get_flags(const __be32 
*addr)
flags |= IORESOURCE_IO;
break;
case 0x02: /* 32 bits */
-   case 0x03: /* 64 bits */
flags |= IORESOURCE_MEM;
break;
+   case 0x03: /* 64 bits */
+   flags |= IORESOURCE_MEM | IORESOURCE_MEM_64;
+   break;
}
if (w & 0x4000)
flags |= IORESOURCE_PREFETCH;
-- 
1.8.4.5

[PATCH v11 43/60] PCI: Move saved required resource list out of required+optional assigning

We will need to share the saved required resource list for alt_size support,
so move it out of the required+optional assigning function.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 30 --
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 373f76f..6c58b4a 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -455,6 +455,9 @@ static bool has_addon(struct list_head *head,
int add_count = 0;
struct pci_dev_resource *dev_res, *tmp_res;
 
+   if (!realloc_head)
+   return false;
+
/* check if we have add really */
list_for_each_entry(dev_res, head, list) {
tmp_res = res_to_dev_res(realloc_head, dev_res->res);
@@ -491,9 +494,9 @@ static void restore_resource(struct pci_dev_resource 
*save_res,
 }
 
 static bool __assign_resources_required_optional_sorted(struct list_head *head,
+struct list_head *save_head,
 struct list_head *realloc_head)
 {
-   LIST_HEAD(save_head);
LIST_HEAD(local_fail_head);
struct pci_dev_resource *save_res;
struct pci_dev_resource *dev_res, *tmp_res;
@@ -501,12 +504,6 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
resource_size_t add_align, add_size;
struct resource *res;
 
-   if (!has_addon(head, realloc_head))
-   return false;
-
-   if (!save_resources(head, _head))
-   return false;
-
/* Update res in head list with add_size in realloc_head list */
list_for_each_entry(dev_res, head, list) {
res = dev_res->res;
@@ -547,7 +544,6 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
/* Remove head list from realloc_head list */
list_for_each_entry(dev_res, head, list)
remove_from_list(realloc_head, dev_res->res);
-   free_list(_head);
free_list(head);
 
return true;
@@ -561,7 +557,7 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
if (res->parent && !pci_need_to_release(fail_type, res)) {
/* remove it from realloc_head list */
remove_from_list(realloc_head, res);
-   remove_from_list(_head, res);
+   remove_from_list(save_head, res);
list_del(_res->list);
kfree(dev_res);
}
@@ -580,11 +576,9 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
}
}
/* Restore start/end/flags from saved list */
-   list_for_each_entry(save_res, _head, list)
+   list_for_each_entry(save_res, save_head, list)
restore_resource(save_res, save_res->res);
 
-   free_list(_head);
-
return false;
 }
 
@@ -602,16 +596,24 @@ static void __assign_resources_sorted(struct list_head 
*head,
 *then try to reassign add_size for some resources.
 */
 
+   LIST_HEAD(save_head);
+
/* Check required+optional add */
-   if (realloc_head &&
-   __assign_resources_required_optional_sorted(head, realloc_head))
+   if (has_addon(head, realloc_head) &&
+   save_resources(head, _head) &&
+   __assign_resources_required_optional_sorted(head, _head,
+  realloc_head)) {
+   free_list(_head);
return;
+   }
 
sort_resources(head);
 
/* Satisfy the must-have resource requests */
assign_requested_resources_sorted(head, fail_head);
 
+   free_list(_head);
+
/* Try to satisfy any additional optional resource
requests */
if (realloc_head)
-- 
1.8.4.5

[PATCH v11 15/60] PCI: Only treat non-pref mmio64 as pref if host bridge has mmio64

If the host bridge does not have mmio64 above 4G, we don't need to
treat device non-pref mmio64 as pref mmio64.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 drivers/pci/setup-bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ffb1941..9404032 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -741,7 +741,7 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i)
 static bool pci_up_path_over_pref_mem64(struct pci_bus *bus)
 {
if (pci_is_root_bus(bus))
-   return true;
+   return to_pci_host_bridge(bus->bridge)->has_mem64;
 
if (bus->self) {
int i;
-- 
1.8.4.5

[PATCH v11 11/60] OF/PCI: Add IORESOURCE_MEM_64 for 64-bit resource

When setting the PREF bit on a device resource under a bridge 64-bit pref
resource, we need to make sure PREF is only set for a 64-bit resource.

This patch sets IORESOURCE_MEM_64 for 64-bit resources during OF device
resource flags parsing.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=96261
Link: https://bugzilla.kernel.org/show_bug.cgi?id=96241
Signed-off-by: Yinghai Lu 
Cc: Grant Likely 
Cc: Rob Herring 
Cc: devicet...@vger.kernel.org
Tested-by: Khalid Aziz 
---
 drivers/of/address.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 91a469d..3b09261 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -128,9 +128,11 @@ static unsigned int of_bus_pci_get_flags(const __be32 
*addr)
flags |= IORESOURCE_IO;
break;
case 0x02: /* 32 bits */
-   case 0x03: /* 64 bits */
flags |= IORESOURCE_MEM;
break;
+   case 0x03: /* 64 bits */
+   flags |= IORESOURCE_MEM | IORESOURCE_MEM_64;
+   break;
}
if (w & 0x4000)
flags |= IORESOURCE_PREFETCH;
-- 
1.8.4.5

[PATCH v11 43/60] PCI: Move saved required resource list out of required+optional assigning

We will need to share the saved required resource list for alt_size support,
so move it out of the required+optional assigning function.

Signed-off-by: Yinghai Lu 
---
 drivers/pci/setup-bus.c | 30 --
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 373f76f..6c58b4a 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -455,6 +455,9 @@ static bool has_addon(struct list_head *head,
int add_count = 0;
struct pci_dev_resource *dev_res, *tmp_res;
 
+   if (!realloc_head)
+   return false;
+
/* check if we have add really */
list_for_each_entry(dev_res, head, list) {
tmp_res = res_to_dev_res(realloc_head, dev_res->res);
@@ -491,9 +494,9 @@ static void restore_resource(struct pci_dev_resource 
*save_res,
 }
 
 static bool __assign_resources_required_optional_sorted(struct list_head *head,
+struct list_head *save_head,
 struct list_head *realloc_head)
 {
-   LIST_HEAD(save_head);
LIST_HEAD(local_fail_head);
struct pci_dev_resource *save_res;
struct pci_dev_resource *dev_res, *tmp_res;
@@ -501,12 +504,6 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
resource_size_t add_align, add_size;
struct resource *res;
 
-   if (!has_addon(head, realloc_head))
-   return false;
-
-   if (!save_resources(head, _head))
-   return false;
-
/* Update res in head list with add_size in realloc_head list */
list_for_each_entry(dev_res, head, list) {
res = dev_res->res;
@@ -547,7 +544,6 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
/* Remove head list from realloc_head list */
list_for_each_entry(dev_res, head, list)
remove_from_list(realloc_head, dev_res->res);
-   free_list(_head);
free_list(head);
 
return true;
@@ -561,7 +557,7 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
if (res->parent && !pci_need_to_release(fail_type, res)) {
/* remove it from realloc_head list */
remove_from_list(realloc_head, res);
-   remove_from_list(_head, res);
+   remove_from_list(save_head, res);
list_del(_res->list);
kfree(dev_res);
}
@@ -580,11 +576,9 @@ static bool 
__assign_resources_required_optional_sorted(struct list_head *head,
}
}
/* Restore start/end/flags from saved list */
-   list_for_each_entry(save_res, _head, list)
+   list_for_each_entry(save_res, save_head, list)
restore_resource(save_res, save_res->res);
 
-   free_list(_head);
-
return false;
 }
 
@@ -602,16 +596,24 @@ static void __assign_resources_sorted(struct list_head 
*head,
 *then try to reassign add_size for some resources.
 */
 
+   LIST_HEAD(save_head);
+
/* Check required+optional add */
-   if (realloc_head &&
-   __assign_resources_required_optional_sorted(head, realloc_head))
+   if (has_addon(head, realloc_head) &&
+   save_resources(head, _head) &&
+   __assign_resources_required_optional_sorted(head, _head,
+  realloc_head)) {
+   free_list(_head);
return;
+   }
 
sort_resources(head);
 
/* Satisfy the must-have resource requests */
assign_requested_resources_sorted(head, fail_head);
 
+   free_list(_head);
+
/* Try to satisfy any additional optional resource
requests */
if (realloc_head)
-- 
1.8.4.5

[PATCH v11 07/60] sparc/PCI: Keep resource idx order with bridge register number

On one system we found a strange "no compatible bridge window" warning:

PCI: Claiming :00:01.0: Resource 14: 00020001..000200010fff 
[10220c]
PCI: Claiming :01:00.0: Resource 1: 00020001..00020001 
[100214]
pci :01:00.0: can't claim BAR 1 [mem 0x20001-0x20001 
64bit]: no compatible bridge window

and we already had pref_compat support that adds an extra pref bit for the
device resource.

It turns out that pci_resource_compatible()/pci_up_path_over_pref_mem64()
just check resource with bridge pref mmio register idx 15, and we have put
resource to use mmio register idx 14 during of_scan_pci_bridge()
as the bridge does not have mmio resource.

We already fixed pci_up_path_over_pref_mem64() to check all bus resources.

At the same time, this patch makes the resources have a consistent order,
like other arches or like what pci_read_bridge_bases() produces directly,
even when non-pref mmio is missing or firmware reports ranges out of order.

Just hold i = 1 for non pref mmio, and i = 2 for pref mmio.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 arch/sparc/kernel/pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9c6daad..9415abc 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -472,7 +472,7 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm,
pci_read_bridge_bases(bus);
goto after_ranges;
}
-   i = 1;
+   i = 3;
for (; len >= 32; len -= 32, ranges += 8) {
u64 start;
 
@@ -504,6 +504,12 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm,
   " for bridge %s\n", node->full_name);
continue;
}
+   } else if ((flags & IORESOURCE_PREFETCH) &&
+  !bus->resource[2]->flags) {
+   res = bus->resource[2];
+   } else if (((flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH)) ==
+   IORESOURCE_MEM) && !bus->resource[1]->flags) {
+   res = bus->resource[1];
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
-- 
1.8.4.5

[PATCH v11 07/60] sparc/PCI: Keep resource idx order with bridge register number

On one system we found a strange "no compatible bridge window" warning:

PCI: Claiming :00:01.0: Resource 14: 00020001..000200010fff 
[10220c]
PCI: Claiming :01:00.0: Resource 1: 00020001..00020001 
[100214]
pci :01:00.0: can't claim BAR 1 [mem 0x20001-0x20001 
64bit]: no compatible bridge window

and we already had pref_compat support that adds an extra pref bit for the
device resource.

It turns out that pci_resource_compatible()/pci_up_path_over_pref_mem64()
just check resource with bridge pref mmio register idx 15, and we have put
resource to use mmio register idx 14 during of_scan_pci_bridge()
as the bridge does not have mmio resource.

We already fixed pci_up_path_over_pref_mem64() to check all bus resources.

At the same time, this patch makes the resources have a consistent order,
like other arches or like what pci_read_bridge_bases() produces directly,
even when non-pref mmio is missing or firmware reports ranges out of order.

Just hold i = 1 for non pref mmio, and i = 2 for pref mmio.

Signed-off-by: Yinghai Lu 
Tested-by: Khalid Aziz 
---
 arch/sparc/kernel/pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9c6daad..9415abc 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -472,7 +472,7 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm,
pci_read_bridge_bases(bus);
goto after_ranges;
}
-   i = 1;
+   i = 3;
for (; len >= 32; len -= 32, ranges += 8) {
u64 start;
 
@@ -504,6 +504,12 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm,
   " for bridge %s\n", node->full_name);
continue;
}
+   } else if ((flags & IORESOURCE_PREFETCH) &&
+  !bus->resource[2]->flags) {
+   res = bus->resource[2];
+   } else if (((flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH)) ==
+   IORESOURCE_MEM) && !bus->resource[1]->flags) {
+   res = bus->resource[1];
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
-- 
1.8.4.5

Re: [PATCH] x86/hpet: Reduce HPET counter read contention

On Thu, Apr 7, 2016 at 8:07 AM, Waiman Long  wrote:
> On 04/07/2016 12:58 AM, Andy Lutomirski wrote:
>> Reading the HPET is so slow that all the atomic ops in the world won't
>> make a dent.  Why not just turn this optimization on unconditionally?
>>
>> --Andy
>
>
> I am constantly on the alert that we should not introduce regression on
> lesser systems like a single socket machine with a few cores. That is why I
> put the check to conditionally enable this optimization. I have no issue of
> taking that out and let it be the default as long as no one object.
>

Agreed.  I just suspect it's actually faster on all systems.

This reminds me -- I need to send out my patch to disable the vdso
HPET code, which will make your change more effective.  I'll cc you.

> Cheers,
> Longman



-- 
Andy Lutomirski
AMA Capital Management, LLC

Re: [PATCH] x86/hpet: Reduce HPET counter read contention

On Thu, Apr 7, 2016 at 8:07 AM, Waiman Long  wrote:
> On 04/07/2016 12:58 AM, Andy Lutomirski wrote:
>> Reading the HPET is so slow that all the atomic ops in the world won't
>> make a dent.  Why not just turn this optimization on unconditionally?
>>
>> --Andy
>
>
> I am constantly on the alert that we should not introduce regression on
> lesser systems like a single socket machine with a few cores. That is why I
> put the check to conditionally enable this optimization. I have no issue of
> taking that out and let it be the default as long as no one object.
>

Agreed.  I just suspect it's actually faster on all systems.

This reminds me -- I need to send out my patch to disable the vdso
HPET code, which will make your change more effective.  I'll cc you.

> Cheers,
> Longman



-- 
Andy Lutomirski
AMA Capital Management, LLC

[PATCH v8 net-next 1/1] hv_sock: introduce Hyper-V Sockets

Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
mechanism between the host and the guest. It's somewhat like TCP over
VMBus, but the transportation layer (VMBus) is much simpler than IP.

With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.

Hyper-V Sockets is only available on new Windows hosts, like Windows Server
2016. More info is in this article "Make your own integration services":
https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service

The patch implements the necessary support in the guest side by introducing
a new socket address family AF_HYPERV.

Signed-off-by: Dexuan Cui 
Cc: "K. Y. Srinivasan" 
Cc: Haiyang Zhang 
Cc: Vitaly Kuznetsov 
---
 MAINTAINERS |2 +
 include/linux/hyperv.h  |   16 +
 include/linux/socket.h  |5 +-
 include/net/af_hvsock.h |   51 ++
 include/uapi/linux/hyperv.h |   25 +
 net/Kconfig |1 +
 net/Makefile|1 +
 net/hv_sock/Kconfig |   10 +
 net/hv_sock/Makefile|3 +
 net/hv_sock/af_hvsock.c | 1483 +++
 10 files changed, 1595 insertions(+), 2 deletions(-)
 create mode 100644 include/net/af_hvsock.h
 create mode 100644 net/hv_sock/Kconfig
 create mode 100644 net/hv_sock/Makefile
 create mode 100644 net/hv_sock/af_hvsock.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 67d99dd..7b6f203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5267,7 +5267,9 @@ F:drivers/pci/host/pci-hyperv.c
 F: drivers/net/hyperv/
 F: drivers/scsi/storvsc_drv.c
 F: drivers/video/fbdev/hyperv_fb.c
+F: net/hv_sock/
 F: include/linux/hyperv.h
+F: include/net/af_hvsock.h
 F: tools/hv/
 F: Documentation/ABI/stable/sysfs-bus-vmbus
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index aa0fadc..b92439d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1338,4 +1338,20 @@ extern __u32 vmbus_proto_version;
 
 int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
  const uuid_le *shv_host_servie_id);
+struct vmpipe_proto_header {
+   u32 pkt_type;
+   u32 data_size;
+} __packed;
+
+#define HVSOCK_HEADER_LEN  (sizeof(struct vmpacket_descriptor) + \
+sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */
+#define PREV_INDICES_LEN   (sizeof(u64))
+
+#define HVSOCK_PKT_LEN(payload_len)(HVSOCK_HEADER_LEN + \
+   ALIGN((payload_len), 8) + \
+   PREV_INDICES_LEN)
+#define HVSOCK_MIN_PKT_LEN HVSOCK_PKT_LEN(1)
+
 #endif /* _HYPERV_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 73bf6c6..88b1ccd 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -201,8 +201,8 @@ struct ucred {
 #define AF_NFC 39  /* NFC sockets  */
 #define AF_VSOCK   40  /* vSockets */
 #define AF_KCM 41  /* Kernel Connection Multiplexor*/
-
-#define AF_MAX 42  /* For now.. */
+#define AF_HYPERV  42  /* Hyper-V Sockets  */
+#define AF_MAX 43  /* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC  AF_UNSPEC
@@ -249,6 +249,7 @@ struct ucred {
 #define PF_NFC AF_NFC
 #define PF_VSOCK   AF_VSOCK
 #define PF_KCM AF_KCM
+#define PF_HYPERV  AF_HYPERV
 #define PF_MAX AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
new file mode 100644
index 000..a5aa28d
--- /dev/null
+++ b/include/net/af_hvsock.h
@@ -0,0 +1,51 @@
+#ifndef __AF_HVSOCK_H__
+#define __AF_HVSOCK_H__
+
+#include 
+#include 
+#include 
+
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV (5 * PAGE_SIZE)
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND (5 * PAGE_SIZE)
+
+#define HVSOCK_RCV_BUF_SZ  VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV
+#define HVSOCK_SND_BUF_SZ  PAGE_SIZE
+
+#define sk_to_hvsock(__sk)((struct hvsock_sock *)(__sk))
+#define hvsock_to_sk(__hvsk)   ((struct sock *)(__hvsk))
+
+struct hvsock_sock {
+   /* sk must be the first member. */
+   struct sock sk;
+
+   struct sockaddr_hv local_addr;
+   struct sockaddr_hv remote_addr;
+
+   /* protected by the global hvsock_mutex */
+   struct list_head bound_list;
+   struct list_head connected_list;
+
+   struct list_head accept_queue;
+   /* used by enqueue and dequeue */
+   struct mutex accept_queue_mutex;
+
+   struct delayed_work dwork;
+
+   u32 peer_shutdown;
+
+   struct vmbus_channel *channel;
+
+   struct {
+   struct

[PATCH v8 net-next 1/1] hv_sock: introduce Hyper-V Sockets

Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
mechanism between the host and the guest. It's somewhat like TCP over
VMBus, but the transportation layer (VMBus) is much simpler than IP.

With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.

Hyper-V Sockets is only available on new Windows hosts, like Windows Server
2016. More info is in this article "Make your own integration services":
https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service

The patch implements the necessary support in the guest side by introducing
a new socket address family AF_HYPERV.

Signed-off-by: Dexuan Cui 
Cc: "K. Y. Srinivasan" 
Cc: Haiyang Zhang 
Cc: Vitaly Kuznetsov 
---
 MAINTAINERS |2 +
 include/linux/hyperv.h  |   16 +
 include/linux/socket.h  |5 +-
 include/net/af_hvsock.h |   51 ++
 include/uapi/linux/hyperv.h |   25 +
 net/Kconfig |1 +
 net/Makefile|1 +
 net/hv_sock/Kconfig |   10 +
 net/hv_sock/Makefile|3 +
 net/hv_sock/af_hvsock.c | 1483 +++
 10 files changed, 1595 insertions(+), 2 deletions(-)
 create mode 100644 include/net/af_hvsock.h
 create mode 100644 net/hv_sock/Kconfig
 create mode 100644 net/hv_sock/Makefile
 create mode 100644 net/hv_sock/af_hvsock.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 67d99dd..7b6f203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5267,7 +5267,9 @@ F:drivers/pci/host/pci-hyperv.c
 F: drivers/net/hyperv/
 F: drivers/scsi/storvsc_drv.c
 F: drivers/video/fbdev/hyperv_fb.c
+F: net/hv_sock/
 F: include/linux/hyperv.h
+F: include/net/af_hvsock.h
 F: tools/hv/
 F: Documentation/ABI/stable/sysfs-bus-vmbus
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index aa0fadc..b92439d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1338,4 +1338,20 @@ extern __u32 vmbus_proto_version;
 
 int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
  const uuid_le *shv_host_servie_id);
+struct vmpipe_proto_header {
+   u32 pkt_type;
+   u32 data_size;
+} __packed;
+
+#define HVSOCK_HEADER_LEN  (sizeof(struct vmpacket_descriptor) + \
+sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */
+#define PREV_INDICES_LEN   (sizeof(u64))
+
+#define HVSOCK_PKT_LEN(payload_len)(HVSOCK_HEADER_LEN + \
+   ALIGN((payload_len), 8) + \
+   PREV_INDICES_LEN)
+#define HVSOCK_MIN_PKT_LEN HVSOCK_PKT_LEN(1)
+
 #endif /* _HYPERV_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 73bf6c6..88b1ccd 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -201,8 +201,8 @@ struct ucred {
 #define AF_NFC 39  /* NFC sockets  */
 #define AF_VSOCK   40  /* vSockets */
 #define AF_KCM 41  /* Kernel Connection Multiplexor*/
-
-#define AF_MAX 42  /* For now.. */
+#define AF_HYPERV  42  /* Hyper-V Sockets  */
+#define AF_MAX 43  /* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC  AF_UNSPEC
@@ -249,6 +249,7 @@ struct ucred {
 #define PF_NFC AF_NFC
 #define PF_VSOCK   AF_VSOCK
 #define PF_KCM AF_KCM
+#define PF_HYPERV  AF_HYPERV
 #define PF_MAX AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
new file mode 100644
index 000..a5aa28d
--- /dev/null
+++ b/include/net/af_hvsock.h
@@ -0,0 +1,51 @@
+#ifndef __AF_HVSOCK_H__
+#define __AF_HVSOCK_H__
+
+#include 
+#include 
+#include 
+
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV (5 * PAGE_SIZE)
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND (5 * PAGE_SIZE)
+
+#define HVSOCK_RCV_BUF_SZ  VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV
+#define HVSOCK_SND_BUF_SZ  PAGE_SIZE
+
+#define sk_to_hvsock(__sk)((struct hvsock_sock *)(__sk))
+#define hvsock_to_sk(__hvsk)   ((struct sock *)(__hvsk))
+
+struct hvsock_sock {
+   /* sk must be the first member. */
+   struct sock sk;
+
+   struct sockaddr_hv local_addr;
+   struct sockaddr_hv remote_addr;
+
+   /* protected by the global hvsock_mutex */
+   struct list_head bound_list;
+   struct list_head connected_list;
+
+   struct list_head accept_queue;
+   /* used by enqueue and dequeue */
+   struct mutex accept_queue_mutex;
+
+   struct delayed_work dwork;
+
+   u32 peer_shutdown;
+
+   struct vmbus_channel *channel;
+
+   struct {
+   struct vmpipe_proto_header hdr;
+   char buf[HVSOCK_SND_BUF_SZ];
+   }

[PATCH v8 net-next 0/1] introduce Hyper-V VM Sockets(hv_sock)

Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
mechanism between the host and the guest. It's somewhat like TCP over
VMBus, but the transportation layer (VMBus) is much simpler than IP.

With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.

Hyper-V Sockets is only available on new Windows hosts, like Windows Server
2016. More info is in this article "Make your own integration services":
https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service

The patch implements the necessary support in the guest side by
introducing a new socket address family AF_HYPERV.

Note: the VMBus driver side's supporting patches have been in the mainline
tree.

I know the kernel has already had a VM Sockets driver (AF_VSOCK) based
on VMware VMCI (net/vmw_vsock/, drivers/misc/vmw_vmci), and KVM is
proposing AF_VSOCK of virtio version:
http://marc.info/?l=linux-netdev&m=145952064004765&w=2

However, though Hyper-V Sockets may seem conceptually similar to
AF_VSOCK, there are differences in the transportation layer, and IMO these
make the direct code reusing impractical:

1. In AF_VSOCK, the endpoint type is: <u32 ContextID, u32 Port>, but in
AF_HYPERV, the endpoint type is: <GUID VM_ID, GUID ServiceID>. Here GUID
is 128-bit.

2. AF_VSOCK supports SOCK_DGRAM, while AF_HYPERV doesn't.

3. AF_VSOCK supports some special sock opts, like SO_VM_SOCKETS_BUFFER_SIZE,
SO_VM_SOCKETS_BUFFER_MIN/MAX_SIZE and SO_VM_SOCKETS_CONNECT_TIMEOUT.
These are meaningless to AF_HYPERV.

4. Some AF_VSOCK's VMCI transportation ops are meaningless to AF_HYPERV/VMBus,
like .notify_recv_init
.notify_recv_pre_block
.notify_recv_pre_dequeue
.notify_recv_post_dequeue
.notify_send_init
.notify_send_pre_block
.notify_send_pre_enqueue
.notify_send_post_enqueue
etc.

So I think we'd better introduce a new address family: AF_HYPERV.

Please review the patch.

Looking forward to your comments!

Changes since v1:
- updated "[PATCH 6/7] hvsock: introduce Hyper-V VM Sockets feature"
- added __init and __exit for the module init/exit functions
- net/hv_sock/Kconfig: "default m" -> "default m if HYPERV"
- MODULE_LICENSE: "Dual MIT/GPL" -> "Dual BSD/GPL"

Changes since v2:
- fixed various coding issue pointed out by David Miller
- fixed indentation issues
- removed pr_debug in net/hv_sock/af_hvsock.c
- used reverse-Christmas-tree style for local variables.
- EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL

Changes since v3:
- fixed a few coding issue pointed by Vitaly Kuznetsov and Dan Carpenter
- fixed the ret value in vmbus_recvpacket_hvsock on error
- fixed the style of multi-line comment: vmbus_get_hvsock_rw_status()

Changes since v4 (https://lkml.org/lkml/2015/7/28/404):
- addressed all the comments about V4.
- treat the hvsock offers/channels as special VMBus devices
- add a mechanism to pass hvsock events to the hvsock driver
- fixed some corner cases with proper locking when a connection is closed
- rebased to the latest Greg's tree

Changes since v5 (https://lkml.org/lkml/2015/12/24/103):
- addressed the coding style issues (Vitaly Kuznetsov & David Miller, thanks!)
- used a better coding for the per-channel rescind callback (Thank Vitaly!)
- avoided the introduction of new VMBUS driver APIs vmbus_sendpacket_hvsock()
and vmbus_recvpacket_hvsock() and used vmbus_sendpacket()/vmbus_recvpacket()
in the higher level (i.e., the vmsock driver). Thank Vitaly!

Changes since v6 (http://lkml.iu.edu/hypermail/linux/kernel/1601.3/01813.html)
- only a few minor changes of coding style and comments

Changes since v7
- a few minor changes of coding style: thanks, Joe Perches!
- added some lines of comments about GUID/UUID before the struct sockaddr_hv.

Dexuan Cui (1):
hv_sock: introduce Hyper-V Sockets

MAINTAINERS |2 +
include/linux/hyperv.h | 16 +
include/linux/socket.h |5 +-
include/net/af_hvsock.h | 51 ++
include/uapi/linux/hyperv.h | 25 +
net/Kconfig |1 +
net/Makefile|1 +
net/hv_sock/Kconfig | 10 +
net/hv_sock/Makefile|3 +
net/hv_sock/af_hvsock.c | 1483 +++
10 files changed, 1595 insertions(+), 2 deletions(-)
create mode 100644 include/net/af_hvsock.h
create mode 100644 net/hv_sock/Kconfig
create mode 100644 net/hv_sock/Makefile
create mode 100644 net/hv_sock/af_hvsock.c

--
2.1.0

[PATCH v8 net-next 0/1] introduce Hyper-V VM Sockets(hv_sock)

With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.

The patch implements the necessary support in the guest side by
introducing a new socket address family AF_HYPERV.

Note: the VMBus driver side's supporting patches have been in the mainline
tree.

However, though Hyper-V Sockets may seem conceptually similar to
AF_VSOCK, there are differences in the transportation layer, and IMO these
make the direct code reusing impractical:

1. In AF_VSOCK, the endpoint type is: <u32 ContextID, u32 Port>, but in
AF_HYPERV, the endpoint type is: <GUID VM_ID, GUID ServiceID>. Here GUID
is 128-bit.

2. AF_VSOCK supports SOCK_DGRAM, while AF_HYPERV doesn't.

3. AF_VSOCK supports some special sock opts, like SO_VM_SOCKETS_BUFFER_SIZE,
SO_VM_SOCKETS_BUFFER_MIN/MAX_SIZE and SO_VM_SOCKETS_CONNECT_TIMEOUT.
These are meaningless to AF_HYPERV.

So I think we'd better introduce a new address family: AF_HYPERV.

Please review the patch.

Looking forward to your comments!

Changes since v6 (http://lkml.iu.edu/hypermail/linux/kernel/1601.3/01813.html)
- only a few minor changes of coding style and comments

Changes since v7
- a few minor changes of coding style: thanks, Joe Perches!
- added some lines of comments about GUID/UUID before the struct sockaddr_hv.

Dexuan Cui (1):
hv_sock: introduce Hyper-V Sockets

--
2.1.0

Re: [PATCH v1 05/12] serial: 8250_dma: switch to new dmaengine_terminate_* API

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> Convert dmaengine_terminate_all() calls to synchronous and asynchronous
> versions where appropriate.

Reviewed-by: Peter Hurley

Re: [PATCH v1 05/12] serial: 8250_dma: switch to new dmaengine_terminate_* API

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> Convert dmaengine_terminate_all() calls to synchronous and asynchronous
> versions where appropriate.

Reviewed-by: Peter Hurley

Re: [PATCH v1 06/12] serial: 8250_dma: stop ongoing RX DMA on exception

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> If we get an exception interrupt, i.e. UART_IIR_RLSI, stop any ongoing RX DMA
> transfer, otherwise it might generate more spurious interrupts and make the
> port unavailable.

Then how to know which rx byte the error is for if dma continues anyway?
What if there are multiple error bytes?


> As has been seen on Intel Broxton system:

This system shouldn't be setup for UART DMA imo.


> ...
> [  168.526281] serial8250: too much work for irq5
> [  168.535908] serial8250: too much work for irq5
> [  173.449464] serial8250_interrupt: 4439 callbacks suppressed
> [  173.455694] serial8250: too much work for irq5
> ...
> 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/tty/serial/8250/8250_dma.c | 17 -
>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_dma.c 
> b/drivers/tty/serial/8250/8250_dma.c
> index 9d80bb1..b134bec 100644
> --- a/drivers/tty/serial/8250/8250_dma.c
> +++ b/drivers/tty/serial/8250/8250_dma.c
> @@ -110,6 +110,16 @@ err:
>   return ret;
>  }
>  
> +static void __dma_rx_stop(struct uart_8250_port *p, struct uart_8250_dma 
> *dma)
> +{
> + if (!dma->rx_running)
> + return;
> +
> + dmaengine_pause(dma->rxchan);
> + __dma_rx_complete(p);
> + dmaengine_terminate_async(dma->rxchan);
> +}
> +
>  int serial8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
>  {
>   struct uart_8250_dma*dma = p->dma;
> @@ -118,17 +128,14 @@ int serial8250_rx_dma(struct uart_8250_port *p, 
> unsigned int iir)
>   switch (iir & 0x3f) {
>   case UART_IIR_RLSI:
>   /* 8250_core handles errors and break interrupts */
> + __dma_rx_stop(p, dma);
>   return -EIO;
>   case UART_IIR_RX_TIMEOUT:
>   /*
>* If RCVR FIFO trigger level was not reached, complete the
>* transfer and let 8250_core copy the remaining data.
>*/
> - if (dma->rx_running) {
> - dmaengine_pause(dma->rxchan);
> - __dma_rx_complete(p);
> - dmaengine_terminate_async(dma->rxchan);
> - }
> + __dma_rx_stop(p, dma);
>   return -ETIMEDOUT;
>   default:
>   break;
>

Re: [PATCH v1 06/12] serial: 8250_dma: stop ongoing RX DMA on exception

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> If we get an exception interrupt, i.e. UART_IIR_RLSI, stop any ongoing RX DMA
> transfer, otherwise it might generate more spurious interrupts and make the
> port unavailable.

Then how to know which rx byte the error is for if dma continues anyway?
What if there are multiple error bytes?


> As has been seen on Intel Broxton system:

This system shouldn't be setup for UART DMA imo.


> ...
> [  168.526281] serial8250: too much work for irq5
> [  168.535908] serial8250: too much work for irq5
> [  173.449464] serial8250_interrupt: 4439 callbacks suppressed
> [  173.455694] serial8250: too much work for irq5
> ...
> 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/tty/serial/8250/8250_dma.c | 17 -
>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_dma.c 
> b/drivers/tty/serial/8250/8250_dma.c
> index 9d80bb1..b134bec 100644
> --- a/drivers/tty/serial/8250/8250_dma.c
> +++ b/drivers/tty/serial/8250/8250_dma.c
> @@ -110,6 +110,16 @@ err:
>   return ret;
>  }
>  
> +static void __dma_rx_stop(struct uart_8250_port *p, struct uart_8250_dma 
> *dma)
> +{
> + if (!dma->rx_running)
> + return;
> +
> + dmaengine_pause(dma->rxchan);
> + __dma_rx_complete(p);
> + dmaengine_terminate_async(dma->rxchan);
> +}
> +
>  int serial8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
>  {
>   struct uart_8250_dma*dma = p->dma;
> @@ -118,17 +128,14 @@ int serial8250_rx_dma(struct uart_8250_port *p, 
> unsigned int iir)
>   switch (iir & 0x3f) {
>   case UART_IIR_RLSI:
>   /* 8250_core handles errors and break interrupts */
> + __dma_rx_stop(p, dma);
>   return -EIO;
>   case UART_IIR_RX_TIMEOUT:
>   /*
>* If RCVR FIFO trigger level was not reached, complete the
>* transfer and let 8250_core copy the remaining data.
>*/
> - if (dma->rx_running) {
> - dmaengine_pause(dma->rxchan);
> - __dma_rx_complete(p);
> - dmaengine_terminate_async(dma->rxchan);
> - }
> + __dma_rx_stop(p, dma);
>   return -ETIMEDOUT;
>   default:
>   break;
>

Re: [PATCH v1 08/12] serial: 8250: enable AFE on ports where FIFO is 16 bytes

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> Intel Quark has 16550A compatible UART with autoflow feature enabled. It has
> only 16 bytes of FIFO. Currently serial8250_do_set_termios() prevents to 
> enable
> autoflow since the minimum requirement of 32 bytes of FIFO size.
> 
> Decrease a FIFO size limitation to 16 bytes to allow autoflow control be
> enabled on such UARTs.
> 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/tty/serial/8250/8250_port.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_port.c 
> b/drivers/tty/serial/8250/8250_port.c
> index e213da0..3f8121e 100644
> --- a/drivers/tty/serial/8250/8250_port.c
> +++ b/drivers/tty/serial/8250/8250_port.c
> @@ -2522,9 +2522,9 @@ serial8250_do_set_termios(struct uart_port *port, 
> struct ktermios *termios,
>* the trigger, or the MCR RTS bit is cleared.  In the case where
>* the remote UART is not using CTS auto flow control, we must
>* have sufficient FIFO entries for the latency of the remote
> -  * UART to respond.  IOW, at least 32 bytes of FIFO.
> +  * UART to respond.  IOW, at least 16 bytes of FIFO.
>*/
> - if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) {
> + if (up->capabilities & UART_CAP_AFE && port->fifosize >= 16) {

Let's just remove the fifosize test and rely on UART_CAP_AFE to enable
AFE.  Please remove comment from "In the case where ..."

Also, I think the PORT_A7 port type should have UART_CAP_AFE commented out,
especially since/if the trigger level is 1 byte.

Regards,
Peter Hurley

>   up->mcr &= ~UART_MCR_AFE;
>   if (termios->c_cflag & CRTSCTS)
>   up->mcr |= UART_MCR_AFE;
>

Re: [PATCH v1 08/12] serial: 8250: enable AFE on ports where FIFO is 16 bytes

On 04/07/2016 01:37 PM, Andy Shevchenko wrote:
> Intel Quark has 16550A compatible UART with autoflow feature enabled. It has
> only 16 bytes of FIFO. Currently serial8250_do_set_termios() prevents to 
> enable
> autoflow since the minimum requirement of 32 bytes of FIFO size.
> 
> Decrease a FIFO size limitation to 16 bytes to allow autoflow control be
> enabled on such UARTs.
> 
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/tty/serial/8250/8250_port.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250_port.c 
> b/drivers/tty/serial/8250/8250_port.c
> index e213da0..3f8121e 100644
> --- a/drivers/tty/serial/8250/8250_port.c
> +++ b/drivers/tty/serial/8250/8250_port.c
> @@ -2522,9 +2522,9 @@ serial8250_do_set_termios(struct uart_port *port, 
> struct ktermios *termios,
>* the trigger, or the MCR RTS bit is cleared.  In the case where
>* the remote UART is not using CTS auto flow control, we must
>* have sufficient FIFO entries for the latency of the remote
> -  * UART to respond.  IOW, at least 32 bytes of FIFO.
> +  * UART to respond.  IOW, at least 16 bytes of FIFO.
>*/
> - if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) {
> + if (up->capabilities & UART_CAP_AFE && port->fifosize >= 16) {

Let's just remove the fifosize test and rely on UART_CAP_AFE to enable
AFE.  Please remove comment from "In the case where ..."

Also, I think the PORT_A7 port type should have UART_CAP_AFE commented out,
especially since/if the trigger level is 1 byte.

Regards,
Peter Hurley

>   up->mcr &= ~UART_MCR_AFE;
>   if (termios->c_cflag & CRTSCTS)
>   up->mcr |= UART_MCR_AFE;
>

Re: 4.4, 4.6: camera and unlock buttons produce tons of interrupts (was Re: N900 sleep mode)

2016-04-07 Thread Tony Lindgren

* Pavel Machek  [160407 16:02]:
> Hi!
> 
> > > gzipped config is attached.
> > > 
> > > Note that I'm still using NOLO. I enabled the sleep, then went to
> > > runlevel 1. LEDs still stay on, 55mA power consumption. That was with
> > > 1 in off_mode.
> > 
> > Nothing idling for me with your .config.. And it seems slower to boot
> > compared to omap2plus_defconfig? Maybe because of the extra GPIO
> > interrupts?
> 
> Extra interrupts only happen when enable_off_mode is 1, so that should
> not be an issue during boot.

OK maybe it's just the extra driver probe time then.

> > Looks like you have 117 additional entries in .config enabled compared
> > to omap2plus_defconfig. Maybe go back to omap2plus_defconfig with minimal
> > changes and verify it idles properly first?
> > 
> > I'm suspecting it's some driver(s) you have enabled causing the
> > issue.
> 
> I guess so. Do you (or anyone else) have minimum non-modular config
> for N900 that boots with video? Could I get lsmod from your system?
> (Yes, I still have nightmares from getting .config that works).

Well I've been just using omap2plus_defconfig with:

# modprobe tsc2005
# modprobe gpio_backlight
# modprobe panel_sony_acx565akm
# modprobe omapfb

# echo 255 > /sys/class/backlight/acx565akm/brightness

And then the following to blank for idle:

# echo 1 > /sys/devices/platform/omapfb/graphics/fb0/blank

But in the world of eternal regressions, I'm not seeing anything
on the framebuffer with v4.6-rc2 :( No idea what broke it or when
as my n900 is in the rack.

Anyways, below is also my lsmod output.

Regards,

Tony


Module                   Size  Used by
panel_sharp_ls037v7dw01  4148  0
ads7846                 12959  0
hwmon                    4213  1 ads7846
gpio_keys                9053  0
twl4030_keypad           3896  0
matrix_keymap            2801  1 twl4030_keypad
omapfb                  39255  1
cfbfillrect              3614  1 omapfb
cfbimgblt                2416  1 omapfb
cfbcopyarea              3187  1 omapfb
panel_sony_acx565akm     7895  1
omapdss                269684  4 panel_sharp_ls037v7dw01,omapfb,panel_sony_acx565akm
gpio_backlight           2804  0
tsc2005                  1782  0
tsc200x_core             7337  1 tsc2005
ledtrig_default_on       1119  0
leds_gpio                3530  0
led_class                5418  1 leds_gpio
rtc_twl                  6234  0
twl4030_wdt              2711  0

Re: 4.4, 4.6: camera and unlock buttons produce tons of interrupts (was Re: N900 sleep mode)

2016-04-07 Thread Tony Lindgren

* Pavel Machek  [160407 16:02]:
> Hi!
> 
> > > gzipped config is attached.
> > > 
> > > Note that I'm still using NOLO. I enabled the sleep, then went to
> > > runlevel 1. LEDs still stay on, 55mA power consumption. That was with
> > > 1 in off_mode.
> > 
> > Nothing idling for me with your .config.. And it seems slower to boot
> > compared to omap2plus_defconfig? Maybe because of the extra GPIO
> > interrupts?
> 
> Extra interrupts only happen when enable_off_mode is 1, so that should
> not be an issue during boot.

OK maybe it's just the extra driver probe time then.

> > Looks like you have 117 additional entries in .config enabled compared
> > to omap2plus_defconfig. Maybe go back to omap2plus_defconfig with minimal
> > changes and verify it idles properly first?
> > 
> > I'm suspecting it's some driver(s) you have enabled causing the
> > issue.
> 
> I guess so. Do you (or anyone else) have minimum non-modular config
> for N900 that boots with video? Could I get lsmod from your system?
> (Yes, I still have nightmares from getting .config that works).

Well I've been just using omap2plus_defconfig with:

# modprobe tsc2005
# modprobe gpio_backlight
# modprobe panel_sony_acx565akm
# modprobe omapfb

# echo 255 > /sys/class/backlight/acx565akm/brightness

And then the following to blank for idle:

# echo 1 > /sys/devices/platform/omapfb/graphics/fb0/blank

But in the world of eternal regressions, I'm not seeing anything
on the framebuffer with v4.6-rc2 :( No idea what broke it or when
as my n900 is in the rack.

Anyways, below is also my lsmod output.

Regards,

Tony


Module                   Size  Used by
panel_sharp_ls037v7dw01  4148  0
ads7846                 12959  0
hwmon                    4213  1 ads7846
gpio_keys                9053  0
twl4030_keypad           3896  0
matrix_keymap            2801  1 twl4030_keypad
omapfb                  39255  1
cfbfillrect              3614  1 omapfb
cfbimgblt                2416  1 omapfb
cfbcopyarea              3187  1 omapfb
panel_sony_acx565akm     7895  1
omapdss                269684  4 panel_sharp_ls037v7dw01,omapfb,panel_sony_acx565akm
gpio_backlight           2804  0
tsc2005                  1782  0
tsc200x_core             7337  1 tsc2005
ledtrig_default_on       1119  0
leds_gpio                3530  0
led_class                5418  1 leds_gpio
rtc_twl                  6234  0
twl4030_wdt              2711  0

Re: [PATCH] extcon: usb-gpio: Don't miss event during suspend/resume

On 2016년 04월 06일 23:01, Roger Quadros wrote:
> Pin state might have changed during suspend/resume while
> our interrupts were disabled. Scan for change during resume.
> 
> Signed-off-by: Roger Quadros 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..20175ec 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -192,6 +192,7 @@ static int usb_extcon_resume(struct device *dev)
>   }
>  
>   enable_irq(info->id_irq);
> + usb_extcon_detect_cable(&info->wq_detcable.work);

If the interrupt is used as a wakeup source/irq, the interrupt handler
will run after wake-up from the suspend state.

But if the interrupt is not used as a wakeup source/irq, then, as in
your patch, we need to check the state in resume().

So I think you need an additional condition that checks whether the
interrupt is a wakeup source or not.

Thanks,
Chanwoo CHoi

Re: [PATCH] extcon: usb-gpio: Don't miss event during suspend/resume

On 2016년 04월 06일 23:01, Roger Quadros wrote:
> Pin state might have changed during suspend/resume while
> our interrupts were disabled. Scan for change during resume.
> 
> Signed-off-by: Roger Quadros 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..20175ec 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -192,6 +192,7 @@ static int usb_extcon_resume(struct device *dev)
>   }
>  
>   enable_irq(info->id_irq);
> + usb_extcon_detect_cable(&info->wq_detcable.work);

If the interrupt is used as a wakeup source/irq, the interrupt handler
will run after wake-up from the suspend state.

But if the interrupt is not used as a wakeup source/irq, then, as in
your patch, we need to check the state in resume().

So I think you need an additional condition that checks whether the
interrupt is a wakeup source or not.

Thanks,
Chanwoo CHoi

Re: [PATCH 1/2] clk: Add Oxford Semiconductor OXNAS Standard Clocks

On 04/03, Neil Armstrong wrote:
> On 04/02/2016 02:50 AM, Stephen Boyd wrote:
> > On 04/01, Neil Armstrong wrote:
> >> +  if (!onecell_data)
> >> +  return -ENOMEM;
> >> +
> >> +  regmap = syscon_node_to_regmap(of_get_parent(np));
> > 
> > > Can we use dev_get_regmap(&pdev->dev.parent) here instead? I'd
> > prefer device APIs over DT APIs here. 
> > 
> 
> It will not work here since the parent node is a syscon, the call to 
> syscon_node_to_regmap() will call of_syscon_register() and create the regmap, 
> the dev_get_regmap() needs a proper platform device registered as regmap here.
> 

Ok. I was hoping that we could make simple-mfd look to see if
there's a syscon and then attach it to the parent device, but it
seems that simple-mfd is not actually a driver and it might not
even make a parent device for the children nodes?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH 1/2] clk: Add Oxford Semiconductor OXNAS Standard Clocks

On 04/03, Neil Armstrong wrote:
> On 04/02/2016 02:50 AM, Stephen Boyd wrote:
> > On 04/01, Neil Armstrong wrote:
> >> +  if (!onecell_data)
> >> +  return -ENOMEM;
> >> +
> >> +  regmap = syscon_node_to_regmap(of_get_parent(np));
> > 
> > > Can we use dev_get_regmap(&pdev->dev.parent) here instead? I'd
> > prefer device APIs over DT APIs here. 
> > 
> 
> It will not work here since the parent node is a syscon, the call to 
> syscon_node_to_regmap() will call of_syscon_register() and create the regmap, 
> the dev_get_regmap() needs a proper platform device registered as regmap here.
> 

Ok. I was hoping that we could make simple-mfd look to see if
there's a syscon and then attach it to the parent device, but it
seems that simple-mfd is not actually a driver and it might not
even make a parent device for the children nodes?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH] block: make sure big bio is splitted into at most 256 bvecs

2016-04-07 Thread Eric Wheeler

On Thu, 7 Apr 2016, Ming Lei wrote:

> On Thu, Apr 7, 2016 at 9:56 AM, Eric Wheeler  
> wrote:
> > On Thu, 7 Apr 2016, Ming Lei wrote:
> >
> >> On Thu, Apr 7, 2016 at 9:36 AM, Eric Wheeler  
> >> wrote:
> >> > On Wed, 6 Apr 2016, Ming Lei wrote:
> >> >
> >> >> After arbitrary bio size is supported, the incoming bio may
> >> >> be very big. We have to split the bio into small bios so that
> >> >> each holds at most BIO_MAX_PAGES bvecs for safety reason, such
> >> >> as bio_clone().
> >> >>
> >> >> This patch fixes the following kernel crash:
> >> >>
> >> >> > [  172.660142] BUG: unable to handle kernel NULL pointer dereference 
> >> >> > at
> >> >> > 0028
> >> >> > [  172.660229] IP: [] bio_trim+0xf/0x2a
> >> >> > [  172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0
> >> >> > [  172.660399] Oops:  [#1] SMP
> >> >> > [...]
> >> >> > [  172.664780] Call Trace:
> >> >> > [  172.664813]  [] ? raid1_make_request+0x2e8/0xad7 
> >> >> > [raid1]
> >> >> > [  172.664846]  [] ? blk_queue_split+0x377/0x3d4
> >> >> > [  172.664880]  [] ? md_make_request+0xf6/0x1e9 
> >> >> > [md_mod]
> >> >> > [  172.664912]  [] ? generic_make_request+0xb5/0x155
> >> >> > [  172.664947]  [] ? prio_io+0x85/0x95 [bcache]
> >> >> > [  172.664981]  [] ? register_cache_set+0x355/0x8d0 
> >> >> > [bcache]
> >> >> > [  172.665016]  [] ? register_bcache+0x1006/0x1174 
> >> >> > [bcache]
> >> >>
> >> >> Fixes: 54efd50(block: make generic_make_request handle arbitrarily 
> >> >> sized bios)
> >> >> Reported-by: Sebastian Roesner 
> >> >> Reported-by: Eric Wheeler 
> >> >> Cc: sta...@vger.kernel.org (4.2+)
> >> >
> >> > Ming Lei,
> >> >
> >> > get_max_io_size doesn't appear until 4.5 based on a quick LXR check so we
> >> > won't see it in stable I don't think.
> >> >
> >> > It would be nice to see this fixed in 4.1 (if affected there).  Is there
> >>
> >> The issue should be introduced to v4.3 via 54efd50
> >>
> >> > another place this could be applied to be a bit more backward compatible?
> >>
> >> The v1 needn't change to get_max_io_size(), and it should be simple enough
> >> to backport to previous stables, please try it:
> >>
> >> http://marc.info/?l=linux-block&m=145991422422927&w=2
> >
> > V1 changes blk_bio_segment_split() which doesn't exist until 4.3.
> >
> > How might you port this to v4.1.y?
> 
> Can you see the issue with v4.1?
> 
> You mentioned there are three reports:
> 
> > [2014-02-04] https://bugzilla.redhat.com/show_bug.cgi?id=1061339
> > [2016-01-13] http://www.spinics.net/lists/linux-bcache/msg03335.html
>  https://bugzilla.kernel.org/show_bug.cgi?id=110771
> > [2016-03-25] http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3607 
> > [this thread]
>  https://bugzilla.kernel.org/show_bug.cgi?id=114871
> 
> The first one has been fixed by '8423ae3 block: Fix cloning of
> discard/write same bios', as mentioned in bugzilla.
> 
> The other two are reported on v4.4 and v4.5.
> 
> If you look at the patch description, it is just needed for 4.3+.


Oh, that could be---but just to be sure:

I had thought perhaps this was an old issue since the first mention of 
this backtrace (but not bcache) was in 3.14 back in 2014 based on this 
post:
  https://bugzilla.redhat.com/show_bug.cgi?id=1061339

Is this relevant?

--
Eric Wheeler



> 
> Or I am wrong?
> 
> 
> Thanks,
> 
> >
> > --
> > Eric Wheeler
> >
> >
> >>
> >> Thanks,
> >>
> >> >
> >> >> Cc: Shaohua Li 
> >> >> Signed-off-by: Ming Lei 
> >> >> ---
> >> >> I can reproduce the issue and verify the fix by the following approach:
> >> >>   - create one raid1 over two virtio-blk
> >> >>   - build bcache device over the above raid1 and another cache 
> >> >> device.
> >> >>   - set cache mode as writeback
> >> >>   - run random write over ext4 on the bcache device
> >> >>   - then the crash can be triggered
> >> >>
> >> >>  block/blk-merge.c | 12 
> >> >>  1 file changed, 12 insertions(+)
> >> >>
> >> >> diff --git a/block/blk-merge.c b/block/blk-merge.c
> >> >> index 2613531..9a8651f 100644
> >> >> --- a/block/blk-merge.c
> >> >> +++ b/block/blk-merge.c
> >> >> @@ -79,6 +79,18 @@ static inline unsigned get_max_io_size(struct 
> >> >> request_queue *q,
> >> >>   /* aligned to logical block size */
> >> >>   sectors &= ~(mask >> 9);
> >> >>
> >> >> + /*
> >> >> +  * With arbitrary bio size, the incoming bio may be very big.
> >> >> +  * We have to split the bio into small bios so that each holds
> >> >> +  * at most BIO_MAX_PAGES bvecs for safety reason, such as
> >> >> +  * bio_clone().
> >> >> +  *
> >> >> +  * In the future, the limit might be converted into per-queue
> >> >> +  * flag.
> >> >> +  */
> >> >> + sectors = min_t(unsigned, sectors, BIO_MAX_PAGES <<
> >> >> + (PAGE_CACHE_SHIFT - 9));
>

Re: [PATCH] block: make sure big bio is splitted into at most 256 bvecs

2016-04-07 Thread Eric Wheeler

On Thu, 7 Apr 2016, Ming Lei wrote:

> On Thu, Apr 7, 2016 at 9:56 AM, Eric Wheeler  
> wrote:
> > On Thu, 7 Apr 2016, Ming Lei wrote:
> >
> >> On Thu, Apr 7, 2016 at 9:36 AM, Eric Wheeler  
> >> wrote:
> >> > On Wed, 6 Apr 2016, Ming Lei wrote:
> >> >
> >> >> After arbitrary bio size is supported, the incoming bio may
> >> >> be very big. We have to split the bio into small bios so that
> >> >> each holds at most BIO_MAX_PAGES bvecs for safety reason, such
> >> >> as bio_clone().
> >> >>
> >> >> This patch fixes the following kernel crash:
> >> >>
> >> >> > [  172.660142] BUG: unable to handle kernel NULL pointer dereference 
> >> >> > at
> >> >> > 0028
> >> >> > [  172.660229] IP: [] bio_trim+0xf/0x2a
> >> >> > [  172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0
> >> >> > [  172.660399] Oops:  [#1] SMP
> >> >> > [...]
> >> >> > [  172.664780] Call Trace:
> >> >> > [  172.664813]  [] ? raid1_make_request+0x2e8/0xad7 
> >> >> > [raid1]
> >> >> > [  172.664846]  [] ? blk_queue_split+0x377/0x3d4
> >> >> > [  172.664880]  [] ? md_make_request+0xf6/0x1e9 
> >> >> > [md_mod]
> >> >> > [  172.664912]  [] ? generic_make_request+0xb5/0x155
> >> >> > [  172.664947]  [] ? prio_io+0x85/0x95 [bcache]
> >> >> > [  172.664981]  [] ? register_cache_set+0x355/0x8d0 
> >> >> > [bcache]
> >> >> > [  172.665016]  [] ? register_bcache+0x1006/0x1174 
> >> >> > [bcache]
> >> >>
> >> >> Fixes: 54efd50(block: make generic_make_request handle arbitrarily 
> >> >> sized bios)
> >> >> Reported-by: Sebastian Roesner 
> >> >> Reported-by: Eric Wheeler 
> >> >> Cc: sta...@vger.kernel.org (4.2+)
> >> >
> >> > Ming Lei,
> >> >
> >> > get_max_io_size doesn't appear until 4.5 based on a quick LXR check so we
> >> > won't see it in stable I don't think.
> >> >
> >> > It would be nice to see this fixed in 4.1 (if affected there).  Is there
> >>
> >> The issue should be introduced to v4.3 via 54efd50
> >>
> >> > another place this could be applied to be a bit more backward compatible?
> >>
> >> The v1 needn't change to get_max_io_size(), and it should be simple enough
> >> to backport to previous stables, please try it:
> >>
> >> http://marc.info/?l=linux-block&m=145991422422927&w=2
> >
> > V1 changes blk_bio_segment_split() which doesn't exist until 4.3.
> >
> > How might you port this to v4.1.y?
> 
> Can you see the issue with v4.1?
> 
> You mentioned there are three reports:
> 
> > [2014-02-04] https://bugzilla.redhat.com/show_bug.cgi?id=1061339
> > [2016-01-13] http://www.spinics.net/lists/linux-bcache/msg03335.html
>  https://bugzilla.kernel.org/show_bug.cgi?id=110771
> > [2016-03-25] http://thread.gmane.org/gmane.linux.kernel.bcache.devel/3607 
> > [this thread]
>  https://bugzilla.kernel.org/show_bug.cgi?id=114871
> 
> The first one has been fixed by '8423ae3 block: Fix cloning of
> discard/write same bios', as mentioned in bugzilla.
> 
> The other two are reported on v4.4 and v4.5.
> 
> If you look at the patch description, it is just needed for 4.3+.


Oh, that could be---but just to be sure:

I had thought perhaps this was an old issue since the first mention of 
this backtrace (but not bcache) was in 3.14 back in 2014 based on this 
post:
  https://bugzilla.redhat.com/show_bug.cgi?id=1061339

Is this relevant?

--
Eric Wheeler



> 
> Or I am wrong?
> 
> 
> Thanks,
> 
> >
> > --
> > Eric Wheeler
> >
> >
> >>
> >> Thanks,
> >>
> >> >
> >> >> Cc: Shaohua Li 
> >> >> Signed-off-by: Ming Lei 
> >> >> ---
> >> >> I can reproduce the issue and verify the fix by the following approach:
> >> >>   - create one raid1 over two virtio-blk
> >> >>   - build bcache device over the above raid1 and another cache 
> >> >> device.
> >> >>   - set cache mode as writeback
> >> >>   - run random write over ext4 on the bcache device
> >> >>   - then the crash can be triggered
> >> >>
> >> >>  block/blk-merge.c | 12 
> >> >>  1 file changed, 12 insertions(+)
> >> >>
> >> >> diff --git a/block/blk-merge.c b/block/blk-merge.c
> >> >> index 2613531..9a8651f 100644
> >> >> --- a/block/blk-merge.c
> >> >> +++ b/block/blk-merge.c
> >> >> @@ -79,6 +79,18 @@ static inline unsigned get_max_io_size(struct 
> >> >> request_queue *q,
> >> >>   /* aligned to logical block size */
> >> >>   sectors &= ~(mask >> 9);
> >> >>
> >> >> + /*
> >> >> +  * With arbitrary bio size, the incoming bio may be very big.
> >> >> +  * We have to split the bio into small bios so that each holds
> >> >> +  * at most BIO_MAX_PAGES bvecs for safety reason, such as
> >> >> +  * bio_clone().
> >> >> +  *
> >> >> +  * In the future, the limit might be converted into per-queue
> >> >> +  * flag.
> >> >> +  */
> >> >> + sectors = min_t(unsigned, sectors, BIO_MAX_PAGES <<
> >> >> + (PAGE_CACHE_SHIFT - 9));
> >> >> +
> >> >>   return sectors;
> >> >>  }
> >> >>
> >> >> --
> >> >> 1.9.1
> >> >>
> >> >>
> >> > --
> >> > To unsubscribe from this list: send the

Re: [PATCH] ARM: dts: r8a7791: Don't disable referenced optional clocks

On 04/06, Sjoerd Simons wrote:
> On Wed, 2016-04-06 at 15:11 +0200, Geert Uytterhoeven wrote:
> > CC Mike, Stephen, linux-clk (this time with the new Mike)
> > 
> > On Wed, Apr 6, 2016 at 2:52 PM, Sjoerd Simons
> >  wrote:
> > > 
> > > clk_get on a disabled clock node will return EPROBE_DEFER, which
> > > can
> > > cause drivers to be deferred forever if such clocks are referenced
> > > in
> > > their clocks property.
> > Is this a side effect of commit 3e5dd6f6e690048d ("clk: Ignore
> > disabled DT
> > clock providers")?
> 
> Yes it seems so. Reverting that patch means that i can drop this one
> and get the expected behaviour again.

The DT is broken then? Is it possible to mark these status =
"okay" so that things work again?

> 
> Though even so I'm not sure what the convention is for clocks like
> these, the r8a7791.dtsi is inconsistent, as some are disabled while
> others (e.g. the audio clocks) are 0hz. Would be good to get some input
> on that regardless.
> 

What's the question here?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH] ARM: dts: r8a7791: Don't disable referenced optional clocks

On 04/06, Sjoerd Simons wrote:
> On Wed, 2016-04-06 at 15:11 +0200, Geert Uytterhoeven wrote:
> > CC Mike, Stephen, linux-clk (this time with the new Mike)
> > 
> > On Wed, Apr 6, 2016 at 2:52 PM, Sjoerd Simons
> >  wrote:
> > > 
> > > clk_get on a disabled clock node will return EPROBE_DEFER, which
> > > can
> > > cause drivers to be deferred forever if such clocks are referenced
> > > in
> > > their clocks property.
> > Is this a side effect of commit 3e5dd6f6e690048d ("clk: Ignore
> > disabled DT
> > clock providers")?
> 
> Yes it seems so. Reverting that patch means that i can drop this one
> and get the expected behaviour again.

The DT is broken then? Is it possible to mark these status =
"okay" so that things work again?

> 
> Though even so I'm not sure what the convention is for clocks like
> these, the r8a7791.dtsi is inconsistent, as some are disabled while
> others (e.g. the audio clocks) are 0hz. Would be good to get some input
> on that regardless.
> 

What's the question here?

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH net] lockdep: provide always true lockdep_is_held stub if lockdep disabled

On 08.04.2016 01:12, Hannes Frederic Sowa wrote:

I need this to provide a generic lockdep_sock_is_held function which can
be easily used in the kernel without using ifdef PROVEN macros.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Eric Dumazet 
Cc: David Miller 
Signed-off-by: Hannes Frederic Sowa 
---
Hello Peter and Ingo,

if it is possible could this go in via the net-tree, as this problem is
visible there already? Would be happy to get a review.

I take this patch back, as some call sites test if the lock is 
definitely not held. I will come up with a better approach.

Re: [PATCH net] lockdep: provide always true lockdep_is_held stub if lockdep disabled

On 08.04.2016 01:12, Hannes Frederic Sowa wrote:

I need this to provide a generic lockdep_sock_is_held function which can
be easily used in the kernel without using ifdef PROVEN macros.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Eric Dumazet 
Cc: David Miller 
Signed-off-by: Hannes Frederic Sowa 
---
Hello Peter and Ingo,

if it is possible could this go in via the net-tree, as this problem is
visible there already? Would be happy to get a review.

I take this patch back, as some call sites test if the lock is 
definitely not held. I will come up with a better approach.

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Jessica Yu


+++ Jiri Kosina [07/04/16 23:37 +0200]:

On Thu, 7 Apr 2016, Jessica Yu wrote:


Been sort of rattling my head over the scheduler code :-) Just following
the calls in and out of __schedule() it doesn't look like there is a
current flag/mechanism to tell whether or not a task has been
preempted..


Performing the complete stack unwind just to determine whether task has
been preempted non-voluntarily is a slight overkill indeed :/


Is there any reason why you didn't just create a new task flag,
something like TIF_PREEMPTED_IRQ, which would be set once
preempt_schedule_irq() is entered and unset after __schedule() returns
(for that task)? This would roughly correspond to setting the task flag
when the frame for preempt_schedule_irq() is pushed and unsetting it
just before the frame preempt_schedule_irq() is popped for that task.
This seems simpler than walking through all the frames just to see if
in_preempt_schedule_irq() had been called. Would that work?


Alternatively, without eating up a TIF_ space, it'd be possible to push a
magic contents on top of the stack in preempt_schedule_irq() (and pop it
once we are returning from there), and if such magic value is detected, we
just don't bother and claim unreliability.


Ah, but wouldn't we still have to walk through the frames (i.e. enter
the loop in patch 7/14) to look for the magic value in this approach?


That has advantages of both approaches combined, i.e. it's relatively
low-cost in terms of performance penalty, and it's reliable (in a sense
that you don't have false positives).

The small disadvantage is that you can (very rarely, depending on the
chosen magic) have false negatives. That probably doesn't hurt too much,
given the high improbability and non-lethal consequences.

How does that sound?

--
Jiri Kosina
SUSE Labs

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Jessica Yu


+++ Jiri Kosina [07/04/16 23:37 +0200]:

On Thu, 7 Apr 2016, Jessica Yu wrote:


Been sort of rattling my head over the scheduler code :-) Just following
the calls in and out of __schedule() it doesn't look like there is a
current flag/mechanism to tell whether or not a task has been
preempted..


Performing the complete stack unwind just to determine whether task has
been preempted non-voluntarily is a slight overkill indeed :/


Is there any reason why you didn't just create a new task flag,
something like TIF_PREEMPTED_IRQ, which would be set once
preempt_schedule_irq() is entered and unset after __schedule() returns
(for that task)? This would roughly correspond to setting the task flag
when the frame for preempt_schedule_irq() is pushed and unsetting it
just before the frame preempt_schedule_irq() is popped for that task.
This seems simpler than walking through all the frames just to see if
in_preempt_schedule_irq() had been called. Would that work?


Alternatively, without eating up a TIF_ space, it'd be possible to push a
magic contents on top of the stack in preempt_schedule_irq() (and pop it
once we are returning from there), and if such magic value is detected, we
just don't bother and claim unreliability.


Ah, but wouldn't we still have to walk through the frames (i.e. enter
the loop in patch 7/14) to look for the magic value in this approach?


That has advantages of both approaches combined, i.e. it's relatively
low-cost in terms of performance penalty, and it's reliable (in a sense
that you don't have false positives).

The small disadvantage is that you can (very rarely, depending on the
chosen magic) have false negatives. That probably doesn't hurt too much,
given the high improbability and non-lethal consequences.

How does that sound?

--
Jiri Kosina
SUSE Labs

[PATCH net] lockdep: provide always true lockdep_is_held stub if lockdep disabled

I need this to provide a generic lockdep_sock_is_held function which can
be easily used in the kernel without using ifdef PROVEN macros.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Eric Dumazet 
Cc: David Miller 
Signed-off-by: Hannes Frederic Sowa 
---
Hello Peter and Ingo,

if it is possible could this go in via the net-tree, as this problem is
visible there already? Would be happy to get a review.

Thanks,
Hannes

 include/linux/lockdep.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d026b190c53066..dc8d447cb3ab1c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -428,6 +428,8 @@ struct lock_class_key { };
 #define lockdep_pin_lock(l)do { (void)(l); } while 
(0)
 #define lockdep_unpin_lock(l)  do { (void)(l); } while (0)
 
+#define lockdep_is_held(l) ({ (void)(l); (1); })
+
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_LOCK_STAT
-- 
2.5.5

[PATCH net] lockdep: provide always true lockdep_is_held stub if lockdep disabled

I need this to provide a generic lockdep_sock_is_held function which can
be easily used in the kernel without using ifdef PROVEN macros.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Eric Dumazet 
Cc: David Miller 
Signed-off-by: Hannes Frederic Sowa 
---
Hello Peter and Ingo,

if it is possible could this go in via the net-tree, as this problem is
visible there already? Would be happy to get a review.

Thanks,
Hannes

 include/linux/lockdep.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d026b190c53066..dc8d447cb3ab1c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -428,6 +428,8 @@ struct lock_class_key { };
 #define lockdep_pin_lock(l)do { (void)(l); } while 
(0)
 #define lockdep_unpin_lock(l)  do { (void)(l); } while (0)
 
+#define lockdep_is_held(l) ({ (void)(l); (1); })
+
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_LOCK_STAT
-- 
2.5.5

Re: 4.4, 4.6: camera and unlock buttons produce tons of interrupts (was Re: N900 sleep mode)

2016-04-07 Thread Pavel Machek

Hi!

> > gzipped config is attached.
> > 
> > Note that I'm still using NOLO. I enabled the sleep, then went to
> > runlevel 1. LEDs still stay on, 55mA power consumption. That was with
> > 1 in off_mode.
> 
> Nothing idling for me with your .config.. And it seems slower to boot
> compared to omap2plus_defconfig? Maybe because of the extra GPIO
> interrupts?

Extra interrupts only happen when enable_off_mode is 1, so that should
not be an issue during boot.

> Looks like you have 117 additional entries in .config enabled compared
> to omap2plus_defconfig. Maybe go back to omap2plus_defconfig with minimal
> changes and verify it idles properly first?
> 
> I'm suspecting it's some driver(s) you have enabled causing the
> issue.

I guess so. Do you (or anyone else) have minimum non-modular config
for N900 that boots with video? Could I get lsmod from your system?
(Yes, I still have nightmares from getting .config that works).

Thanks and best regards,
Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Re: 4.4, 4.6: camera and unlock buttons produce tons of interrupts (was Re: N900 sleep mode)

2016-04-07 Thread Pavel Machek

Hi!

> > gzipped config is attached.
> > 
> > Note that I'm still using NOLO. I enabled the sleep, then went to
> > runlevel 1. LEDs still stay on, 55mA power consumption. That was with
> > 1 in off_mode.
> 
> Nothing idling for me with your .config.. And it seems slower to boot
> compared to omap2plus_defconfig? Maybe because of the extra GPIO
> interrupts?

Extra interrupts only happen when enable_off_mode is 1, so that should
not be an issue during boot.

> Looks like you have 117 additional entries in .config enabled compared
> to omap2plus_defconfig. Maybe go back to omap2plus_defconfig with minimal
> changes and verify it idles properly first?
> 
> I'm suspecting it's some driver(s) you have enabled causing the
> issue.

I guess so. Do you (or anyone else) have minimum non-modular config
for N900 that boots with video? Could I get lsmod from your system?
(Yes, I still have nightmares from getting .config that works).

Thanks and best regards,
Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

Re: [PATCH] extcon: usb-gpio: switch to use pm wakeirq apis

On 2016년 04월 07일 02:32, Grygorii Strashko wrote:
> Switch to use PM wakeirq APIs which automates wakeup IRQs
> enabling/disabling and so allows to make code simpler.
> 
> Signed-off-by: Grygorii Strashko 

Applied it.

Thanks,
Chanwoo Choi

> ---
> Unfortunately this simple patch depends on:
> "[RFC PATCH] PM / wakeirq: fix wakeirq setting after wakup re-configuration 
> from sysfs"
> https://lkml.org/lkml/2016/4/6/296
> 
>  drivers/extcon/extcon-usb-gpio.c | 19 ++-
>  1 file changed, 6 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..bc61d11 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -141,7 +142,8 @@ static int usb_extcon_probe(struct platform_device *pdev)
>   }
>  
>   platform_set_drvdata(pdev, info);
> - device_init_wakeup(dev, 1);
> + device_init_wakeup(dev, true);
> + dev_pm_set_wake_irq(dev, info->id_irq);
>  
>   /* Perform initial detection */
>   usb_extcon_detect_cable(&info->wq_detcable.work);
> @@ -155,6 +157,9 @@ static int usb_extcon_remove(struct platform_device *pdev)
>  
>   cancel_delayed_work_sync(&info->wq_detcable);
>  
> + dev_pm_clear_wake_irq(&pdev->dev);
> + device_init_wakeup(&pdev->dev, false);
> +
>   return 0;
>  }
>  
> @@ -164,12 +169,6 @@ static int usb_extcon_suspend(struct device *dev)
>   struct usb_extcon_info *info = dev_get_drvdata(dev);
>   int ret = 0;
>  
> - if (device_may_wakeup(dev)) {
> - ret = enable_irq_wake(info->id_irq);
> - if (ret)
> - return ret;
> - }
> -
>   /*
>* We don't want to process any IRQs after this point
>* as GPIOs used behind I2C subsystem might not be
> @@ -185,12 +184,6 @@ static int usb_extcon_resume(struct device *dev)
>   struct usb_extcon_info *info = dev_get_drvdata(dev);
>   int ret = 0;
>  
> - if (device_may_wakeup(dev)) {
> - ret = disable_irq_wake(info->id_irq);
> - if (ret)
> - return ret;
> - }
> -
>   enable_irq(info->id_irq);
>  
>   return ret;
>

Re: [PATCH] extcon: usb-gpio: switch to use pm wakeirq apis

On 2016년 04월 07일 02:32, Grygorii Strashko wrote:
> Switch to use PM wakeirq APIs which automates wakeup IRQs
> enabling/disabling and so allows to make code simpler.
> 
> Signed-off-by: Grygorii Strashko 

Applied it.

Thanks,
Chanwoo Choi

> ---
> Unfortunately this simple patch depends on:
> "[RFC PATCH] PM / wakeirq: fix wakeirq setting after wakup re-configuration 
> from sysfs"
> https://lkml.org/lkml/2016/4/6/296
> 
>  drivers/extcon/extcon-usb-gpio.c | 19 ++-
>  1 file changed, 6 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..bc61d11 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -141,7 +142,8 @@ static int usb_extcon_probe(struct platform_device *pdev)
>   }
>  
>   platform_set_drvdata(pdev, info);
> - device_init_wakeup(dev, 1);
> + device_init_wakeup(dev, true);
> + dev_pm_set_wake_irq(dev, info->id_irq);
>  
>   /* Perform initial detection */
>   usb_extcon_detect_cable(&info->wq_detcable.work);
> @@ -155,6 +157,9 @@ static int usb_extcon_remove(struct platform_device *pdev)
>  
>   cancel_delayed_work_sync(&info->wq_detcable);
>  
> + dev_pm_clear_wake_irq(&pdev->dev);
> + device_init_wakeup(&pdev->dev, false);
> +
>   return 0;
>  }
>  
> @@ -164,12 +169,6 @@ static int usb_extcon_suspend(struct device *dev)
>   struct usb_extcon_info *info = dev_get_drvdata(dev);
>   int ret = 0;
>  
> - if (device_may_wakeup(dev)) {
> - ret = enable_irq_wake(info->id_irq);
> - if (ret)
> - return ret;
> - }
> -
>   /*
>* We don't want to process any IRQs after this point
>* as GPIOs used behind I2C subsystem might not be
> @@ -185,12 +184,6 @@ static int usb_extcon_resume(struct device *dev)
>   struct usb_extcon_info *info = dev_get_drvdata(dev);
>   int ret = 0;
>  
> - if (device_may_wakeup(dev)) {
> - ret = disable_irq_wake(info->id_irq);
> - if (ret)
> - return ret;
> - }
> -
>   enable_irq(info->id_irq);
>  
>   return ret;
>

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Jiri Kosina

On Thu, 7 Apr 2016, Josh Poimboeuf wrote:

> To do that from C code, I guess we'd still need some arch-specific code
> in an asm() statement to do the actual push?

This could potentially be worked around I believe (thinking for example of 
an on-stack-allocated local variable with predefined contents that the 
compiler would not be allowed to optimize out; certainly not the only 
option).

Thanks,

-- 
Jiri Kosina
SUSE Labs

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Jiri Kosina

On Thu, 7 Apr 2016, Josh Poimboeuf wrote:

> To do that from C code, I guess we'd still need some arch-specific code
> in an asm() statement to do the actual push?

This could potentially be worked around I believe (thinking for example of 
an on-stack-allocated local variable with predefined contents that the 
compiler would not be allowed to optimize out; certainly not the only 
option).

Thanks,

-- 
Jiri Kosina
SUSE Labs

[PATCH] Revert "Input: atmel_mxt_ts - disable interrupt for 50ms after reset"

2016-04-07 Thread Tom Rini

This reverts commit 885f3fb9fa1f9e185e8a4e905157087495734349 due to this
change breaking the touchpad on the Chromebook Pixel 2015 on resume from
sleep or warm resets.

Cc: Olof Johansson 
Cc: Nick Dyer 
Cc: Dmitry Torokhov 
Cc: Henrik Rydberg 
Signed-off-by: Tom Rini 
---
 drivers/input/touchscreen/atmel_mxt_ts.c |9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c 
b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2160512..9b92b60 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -1098,9 +1098,7 @@ static int mxt_soft_reset(struct mxt_data *data)
struct device *dev = &data->client->dev;
int ret = 0;
 
-   dev_info(dev, "Resetting device\n");
-
-   disable_irq(data->irq);
+   dev_info(dev, "Resetting chip\n");
 
reinit_completion(&data->reset_completion);
 
@@ -1108,11 +1106,6 @@ static int mxt_soft_reset(struct mxt_data *data)
if (ret)
return ret;
 
-   /* Ignore CHG line for 100ms after reset */
-   msleep(100);
-
-   enable_irq(data->irq);
-
ret = mxt_wait_for_completion(data, &data->reset_completion,
  MXT_RESET_TIMEOUT);
if (ret)
-- 
1.7.9.5

[PATCH] Revert "Input: atmel_mxt_ts - disable interrupt for 50ms after reset"

2016-04-07 Thread Tom Rini

This reverts commit 885f3fb9fa1f9e185e8a4e905157087495734349 due to this
change breaking the touchpad on the Chromebook Pixel 2015 on resume from
sleep or warm resets.

Cc: Olof Johansson 
Cc: Nick Dyer 
Cc: Dmitry Torokhov 
Cc: Henrik Rydberg 
Signed-off-by: Tom Rini 
---
 drivers/input/touchscreen/atmel_mxt_ts.c |9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c 
b/drivers/input/touchscreen/atmel_mxt_ts.c
index 2160512..9b92b60 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -1098,9 +1098,7 @@ static int mxt_soft_reset(struct mxt_data *data)
struct device *dev = &data->client->dev;
int ret = 0;
 
-   dev_info(dev, "Resetting device\n");
-
-   disable_irq(data->irq);
+   dev_info(dev, "Resetting chip\n");
 
reinit_completion(&data->reset_completion);
 
@@ -1108,11 +1106,6 @@ static int mxt_soft_reset(struct mxt_data *data)
if (ret)
return ret;
 
-   /* Ignore CHG line for 100ms after reset */
-   msleep(100);
-
-   enable_irq(data->irq);
-
ret = mxt_wait_for_completion(data, &data->reset_completion,
  MXT_RESET_TIMEOUT);
if (ret)
-- 
1.7.9.5

Re: [PATCH v4 01/14] x86/boot: enumerate documentation for the x86 hardware_subarch

On Thu, Apr 07, 2016 at 02:25:38PM +0300, Andy Shevchenko wrote:
> On Wed, 2016-04-06 at 17:06 -0700, Luis R. Rodriguez wrote:
> > Although hardware_subarch has been in place since the x86 boot
> > protocol 2.07 it hasn't been used much. Enumerate current possible
> > values to avoid misuses and help with semantics later at boot
> > time should this be used further.
> > 
> > These enums should only ever be used by architecture x86 code,
> > and all that code should be well contained and compartmentalized,
> > clarify that as well.
> 
> Nitpick:
> 
> > + * @X86_SUBARCH_PC: Should be used if the hardware is enumerable
> > using standard
> > + * PC mechanisms (PCI, ACPI) and doesn't need a special boot
> > flow.
> > + * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
> > + * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV
> > boot path,
> > + * which start at asm startup_xen() entry point and later
> > jump to the C
> > + * xen_start_kernel() entry point.
> > + * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet
> > Device) platform
> > + * systems which do not have the PCI legacy interfaces.
> > + * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100)
> > SOC for
> 
> I think 'SoC' (without quotes) will be better.

Amended, since I think I'll need a re-spin and since we may need to take
care of the dom0 Vs domU semantics I'll also make some changes to include
X86_SUBARCH_XEN documentation to annotate that PV guests can be of domU
or dom0 type...

  Luis

Re: [PATCH v4 01/14] x86/boot: enumerate documentation for the x86 hardware_subarch

On Thu, Apr 07, 2016 at 02:25:38PM +0300, Andy Shevchenko wrote:
> On Wed, 2016-04-06 at 17:06 -0700, Luis R. Rodriguez wrote:
> > Although hardware_subarch has been in place since the x86 boot
> > protocol 2.07 it hasn't been used much. Enumerate current possible
> > values to avoid misuses and help with semantics later at boot
> > time should this be used further.
> > 
> > These enums should only ever be used by architecture x86 code,
> > and all that code should be well contained and compartmentalized,
> > clarify that as well.
> 
> Nitpick:
> 
> > + * @X86_SUBARCH_PC: Should be used if the hardware is enumerable
> > using standard
> > + * PC mechanisms (PCI, ACPI) and doesn't need a special boot
> > flow.
> > + * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
> > + * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV
> > boot path,
> > + * which start at asm startup_xen() entry point and later
> > jump to the C
> > + * xen_start_kernel() entry point.
> > + * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet
> > Device) platform
> > + * systems which do not have the PCI legacy interfaces.
> > + * @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100)
> > SOC for
> 
> I think 'SoC' (without quotes) will be better.

Amended, since I think I'll need a re-spin and since we may need to take
care of the dom0 Vs domU semantics I'll also make some changes to include
X86_SUBARCH_XEN documentation to annotate that PV guests can be of domU
or dom0 type...

  Luis

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Josh Poimboeuf

On Thu, Apr 07, 2016 at 11:37:19PM +0200, Jiri Kosina wrote:
> On Thu, 7 Apr 2016, Jessica Yu wrote:
> 
> > Been sort of rattling my head over the scheduler code :-) Just following 
> > the calls in and out of __schedule() it doesn't look like there is a 
> > current flag/mechanism to tell whether or not a task has been 
> > preempted..
> 
> Performing the complete stack unwind just to determine whether task has 
> been preempted non-voluntarily is a slight overkill indeed :/
> 
> > Is there any reason why you didn't just create a new task flag, 
> > something like TIF_PREEMPTED_IRQ, which would be set once 
> > preempt_schedule_irq() is entered and unset after __schedule() returns 
> > (for that task)? This would roughly correspond to setting the task flag 
> > when the frame for preempt_schedule_irq() is pushed and unsetting it 
> > just before the frame preempt_schedule_irq() is popped for that task. 
> > This seems simpler than walking through all the frames just to see if 
> > in_preempt_schedule_irq() had been called. Would that work?
> 
> Alternatively, without eating up a TIF_ space, it'd be possible to push a 
> magic contents on top of the stack in preempt_schedule_irq() (and pop it 
> once we are returning from there), and if such magic value is detected, we 
> just don't bother and claim unreliability.
> 
> That has advantages of both approaches combined, i.e. it's relatively 
> low-cost in terms of performance penalty, and it's reliable (in a sense 
> that you don't have false positives).
> 
> The small disadvantage is that you can (very rarely, depending on the 
> chosen magic) have false negatives. That probably doesn't hurt too much, 
> given the high improbability and non-lethal consequences.
> 
> How does that sound?

To do that from C code, I guess we'd still need some arch-specific code
in an asm() statement to do the actual push?

I think I'd prefer just updating some field in the task_struct.  That
way it would be simple and arch-independent.  And the stack walker
wouldn't have to scan for some special value on the stack.

-- 
Josh

Re: sched: horrible way to detect whether a task has been preempted

2016-04-07 Thread Josh Poimboeuf

On Thu, Apr 07, 2016 at 11:37:19PM +0200, Jiri Kosina wrote:
> On Thu, 7 Apr 2016, Jessica Yu wrote:
> 
> > Been sort of rattling my head over the scheduler code :-) Just following 
> > the calls in and out of __schedule() it doesn't look like there is a 
> > current flag/mechanism to tell whether or not a task has been 
> > preempted..
> 
> Performing the complete stack unwind just to determine whether task has 
> been preempted non-voluntarily is a slight overkill indeed :/
> 
> > Is there any reason why you didn't just create a new task flag, 
> > something like TIF_PREEMPTED_IRQ, which would be set once 
> > preempt_schedule_irq() is entered and unset after __schedule() returns 
> > (for that task)? This would roughly correspond to setting the task flag 
> > when the frame for preempt_schedule_irq() is pushed and unsetting it 
> > just before the frame preempt_schedule_irq() is popped for that task. 
> > This seems simpler than walking through all the frames just to see if 
> > in_preempt_schedule_irq() had been called. Would that work?
> 
> Alternatively, without eating up a TIF_ space, it'd be possible to push a 
> magic contents on top of the stack in preempt_schedule_irq() (and pop it 
> once we are returning from there), and if such magic value is detected, we 
> just don't bother and claim unreliability.
> 
> That has advantages of both approaches combined, i.e. it's relatively 
> low-cost in terms of performance penalty, and it's reliable (in a sense 
> that you don't have false positives).
> 
> The small disadvantage is that you can (very rarely, depending on the 
> chosen magic) have false negatives. That probably doesn't hurt too much, 
> given the high improbability and non-lethal consequences.
> 
> How does that sound?

To do that from C code, I guess we'd still need some arch-specific code
in an asm() statement to do the actual push?

I think I'd prefer just updating some field in the task_struct.  That
way it would be simple and arch-independent.  And the stack walker
wouldn't have to scan for some special value on the stack.

-- 
Josh

[PATCH v2] libnvdimm, test: add mock SMART data payload

2016-04-07 Thread Dan Williams

Provide simulated SMART data to enable the ndctl implementation of SMART
data retrieval and parsing.

The payload is defined here, "Section 4.1 SMART and Health Info
(Function Index 1)":

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

Signed-off-by: Dan Williams 
---
Changes in v2:
* #define the ND_SMART_ flags definitions (jth)
* add a struct nd_smart_threshold_payload definition
* add a mock implementation of smart threshold data

 drivers/nvdimm/bus.c |3 +++
 include/uapi/linux/ndctl.h   |   36 ++-
 tools/testing/nvdimm/test/nfit.c |   44 ++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 19f822d7f652..8111b1299515 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -783,6 +783,9 @@ int __init nvdimm_bus_init(void)
 {
int rc;
 
+   BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
+   BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
+
rc = bus_register(&nvdimm_bus_type);
if (rc)
return rc;
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 7cc28ab05b87..59c61e018a86 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, Intel Corporation.
+ * Copyright (c) 2014-2016, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU Lesser General Public License,
@@ -20,11 +20,45 @@ struct nd_cmd_smart {
__u8 data[128];
 } __packed;
 
+#define ND_SMART_HEALTH_VALID  (1 << 0)
+#define ND_SMART_TEMP_VALID(1 << 1)
+#define ND_SMART_SPARES_VALID  (1 << 2)
+#define ND_SMART_ALARM_VALID   (1 << 3)
+#define ND_SMART_USED_VALID(1 << 4)
+#define ND_SMART_SHUTDOWN_VALID(1 << 5)
+#define ND_SMART_VENDOR_VALID  (1 << 6)
+#define ND_SMART_TEMP_TRIP (1 << 0)
+#define ND_SMART_SPARE_TRIP(1 << 1)
+#define ND_SMART_NON_CRITICAL_HEALTH   (1 << 0)
+#define ND_SMART_CRITICAL_HEALTH   (1 << 1)
+#define ND_SMART_FATAL_HEALTH  (1 << 2)
+
+struct nd_smart_payload {
+   __u32 flags;
+   __u8 reserved0[4];
+   __u8 health;
+   __u16 temperature;
+   __u8 spares;
+   __u8 alarm_flags;
+   __u8 life_used;
+   __u8 shutdown_state;
+   __u8 reserved1;
+   __u32 vendor_size;
+   __u8 vendor_data[108];
+} __packed;
+
 struct nd_cmd_smart_threshold {
__u32 status;
__u8 data[8];
 } __packed;
 
+struct nd_smart_threshold_payload {
+   __u16 alarm_control;
+   __u16 temperature;
+   __u8 spares;
+   __u8 reserved[3];
+} __packed;
+
 struct nd_cmd_dimm_flags {
__u32 status;
__u32 flags;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 3187322eeed7..d1c98d4386d4 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -330,6 +330,42 @@ static int nfit_test_cmd_clear_error(struct 
nd_cmd_clear_error *clear_err,
return 0;
 }
 
+static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int 
buf_len)
+{
+   static const struct nd_smart_payload smart_data = {
+   .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
+   | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
+   | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
+   .health = ND_SMART_NON_CRITICAL_HEALTH,
+   .temperature = 23 * 16,
+   .spares = 75,
+   .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+   .life_used = 5,
+   .shutdown_state = 0,
+   .vendor_size = 0,
+   };
+
+   if (buf_len < sizeof(*smart))
+   return -EINVAL;
+   memcpy(smart->data, &smart_data, sizeof(smart_data));
+   return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold 
*smart_t,
+   unsigned int buf_len)
+{
+   static const struct nd_smart_threshold_payload smart_t_data = {
+   .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+   .temperature = 40 * 16,
+   .spares = 5,
+   };
+
+   if (buf_len < sizeof(*smart_t))
+   return -EINVAL;
+   memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+   return 0;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -368,6 +404,12 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor 
*nd_desc,
rc = nfit_test_cmd_set_config_data(buf, buf_len,
t->label[i]);
break;
+   case ND_CMD_SMART:
+   rc =

[PATCH v2] libnvdimm, test: add mock SMART data payload

2016-04-07 Thread Dan Williams

Provide simulated SMART data to enable the ndctl implementation of SMART
data retrieval and parsing.

The payload is defined here, "Section 4.1 SMART and Health Info
(Function Index 1)":

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

Signed-off-by: Dan Williams 
---
Changes in v2:
* #define the ND_SMART_ flags definitions (jth)
* add a struct nd_smart_threshold_payload definition
* add a mock implementation of smart threshold data

 drivers/nvdimm/bus.c |3 +++
 include/uapi/linux/ndctl.h   |   36 ++-
 tools/testing/nvdimm/test/nfit.c |   44 ++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 19f822d7f652..8111b1299515 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -783,6 +783,9 @@ int __init nvdimm_bus_init(void)
 {
int rc;
 
+   BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
+   BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
+
rc = bus_register(&nvdimm_bus_type);
if (rc)
return rc;
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 7cc28ab05b87..59c61e018a86 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, Intel Corporation.
+ * Copyright (c) 2014-2016, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU Lesser General Public License,
@@ -20,11 +20,45 @@ struct nd_cmd_smart {
__u8 data[128];
 } __packed;
 
+#define ND_SMART_HEALTH_VALID  (1 << 0)
+#define ND_SMART_TEMP_VALID(1 << 1)
+#define ND_SMART_SPARES_VALID  (1 << 2)
+#define ND_SMART_ALARM_VALID   (1 << 3)
+#define ND_SMART_USED_VALID(1 << 4)
+#define ND_SMART_SHUTDOWN_VALID(1 << 5)
+#define ND_SMART_VENDOR_VALID  (1 << 6)
+#define ND_SMART_TEMP_TRIP (1 << 0)
+#define ND_SMART_SPARE_TRIP(1 << 1)
+#define ND_SMART_NON_CRITICAL_HEALTH   (1 << 0)
+#define ND_SMART_CRITICAL_HEALTH   (1 << 1)
+#define ND_SMART_FATAL_HEALTH  (1 << 2)
+
+struct nd_smart_payload {
+   __u32 flags;
+   __u8 reserved0[4];
+   __u8 health;
+   __u16 temperature;
+   __u8 spares;
+   __u8 alarm_flags;
+   __u8 life_used;
+   __u8 shutdown_state;
+   __u8 reserved1;
+   __u32 vendor_size;
+   __u8 vendor_data[108];
+} __packed;
+
 struct nd_cmd_smart_threshold {
__u32 status;
__u8 data[8];
 } __packed;
 
+struct nd_smart_threshold_payload {
+   __u16 alarm_control;
+   __u16 temperature;
+   __u8 spares;
+   __u8 reserved[3];
+} __packed;
+
 struct nd_cmd_dimm_flags {
__u32 status;
__u32 flags;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 3187322eeed7..d1c98d4386d4 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -330,6 +330,42 @@ static int nfit_test_cmd_clear_error(struct 
nd_cmd_clear_error *clear_err,
return 0;
 }
 
+static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int 
buf_len)
+{
+   static const struct nd_smart_payload smart_data = {
+   .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
+   | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
+   | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
+   .health = ND_SMART_NON_CRITICAL_HEALTH,
+   .temperature = 23 * 16,
+   .spares = 75,
+   .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+   .life_used = 5,
+   .shutdown_state = 0,
+   .vendor_size = 0,
+   };
+
+   if (buf_len < sizeof(*smart))
+   return -EINVAL;
+   memcpy(smart->data, &smart_data, sizeof(smart_data));
+   return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold 
*smart_t,
+   unsigned int buf_len)
+{
+   static const struct nd_smart_threshold_payload smart_t_data = {
+   .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
+   .temperature = 40 * 16,
+   .spares = 5,
+   };
+
+   if (buf_len < sizeof(*smart_t))
+   return -EINVAL;
+   memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+   return 0;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -368,6 +404,12 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor 
*nd_desc,
rc = nfit_test_cmd_set_config_data(buf, buf_len,
t->label[i]);
break;
+   case ND_CMD_SMART:
+   rc = nfit_test_cmd_smart(buf,

Re: [PATCH v4 08/14] apm32: remove paravirt_enabled() use

On Thu, Apr 07, 2016 at 09:08:36AM -0400, Boris Ostrovsky wrote:
> On 04/06/2016 08:06 PM, Luis R. Rodriguez wrote:
> >There is already a check for apm_info.bios == 0, the
> >apm_info.bios is set from the boot_params.apm_bios_info.
> >Both Xen and lguest, which are also the only ones that set
> >paravirt_enabled to true, never set the apm_bios.info. The
> >
> >Xen folks are sure force disable to 0 is not needed,
> 
> Because apm_info lives in .bss (which we recently made sure is
> cleared on Xen PV). May be worth mentioning in the commit message so
> that we don't forget why this is not needed.

Thanks, I'll change that last paragraph with:

Xen folks are sure force disable to 0 is not needed because
apm_info lives in .bss, we recently forced disabled this on
lguest, and on the Xen side just to be sure Boris zeroed out
the .bss for PV guests through commit 04b6b4a56884327c1648
("xen/x86: Zero out .bss for PV guests"). With this care taken
into consideration the paravirt_enabled() check is simply not
needed anymore.

> I think you also have this statement in other patches.

Indeed, I'll highlight this on the tboot commit log as well.

  Luis

Re: [PATCH v4 08/14] apm32: remove paravirt_enabled() use

On Thu, Apr 07, 2016 at 09:08:36AM -0400, Boris Ostrovsky wrote:
> On 04/06/2016 08:06 PM, Luis R. Rodriguez wrote:
> >There is already a check for apm_info.bios == 0, the
> >apm_info.bios is set from the boot_params.apm_bios_info.
> >Both Xen and lguest, which are also the only ones that set
> >paravirt_enabled to true, never set the apm_bios.info. The
> >
> >Xen folks are sure force disable to 0 is not needed,
> 
> Because apm_info lives in .bss (which we recently made sure is
> cleared on Xen PV). May be worth mentioning in the commit message so
> that we don't forget why this is not needed.

Thanks, I'll change that last paragraph with:

Xen folks are sure force disable to 0 is not needed because
apm_info lives in .bss, we recently forced disabled this on
lguest, and on the Xen side just to be sure Boris zeroed out
the .bss for PV guests through commit 04b6b4a56884327c1648
("xen/x86: Zero out .bss for PV guests"). With this care taken
into consideration the paravirt_enabled() check is simply not
needed anymore.

> I think you also have this statement in other patches.

Indeed, I'll highlight this on the tboot commit log as well.

  Luis

Re: [PATCH v4] ARM: dts: kirkwood: Add DTS for Linksys EA4200v2/EA4500\

2016-04-07 Thread Gregory CLEMENT

Hi Bert,
 
 On mar., avril 05 2016, "Imre Kaloz"  wrote:

> On Mon, 04 Apr 2016 16:55:28 +0200, Andrew Lunn  wrote:
>
>> On Mon, Apr 04, 2016 at 04:46:07PM +0200, Bert Vermeulen wrote:
>>> This platform is based on a Marvell 88E6282 SoC and 88E6171 switch.
>>>
>>> Signed-off-by: Bert Vermeulen 
>>
>> Reviewed-by: Andrew Lunn 
>
> Reviewed-by: Imre Kaloz 


Applied on mvebu/dt with the reviewed-by flags and a fix on the comment
block style.

Thanks,

Gregory


>
>
> Thanks,
>
> Imre

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

Re: [PATCH v4] ARM: dts: kirkwood: Add DTS for Linksys EA4200v2/EA4500\

2016-04-07 Thread Gregory CLEMENT

Hi Bert,
 
 On mar., avril 05 2016, "Imre Kaloz"  wrote:

> On Mon, 04 Apr 2016 16:55:28 +0200, Andrew Lunn  wrote:
>
>> On Mon, Apr 04, 2016 at 04:46:07PM +0200, Bert Vermeulen wrote:
>>> This platform is based on a Marvell 88E6282 SoC and 88E6171 switch.
>>>
>>> Signed-off-by: Bert Vermeulen 
>>
>> Reviewed-by: Andrew Lunn 
>
> Reviewed-by: Imre Kaloz 


Applied on mvebu/dt with the reviewed-by flags and a fix on the comment
block style.

Thanks,

Gregory


>
>
> Thanks,
>
> Imre

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

Re: [PATCH] clocksource: Add missing include of of.h.

On Wed, Mar 9, 2016 at 4:16 PM, David Lechner  wrote:
> This header uses OF_DECLARE_1 which is defined in linux/of.h.
>
> This fixes getting unhelpful compiler error messages about missing ')'
> before a string constant.
>
> Signed-off-by: David Lechner 


Queued for testing.

thanks
-john

Re: [PATCH] clocksource: use clocksource_freq2mult() helper

On Wed, Mar 16, 2016 at 3:21 AM, Alexander Kuleshov
 wrote:
> which is introduced in the 7aca0c072 commit to simplify calculation of
> the mult and shift in the clocks_calc_mult_shift().
>
> Signed-off-by: Alexander Kuleshov 

Queued for testing.

thanks
-john

Re: [PATCH] clocksource: Add missing include of of.h.

On Wed, Mar 9, 2016 at 4:16 PM, David Lechner  wrote:
> This header uses OF_DECLARE_1 which is defined in linux/of.h.
>
> This fixes getting unhelpful compiler error messages about missing ')'
> before a string constant.
>
> Signed-off-by: David Lechner 


Queued for testing.

thanks
-john

Re: [PATCH] clocksource: use clocksource_freq2mult() helper

On Wed, Mar 16, 2016 at 3:21 AM, Alexander Kuleshov
 wrote:
> which is introduced in the 7aca0c072 commit to simplify calculation of
> the mult and shift in the clocks_calc_mult_shift().
>
> Signed-off-by: Alexander Kuleshov 

Queued for testing.

thanks
-john

Greetings My Dear Friend

2016-04-07 Thread Mrs. Hayati Al-Mashood

Greetings My Dear Friend

I had no other means to contact you for this business transaction other than 
through email. I am sorry if you will be offended by my message. I am Mrs. 
Hayati Al-Mashood the Head of Accounting Audit Department of Empire Bank here 
in Dubai, United Arab Emirates. I discovered a sum of $25.5 Million USD in an 
account that belongs to one of our foreign deceased customers who died of heart 
attack here in Dubai, United Arab Emirates and he was from your country. Our 
bank has been waiting for any of his relatives to come to claim the money but 
nobody has done that SINCE 2010.

I seek your consent as my foreign business partner in this transaction to 
present you as the next of kin/Beneficiary to the deceased,so that the funds 
will be released to you because, you fit in to stand as the next of kin of our 
late client because you have the same last name as our late client and am sure 
that the $25.5 Million USD will be released to you and we share 50% for you, 
50% for me

Get back to me with your Full names and direct telephone numbers for more 
details

Best Regards
Mrs. Hayati Al-Mashood

Greetings My Dear Friend

2016-04-07 Thread Mrs. Hayati Al-Mashood

Greetings My Dear Friend

I had no other means to contact you for this business transaction other than 
through email. I am sorry if you will be offended by my message. I am Mrs. 
Hayati Al-Mashood the Head of Accounting Audit Department of Empire Bank here 
in Dubai, United Arab Emirates. I discovered a sum of $25.5 Million USD in an 
account that belongs to one of our foreign deceased customers who died of heart 
attack here in Dubai, United Arab Emirates and he was from your country. Our 
bank has been waiting for any of his relatives to come to claim the money but 
nobody has done that SINCE 2010.

I seek your consent as my foreign business partner in this transaction to 
present you as the next of kin/Beneficiary to the deceased,so that the funds 
will be released to you because, you fit in to stand as the next of kin of our 
late client because you have the same last name as our late client and am sure 
that the $25.5 Million USD will be released to you and we share 50% for you, 
50% for me

Get back to me with your Full names and direct telephone numbers for more 
details

Best Regards
Mrs. Hayati Al-Mashood

Deadlock related to file permissions and/or cgroup, 4.4.6+

2016-04-07 Thread Ben Greear


This is from a modified 4.4.6+ kernel, with local patches.  Git tree found
below, but I don't think this lockup is related to any local changes we have 
made.

http://dmz2.candelatech.com/?p=linux-4.4.dev.y/.git;a=summary

The test case involves using a libcurl based application that
is making an ftp request to a second port on the same machine.
vsftp is serving up the ftp file.
The ports are looped together with an ethernet cable, and routing rules
are set up so that traffic flows over the external interface.

The key change from a working solution and kernel deadlock, is that
with the file-to-be-read has permissions 700, it fails, but 600
does not.  (As I was writing this up, our system-test guy managed to
lock it up with 600 permissions as well, so it is not *just* related to
permission 700).

This is very repeatable permissions 700.

The tainting probably comes from a warning in another (GPL, but out-of-tree 
module that we write),
but very unlikely that has anything to do with this issue.


INFO: task systemd:1 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
systemd D 88006b0ffa60 0 1  0 0x
 88006b0ffa60 00ff811875d3 88006bc9ff98 88006bc9ff80
 88006b0f0008 880069f24c00 88006b0f 88006b0f8000
 88006b0ffac0 0040 8375c6a0 8375c698
Call Trace:
 [] schedule+0xc3/0xe0
 [] percpu_down_write+0x196/0x1d9
 [] ? percpu_up_read+0x4f/0x4f
 [] ? wake_up_atomic_t+0x27/0x27
 [] __cgroup_procs_write+0xd3/0x36f
 [] ? cgroup_attach_task_all+0xba/0xba
 [] ? match_held_lock+0x30/0x103
 [] ? __lock_is_held+0x29/0x64
 [] cgroup_procs_write+0xf/0x11
 [] cgroup_file_write+0xb8/0x1c7
 [] ? rcu_read_unlock+0x5d/0x5d
 [] ? __lock_is_held+0x4a/0x64
 [] ? lock_is_held+0x73/0x7f
 [] ? rcu_read_unlock+0x5d/0x5d
 [] kernfs_fop_write+0x16d/0x1ce
 [] __vfs_write+0xca/0x1e2
 [] ? __vfs_read+0x1da/0x1da
 [] ? __lock_is_held+0x29/0x64
 [] ? lock_is_held+0x73/0x7f
 [] ? rcu_read_lock_sched_held+0x60/0x68
 [] ? rcu_sync_lockdep_assert+0x3c/0x62
 [] ? update_fast_ctr+0x36/0x58
 [] ? percpu_down_read+0x49/0x72
 [] ? __sb_start_write+0x5a/0xab
 [] ? __sb_start_write+0x5a/0xab
 [] vfs_write+0xb5/0x116
 [] SyS_write+0xb5/0x10f
 [] ? SyS_read+0x10f/0x10f
 [] ? trace_hardirqs_on_caller+0x209/0x250
 [] ? trace_hardirqs_on_thunk+0x17/0x19
 [] entry_SYSCALL_64_fastpath+0x16/0x7a
4 locks held by systemd/1:
 #0:  (sb_writers#6){.+.+.+}, at: [] 
__sb_start_write+0x5a/0xab
 #1:  (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0xcc/0x1ce
 #2:  (cgroup_mutex){+.+.+.}, at: [] 
cgroup_kn_lock_live+0x8c/0xec
 #3:  (&cgroup_threadgroup_rwsem){++}, at: [] 
percpu_down_write+0x84/0x1d9
INFO: task kworker/u8:0:6 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/u8:0D 88006b16f890 0 6  2 0x
Workqueue: netns cleanup_net
 88006b16f890 001d 88006bd9ff98 88006bd9ff80
 88006b142608 880051eaa600 88006b142600 88006b168000
 11000d62df18 88006b142600 88006b16fa58 88006b142600
Call Trace:
 [] schedule+0xc3/0xe0
 [] schedule_timeout+0x86/0x181
 [] ? usleep_range+0xc2/0xc2
 [] ? mark_held_locks+0x2d/0x90
 [] ? _raw_spin_unlock_irq+0x27/0x3f
 [] ? trace_hardirqs_on_caller+0x209/0x250
 [] __wait_for_common+0x1ee/0x2a3
 [] ? __wait_for_common+0x1ee/0x2a3
 [] ? mark_held_locks+0x2d/0x90
 [] ? usleep_range+0xc2/0xc2
 [] ? out_of_line_wait_on_bit_lock+0xdd/0xdd
 [] ? wake_up_q+0x5e/0x5e
 [] ? try_to_wake_up+0x2c3/0x2d5
 [] wait_for_completion+0x1f/0x21
 [] kthread_stop+0x132/0x330
 [] pg_net_exit+0x185/0x210 [pktgen]
 [] ? pktgen_device_event+0x181/0x181 [pktgen]
 [] ? rcu_read_unlock+0x5b/0x5d [8021q]
 [] ops_exit_list+0x6b/0x88
 [] cleanup_net+0x2b7/0x3cd
 [] ? net_drop_ns+0x3d/0x3d
 [] ? match_held_lock+0x30/0x103
 [] ? lock_is_held+0x73/0x7f
 [] process_one_work+0x3ed/0x77c
 [] ? pool_mayday_timeout+0x1d9/0x1d9
 [] ? lock_acquire+0x193/0x233
 [] ? worker_clr_flags+0x71/0xa9
 [] worker_thread+0x2ba/0x3c2
 [] ? rescuer_thread+0x534/0x534
 [] kthread+0x162/0x171
 [] ? kthread_parkme+0x2d/0x2d
 [] ? kthread_parkme+0x2d/0x2d
 [] ret_from_fork+0x3f/0x70
 [] ? kthread_parkme+0x2d/0x2d
3 locks held by kworker/u8:0/6:
 #0:  ("%s""netns"){.+.+.+}, at: [] 
process_one_work+0x28b/0x77c
 #1:  (net_cleanup_work){+.+.+.}, at: [] 
process_one_work+0x28b/0x77c
 #2:  (net_mutex){+.+.+.}, at: [] cleanup_net+0x10f/0x3cd
INFO: task kworker/dying:20 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/dying   D 88006b2efaa0 020  2 0x
 88006b2efaa0 00ff88006b2d53c8 88006bd1ff98 88006bd1ff80
 88006b2d4c08 8800684f8000 88006b2d4c00 88006b2e8000
 88006b2d4c00

Deadlock related to file permissions and/or cgroup, 4.4.6+

2016-04-07 Thread Ben Greear


This is from a modified 4.4.6+ kernel, with local patches.  Git tree found
below, but I don't think this lockup is related to any local changes we have 
made.

http://dmz2.candelatech.com/?p=linux-4.4.dev.y/.git;a=summary

The test case involves using a libcurl based application that
is making an ftp request to a second port on the same machine.
vsftp is serving up the ftp file.
The ports are looped together with an ethernet cable, and routing rules
are set up so that traffic flows over the external interface.

The key change from a working solution and kernel deadlock, is that
with the file-to-be-read has permissions 700, it fails, but 600
does not.  (As I was writing this up, our system-test guy managed to
lock it up with 600 permissions as well, so it is not *just* related to
permission 700).

This is very repeatable permissions 700.

The tainting probably comes from a warning in another (GPL, but out-of-tree 
module that we write),
but very unlikely that has anything to do with this issue.


INFO: task systemd:1 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
systemd D 88006b0ffa60 0 1  0 0x
 88006b0ffa60 00ff811875d3 88006bc9ff98 88006bc9ff80
 88006b0f0008 880069f24c00 88006b0f 88006b0f8000
 88006b0ffac0 0040 8375c6a0 8375c698
Call Trace:
 [] schedule+0xc3/0xe0
 [] percpu_down_write+0x196/0x1d9
 [] ? percpu_up_read+0x4f/0x4f
 [] ? wake_up_atomic_t+0x27/0x27
 [] __cgroup_procs_write+0xd3/0x36f
 [] ? cgroup_attach_task_all+0xba/0xba
 [] ? match_held_lock+0x30/0x103
 [] ? __lock_is_held+0x29/0x64
 [] cgroup_procs_write+0xf/0x11
 [] cgroup_file_write+0xb8/0x1c7
 [] ? rcu_read_unlock+0x5d/0x5d
 [] ? __lock_is_held+0x4a/0x64
 [] ? lock_is_held+0x73/0x7f
 [] ? rcu_read_unlock+0x5d/0x5d
 [] kernfs_fop_write+0x16d/0x1ce
 [] __vfs_write+0xca/0x1e2
 [] ? __vfs_read+0x1da/0x1da
 [] ? __lock_is_held+0x29/0x64
 [] ? lock_is_held+0x73/0x7f
 [] ? rcu_read_lock_sched_held+0x60/0x68
 [] ? rcu_sync_lockdep_assert+0x3c/0x62
 [] ? update_fast_ctr+0x36/0x58
 [] ? percpu_down_read+0x49/0x72
 [] ? __sb_start_write+0x5a/0xab
 [] ? __sb_start_write+0x5a/0xab
 [] vfs_write+0xb5/0x116
 [] SyS_write+0xb5/0x10f
 [] ? SyS_read+0x10f/0x10f
 [] ? trace_hardirqs_on_caller+0x209/0x250
 [] ? trace_hardirqs_on_thunk+0x17/0x19
 [] entry_SYSCALL_64_fastpath+0x16/0x7a
4 locks held by systemd/1:
 #0:  (sb_writers#6){.+.+.+}, at: [] 
__sb_start_write+0x5a/0xab
 #1:  (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0xcc/0x1ce
 #2:  (cgroup_mutex){+.+.+.}, at: [] 
cgroup_kn_lock_live+0x8c/0xec
 #3:  (&cgroup_threadgroup_rwsem){++}, at: [] 
percpu_down_write+0x84/0x1d9
INFO: task kworker/u8:0:6 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/u8:0D 88006b16f890 0 6  2 0x
Workqueue: netns cleanup_net
 88006b16f890 001d 88006bd9ff98 88006bd9ff80
 88006b142608 880051eaa600 88006b142600 88006b168000
 11000d62df18 88006b142600 88006b16fa58 88006b142600
Call Trace:
 [] schedule+0xc3/0xe0
 [] schedule_timeout+0x86/0x181
 [] ? usleep_range+0xc2/0xc2
 [] ? mark_held_locks+0x2d/0x90
 [] ? _raw_spin_unlock_irq+0x27/0x3f
 [] ? trace_hardirqs_on_caller+0x209/0x250
 [] __wait_for_common+0x1ee/0x2a3
 [] ? __wait_for_common+0x1ee/0x2a3
 [] ? mark_held_locks+0x2d/0x90
 [] ? usleep_range+0xc2/0xc2
 [] ? out_of_line_wait_on_bit_lock+0xdd/0xdd
 [] ? wake_up_q+0x5e/0x5e
 [] ? try_to_wake_up+0x2c3/0x2d5
 [] wait_for_completion+0x1f/0x21
 [] kthread_stop+0x132/0x330
 [] pg_net_exit+0x185/0x210 [pktgen]
 [] ? pktgen_device_event+0x181/0x181 [pktgen]
 [] ? rcu_read_unlock+0x5b/0x5d [8021q]
 [] ops_exit_list+0x6b/0x88
 [] cleanup_net+0x2b7/0x3cd
 [] ? net_drop_ns+0x3d/0x3d
 [] ? match_held_lock+0x30/0x103
 [] ? lock_is_held+0x73/0x7f
 [] process_one_work+0x3ed/0x77c
 [] ? pool_mayday_timeout+0x1d9/0x1d9
 [] ? lock_acquire+0x193/0x233
 [] ? worker_clr_flags+0x71/0xa9
 [] worker_thread+0x2ba/0x3c2
 [] ? rescuer_thread+0x534/0x534
 [] kthread+0x162/0x171
 [] ? kthread_parkme+0x2d/0x2d
 [] ? kthread_parkme+0x2d/0x2d
 [] ret_from_fork+0x3f/0x70
 [] ? kthread_parkme+0x2d/0x2d
3 locks held by kworker/u8:0/6:
 #0:  ("%s""netns"){.+.+.+}, at: [] 
process_one_work+0x28b/0x77c
 #1:  (net_cleanup_work){+.+.+.}, at: [] 
process_one_work+0x28b/0x77c
 #2:  (net_mutex){+.+.+.}, at: [] cleanup_net+0x10f/0x3cd
INFO: task kworker/dying:20 blocked for more than 180 seconds.
  Tainted: GW  O4.4.6+ #23
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
kworker/dying   D 88006b2efaa0 020  2 0x
 88006b2efaa0 00ff88006b2d53c8 88006bd1ff98 88006bd1ff80
 88006b2d4c08 8800684f8000 88006b2d4c00 88006b2e8000
 88006b2d4c00

Re: [RFT PATCH 0/4] usb: dwc2: Fix core reset and force mode delay problems

2016-04-07 Thread John Youn

On 4/7/2016 1:36 PM, Michael Niewoehner wrote:
> 
> Am 07.04.2016 um 20:41 schrieb John Youn :
> 
>> On 3/31/2016 2:44 PM, Michael Niewoehner wrote:
>>> Hi John,
>>>
>>> Am 29.03.2016 um 04:36 schrieb John Youn :
>>>
 Hi,

 The following patch series addresses the core reset and force mode
 delay problems we have been seeing on dwc2 for some platforms.

 I think I have identified the source of the inconsistencies between
 platforms and this series attempts to address them.

 Basically everything stems from the IDDIG debounce filter delay, which
 is a function of the PHY clock speed and can range from 5-50 ms if
 enabled. This delay must be taken into account on core reset and force
 modes. A full explanation is provided in the patch commit log and code
 comments.

 The first two patches are prerequisites to the force mode fixes,
 including one patch that was sent separately by Przemek Rudy. I have
 resubmitted it with this series for convenience.

 Please help by reviewing and testing on your platforms.

 Patches were tested on:
 * Synopsys HAPS platform IP 3.20a OTG, dr_mode=OTG,HOST,PERIPHERAL

 Regards,
 John

 John Youn (3):
 usb: dwc2: gadget: Only initialize device if in device mode
 usb: dwc2: Add delay to core soft reset
 usb: dwc2: Properly account for the force mode delays

 Przemek Rudy (1):
 usb: dwc2: do not override forced dr_mode in gadget setup

 drivers/usb/dwc2/core.c | 195 
 
 drivers/usb/dwc2/core.h |   2 +-
 drivers/usb/dwc2/gadget.c   |  30 +--
 drivers/usb/dwc2/hcd.c  |   6 +-
 drivers/usb/dwc2/hw.h   |   1 +
 drivers/usb/dwc2/platform.c |   9 +-
 6 files changed, 161 insertions(+), 82 deletions(-)

 -- 
 2.7.4

>>>
>>> after applying your patch series on v4.6-rc1 usb keeps being broken on 
>>> rk3188.
>>> Besides that I get "dwc2 1018.usb: dwc2_wait_for_mode: Couldn't set 
>>> host mode“ repeatedly.
>>>
>>> Currently this works for me:
>>> - Revert "usb: dwc2: Fix probe problem on bcm2835“
>>> - Apply "usb: dwc2: Add a 10 ms delay to dwc2_core_reset()"
>>>
>>>
>>> Best regards
>>> Michael
>>>
>>
>> Thanks Michael.
>>
>> I won't be able to look at this again until next week. In the meantime
>> could you provide a driver log? In particular I want to see the values
>> of your GHWCFG registers, and where you are seeing the
>> dwc2_wait_for_mode() failure.
>>
>> Regards,
>> John
> 
> Looks like the problem is gone on -rc2… on -rc1 the errors came up shortly 
> after "dwc2 1018.usb“ messages.
> USB keeps being broken, though. The USB hub is detected but nothing that is 
> attached to it.
> 
> Here are the logs and register values for each test with Doug’s and your 
> patches.
> 
> Michael
> 
> 
> good usb, Doug's patches
> 
> [0.420125] usbcore: registered new interface driver usbfs 
>   
> [0.426246] usbcore: registered new interface driver hub   
>   
> [0.432296] usbcore: registered new device driver usb
> [...]
> [0.853769] 1018.usb supply vusb_d not found, using dummy regulator
>   
> [0.860560] 1018.usb supply vusb_a not found, using dummy regulator
>   
> [0.867365] dwc2 1018.usb: Configuration mismatch. dr_mode forced to 
> host
> [0.977737] dwc2 1018.usb: 128 invalid for host_nperio_tx_fifo_size. 
> Check H.
> [0.986562] dwc2 1018.usb: 256 invalid for host_perio_tx_fifo_size. 
> Check HW.
> [1.047959] dwc2 1018.usb: DWC OTG Controller  
>   
> [1.052732] dwc2 1018.usb: new USB bus registered, assigned bus number 
> 1 
> [1.059868] dwc2 1018.usb: irq 24, io mem 0x   
>   
> [1.065586] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002  
>   
> [1.072430] usb usb1: New USB device strings: Mfr=3, Product=2, 
> SerialNumber=1   
> [1.079706] usb usb1: Product: DWC OTG Controller  
>   
> [1.084432] usb usb1: Manufacturer: Linux 4.6.0-rc2+ dwc2_hsotg
>   
> [1.090390] usb usb1: SerialNumber: 1018.usb   
>   
> [1.096000] hub 1-0:1.0: USB hub found 
>   
> [1.099884] hub 1-0:1.0: 1 port detected   
>   
> [1.104668] 101c.usb supply vusb_d not found, using dummy regulator
>   
> [1.111428] 101c.usb supply vusb_a not found, using dummy regulator
>   
> [1.247968] dwc2 101c.usb: DWC OTG Controller  
>   
> [1.252743] dwc2 101c.usb: new USB bus registered, assigned bus number 
> 2 
> [1.259879] dwc2 101c.usb: irq 25,

Re: [RFT PATCH 0/4] usb: dwc2: Fix core reset and force mode delay problems

2016-04-07 Thread John Youn

On 4/7/2016 1:36 PM, Michael Niewoehner wrote:
> 
> Am 07.04.2016 um 20:41 schrieb John Youn :
> 
>> On 3/31/2016 2:44 PM, Michael Niewoehner wrote:
>>> Hi John,
>>>
>>> Am 29.03.2016 um 04:36 schrieb John Youn :
>>>
 Hi,

 The following patch series addresses the core reset and force mode
 delay problems we have been seeing on dwc2 for some platforms.

 I think I have identified the source of the inconsistencies between
 platforms and this series attempts to address them.

 Basically everything stems from the IDDIG debounce filter delay, which
 is a function of the PHY clock speed and can range from 5-50 ms if
 enabled. This delay must be taken into account on core reset and force
 modes. A full explanation is provided in the patch commit log and code
 comments.

 The first two patches are prerequisites to the force mode fixes,
 including one patch that was sent separately by Przemek Rudy. I have
 resubmitted it with this series for convenience.

 Please help by reviewing and testing on your platforms.

 Patches were tested on:
 * Synopsys HAPS platform IP 3.20a OTG, dr_mode=OTG,HOST,PERIPHERAL

 Regards,
 John

 John Youn (3):
 usb: dwc2: gadget: Only initialize device if in device mode
 usb: dwc2: Add delay to core soft reset
 usb: dwc2: Properly account for the force mode delays

 Przemek Rudy (1):
 usb: dwc2: do not override forced dr_mode in gadget setup

 drivers/usb/dwc2/core.c | 195 
 
 drivers/usb/dwc2/core.h |   2 +-
 drivers/usb/dwc2/gadget.c   |  30 +--
 drivers/usb/dwc2/hcd.c  |   6 +-
 drivers/usb/dwc2/hw.h   |   1 +
 drivers/usb/dwc2/platform.c |   9 +-
 6 files changed, 161 insertions(+), 82 deletions(-)

 -- 
 2.7.4

>>>
>>> after applying your patch series on v4.6-rc1 usb keeps being broken on 
>>> rk3188.
>>> Besides that I get "dwc2 1018.usb: dwc2_wait_for_mode: Couldn't set 
>>> host mode“ repeatedly.
>>>
>>> Currently this works for me:
>>> - Revert "usb: dwc2: Fix probe problem on bcm2835“
>>> - Apply "usb: dwc2: Add a 10 ms delay to dwc2_core_reset()"
>>>
>>>
>>> Best regards
>>> Michael
>>>
>>
>> Thanks Michael.
>>
>> I won't be able to look at this again until next week. In the meantime
>> could you provide a driver log? In particular I want to see the values
>> of your GHWCFG registers, and where you are seeing the
>> dwc2_wait_for_mode() failure.
>>
>> Regards,
>> John
> 
> Looks like the problem is gone on -rc2… on -rc1 the errors came up shortly 
> after "dwc2 1018.usb“ messages.
> USB keeps being broken, though. The USB hub is detected but nothing that is 
> attached to it.
> 
> Here are the logs and register values for each test with Doug’s and your 
> patches.
> 
> Michael
> 
> 
> good usb, Doug's patches
> 
> [0.420125] usbcore: registered new interface driver usbfs 
>   
> [0.426246] usbcore: registered new interface driver hub   
>   
> [0.432296] usbcore: registered new device driver usb
> [...]
> [0.853769] 1018.usb supply vusb_d not found, using dummy regulator
>   
> [0.860560] 1018.usb supply vusb_a not found, using dummy regulator
>   
> [0.867365] dwc2 1018.usb: Configuration mismatch. dr_mode forced to 
> host
> [0.977737] dwc2 1018.usb: 128 invalid for host_nperio_tx_fifo_size. 
> Check H.
> [0.986562] dwc2 1018.usb: 256 invalid for host_perio_tx_fifo_size. 
> Check HW.
> [1.047959] dwc2 1018.usb: DWC OTG Controller  
>   
> [1.052732] dwc2 1018.usb: new USB bus registered, assigned bus number 
> 1 
> [1.059868] dwc2 1018.usb: irq 24, io mem 0x   
>   
> [1.065586] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002  
>   
> [1.072430] usb usb1: New USB device strings: Mfr=3, Product=2, 
> SerialNumber=1   
> [1.079706] usb usb1: Product: DWC OTG Controller  
>   
> [1.084432] usb usb1: Manufacturer: Linux 4.6.0-rc2+ dwc2_hsotg
>   
> [1.090390] usb usb1: SerialNumber: 1018.usb   
>   
> [1.096000] hub 1-0:1.0: USB hub found 
>   
> [1.099884] hub 1-0:1.0: 1 port detected   
>   
> [1.104668] 101c.usb supply vusb_d not found, using dummy regulator
>   
> [1.111428] 101c.usb supply vusb_a not found, using dummy regulator
>   
> [1.247968] dwc2 101c.usb: DWC OTG Controller  
>   
> [1.252743] dwc2 101c.usb: new USB bus registered, assigned bus number 
> 2 
> [1.259879] dwc2 101c.usb: irq 25, io mem 0x   
>   
>

Re: [PATCH] pinctrl: tegra: clear park bit for all pins

2016-04-07 Thread Stephen Warren


On 04/07/2016 03:37 PM, Rhyland Klein wrote:

Parking bits might not be cleared by the bootloader properly (if for
instance it doesn't use the device configured by that pin). Clear
the park bits for all the pins during pinctrl probe.

This is present on T210 platforms but not earlier ones, so for earlier
generations, set parked_reg = -1 to disable.

The park bit is used to prevent glitching when reprogramming pinctrl
registers.

Based on work by:
Shravani Dingari 

Signed-off-by: Rhyland Klein 


Acked-by: Stephen Warren

Re: [PATCH] pinctrl: tegra: clear park bit for all pins

2016-04-07 Thread Stephen Warren


On 04/07/2016 03:37 PM, Rhyland Klein wrote:

Parking bits might not be cleared by the bootloader properly (if for
instance it doesn't use the device configured by that pin). Clear
the park bits for all the pins during pinctrl probe.

This is present on T210 platforms but not earlier ones, so for earlier
generations, set parked_reg = -1 to disable.

The park bit is used to prevent glitching when reprogramming pinctrl
registers.

Based on work by:
Shravani Dingari 

Signed-off-by: Rhyland Klein 


Acked-by: Stephen Warren

Re: [RFC PATCH 0/3] restartable sequences v2: fast user-space percpu critical sections

On Thu, Apr 7, 2016 at 1:11 PM, Peter Zijlstra  wrote:
> On Thu, Apr 07, 2016 at 09:43:33AM -0700, Andy Lutomirski wrote:
>> More concretely, this looks like (using totally arbitrary register
>> assignments -- probably far from ideal, especially given how GCC's
>> constraints work):
>>
>> enter the critical section:
>> 1:
>> movq %[cpu], %%r12
>> movq {address of counter for our cpu}, %%r13
>> movq {some fresh value}, (%%r13)
>> cmpq %[cpu], %%r12
>> jne 1b
>>
>> ... do whatever setup or computation is needed...
>>
>> movq $%l[failed], %%rcx
>> movq $1f, %[commit_instr]
>> cmpq {whatever counter we chose}, (%%r13)
>> jne %l[failed]
>> cmpq %[cpu], %%r12
>> jne %l[failed]
>>
>> <-- a signal in here that conflicts with us would clobber (%%r13), and
>> the kernel would notice and send us to the failed label
>>
>> movq %[to_write], (%[target])
>> 1: movq $0, %[commit_instr]
>
> And the kernel, for every thread that has had the syscall called and a
> thingy registered, needs to (at preempt/signal-setup):
>
> if (get_user(post_commit_ip, current->post_commit_ip))
> return -EFAULT;
>
> if (likely(!post_commit_ip))
> return 0;
>
> if (regs->ip >= post_commit_ip)
> return 0;
>
> if (get_user(seq, (u32 __user *)regs->r13))
> return -EFAULT;
>
> if (regs->$(which one holds our chosen seq?) == seq) {
> /* nothing changed, do not cancel, proceed to commit. */
> return 0;

Only return zero if regs->$(which one holds the cpu) == smp_processor_id().

> }
>
> if (put_user(0UL, current->post_commit_ip))
> return -EFAULT;
>
> regs->ip = regs->rcx;

I was imagining this happening at (return to userspace or preempt) and
possibly at signal return, but yes, more or less.

>
>
>> In contrast to Paul's scheme, this has two additional (highly
>> predictable) branches and requires generation of a seqcount in
>> userspace.  In its favor, though, it doesn't need preemption hooks,
>
> Without preemption hooks, how would one thread preempting another at the
> above <-- clobber anything and cause the commit to fail?

It doesn't, which is what I like about my variant.  If the thread
accesses the protected data structure, though, it should bump the
sequence count, which will cause the first thread to abort when it
gets scheduled in.

>
>> it's inherently debuggable,
>
> It is more debuggable, agreed.
>
>> and it allows multiple independent
>> rseq-protected things to coexist without forcing each other to abort.
>
> And the kernel only needs to load the second cacheline if it lands in
> the middle of a finish block, which should be manageable overhead I
> suppose.
>
> But the userspace chunk is lots slower as it needs to always touch
> multiple lines, since the @cpu, @seq and @post_commit_ip all live in
> separate lines (although I suppose @cpu and @post_commit_ip could live
> in the same).
>
> The finish thing needs 3 registers for:
>
>  - fail ip
>  - seq pointer
>  - seq value
>
> Which I suppose is possible even on register constrained architectures
> like i386.

I think this can all be munged into two cachelines:

One cacheline contains the per-thread CPU number and post_commit_ip
(either by doing it over Linus' dead body or by having userspace
allocate it carefully).  The other contains the sequence counter *and*
the percpu data structure that's protected.  So in some sense it's the
same number of cache lines as Paul's version.

--Andy

-- 
Andy Lutomirski
AMA Capital Management, LLC

Re: [RFC PATCH 0/3] restartable sequences v2: fast user-space percpu critical sections

On Thu, Apr 7, 2016 at 1:11 PM, Peter Zijlstra  wrote:
> On Thu, Apr 07, 2016 at 09:43:33AM -0700, Andy Lutomirski wrote:
>> More concretely, this looks like (using totally arbitrary register
>> assignments -- probably far from ideal, especially given how GCC's
>> constraints work):
>>
>> enter the critical section:
>> 1:
>> movq %[cpu], %%r12
>> movq {address of counter for our cpu}, %%r13
>> movq {some fresh value}, (%%r13)
>> cmpq %[cpu], %%r12
>> jne 1b
>>
>> ... do whatever setup or computation is needed...
>>
>> movq $%l[failed], %%rcx
>> movq $1f, %[commit_instr]
>> cmpq {whatever counter we chose}, (%%r13)
>> jne %l[failed]
>> cmpq %[cpu], %%r12
>> jne %l[failed]
>>
>> <-- a signal in here that conflicts with us would clobber (%%r13), and
>> the kernel would notice and send us to the failed label
>>
>> movq %[to_write], (%[target])
>> 1: movq $0, %[commit_instr]
>
> And the kernel, for every thread that has had the syscall called and a
> thingy registered, needs to (at preempt/signal-setup):
>
> if (get_user(post_commit_ip, current->post_commit_ip))
> return -EFAULT;
>
> if (likely(!post_commit_ip))
> return 0;
>
> if (regs->ip >= post_commit_ip)
> return 0;
>
> if (get_user(seq, (u32 __user *)regs->r13))
> return -EFAULT;
>
> if (regs->$(which one holds our chosen seq?) == seq) {
> /* nothing changed, do not cancel, proceed to commit. */
> return 0;

Only return zero if regs->$(which one holds the cpu) == smp_processor_id().

> }
>
> if (put_user(0UL, current->post_commit_ip))
> return -EFAULT;
>
> regs->ip = regs->rcx;

I was imagining this happening at (return to userspace or preempt) and
possibly at signal return, but yes, more or less.

>
>
>> In contrast to Paul's scheme, this has two additional (highly
>> predictable) branches and requires generation of a seqcount in
>> userspace.  In its favor, though, it doesn't need preemption hooks,
>
> Without preemption hooks, how would one thread preempting another at the
> above <-- clobber anything and cause the commit to fail?

It doesn't, which is what I like about my variant.  If the thread
accesses the protected data structure, though, it should bump the
sequence count, which will cause the first thread to abort when it
gets scheduled in.

>
>> it's inherently debuggable,
>
> It is more debuggable, agreed.
>
>> and it allows multiple independent
>> rseq-protected things to coexist without forcing each other to abort.
>
> And the kernel only needs to load the second cacheline if it lands in
> the middle of a finish block, which should be manageable overhead I
> suppose.
>
> But the userspace chunk is lots slower as it needs to always touch
> multiple lines, since the @cpu, @seq and @post_commit_ip all live in
> separate lines (although I suppose @cpu and @post_commit_ip could live
> in the same).
>
> The finish thing needs 3 registers for:
>
>  - fail ip
>  - seq pointer
>  - seq value
>
> Which I suppose is possible even on register constrained architectures
> like i386.

I think this can all be munged into two cachelines:

One cacheline contains the per-thread CPU number and post_commit_ip
(either by doing it over Linus' dead body or by having userspace
allocate it carefully).  The other contains the sequence counter *and*
the percpu data structure that's protected.  So in some sense it's the
same number of cache lines as Paul's version.

--Andy

-- 
Andy Lutomirski
AMA Capital Management, LLC

Re: [PATCH] staging: android: ion: dummy: fix dereference of ERR_PTR

2016-04-07 Thread Laura Abbott


On 04/07/2016 09:32 AM, Sudip Mukherjee wrote:

ion_device_create() can fail and if it fails then it returns the error
value in ERR_PTR.



Reviewed-by: Laura Abbott 


Signed-off-by: Sudip Mukherjee 
---
  drivers/staging/android/ion/ion_dummy_driver.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/staging/android/ion/ion_dummy_driver.c 
b/drivers/staging/android/ion/ion_dummy_driver.c
index 5678870..806e76b 100644
--- a/drivers/staging/android/ion/ion_dummy_driver.c
+++ b/drivers/staging/android/ion/ion_dummy_driver.c
@@ -68,6 +68,8 @@ static int __init ion_dummy_init(void)
int i, err;

idev = ion_device_create(NULL);
+   if (IS_ERR(idev))
+   return PTR_ERR(idev);
heaps = kcalloc(dummy_ion_pdata.nr, sizeof(struct ion_heap *),
GFP_KERNEL);
if (!heaps)

Re: [PATCH] staging: android: ion: dummy: fix dereference of ERR_PTR

2016-04-07 Thread Laura Abbott


On 04/07/2016 09:32 AM, Sudip Mukherjee wrote:

ion_device_create() can fail and if it fails then it returns the error
value in ERR_PTR.



Reviewed-by: Laura Abbott 


Signed-off-by: Sudip Mukherjee 
---
  drivers/staging/android/ion/ion_dummy_driver.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/drivers/staging/android/ion/ion_dummy_driver.c 
b/drivers/staging/android/ion/ion_dummy_driver.c
index 5678870..806e76b 100644
--- a/drivers/staging/android/ion/ion_dummy_driver.c
+++ b/drivers/staging/android/ion/ion_dummy_driver.c
@@ -68,6 +68,8 @@ static int __init ion_dummy_init(void)
int i, err;

idev = ion_device_create(NULL);
+   if (IS_ERR(idev))
+   return PTR_ERR(idev);
heaps = kcalloc(dummy_ion_pdata.nr, sizeof(struct ion_heap *),
GFP_KERNEL);
if (!heaps)

Re: [PATCH] cpufreq: Skip all governor-related actions for cpufreq_suspended set

2016-04-07 Thread Rafael J. Wysocki

On Thursday, April 07, 2016 05:35:03 PM Viresh Kumar wrote:
> On 07-04-16, 13:44, Rafael J. Wysocki wrote:
> > I'm not sure I'm following.
> > 
> > Without this patch fast switch is disabled when we offline the nonboot
> > CPUs during suspend, because cpufreq_exit_governor() runs then, but
> > the cpufreq_governor() called by it does nothing.  Also
> > cpufreq_governor() during nonboot CPUs online does nothing.
> > 
> > That has to be made consistent somehow.  This patch is one way.
> > Another way would be to disable fast switch from the governor ->exit
> > callback, but the net result would be the same.
> 
> Actually things are working fine today by chance IMO, because we don't
> free the policy structures anymore while we offline CPUs.

Yes, that's why they work.  And that's because the code has been written
that way.  Whether that happened by chance or by design, or because of
a favorable concentration of the Force, I don't care.

> Otherwise, policy->governor_data would have been lost together with
> the policy, and governor wouldn't have worked properly after resume.
> 
> What we are doing today is something like this:
> 
> Suspend
> ---
> 
> -> cpufreq_suspend()
>  -> STOP governor
>  -> cpufreq_suspended = true
> 
> -> Offline non-boot CPUs
>   -> cpufreq_offline()
> -> SKIP calling EXIT governor (governor had allocated few
> resources earlier)
> 
> Resume
> --
> 
> -> Bring back non-boot CPUs
>   -> cpufreq_online()
> -> SKIP calling INIT governor (policy->governor_data doesn't get
> reset, luckily)

policy->governor_data is not reset.  Period.

> 
> -> cpufreq_resume()
>  -> cpufreq_suspended = false
>  -> START governor

Yes, that's what the code flow is.

> 
> That's *ugly* and it works by chance, unless I am misreading it
> completely.

I'm assuming that what you mean by "ugly" here is "not really straightforward",
which I agree with, but then it is really disappointing to see comments like
that from you about the code that you helped to write.

But instead of going for a rant about how disappointed I am, let me focus on
the technical side of things.

As per the code today, the only legitimate role of cpufreq_suspended is to 
prevent
governor operations from being carried out when disabling nonboot CPUs during
system suspend.  My *interpretation* of that is that this is to avoid accessing
hardware resources that may not be available at that point, which is fair 
enough.
It also has a nice side effect that the disabling/enabling nonboot CPUs doesn't
run code that it doesn't have to run (like removing/creating governor tunables
directory in sysfs in the tunables-per-policy case).  That is good.

The bad thing is how that is different from the runtime CPU offline.

> One of the solutions to get this cleaned is to stop checking for
> cpufreq_suspended flag in cpufreq_governor() and put that *only* in
> places where we are trying to interact with the hardware. And that
> essentially is the callbacks provided by the cpufreq drivers. So,
> ignore calling cpufreq-driver callbacks if cpufreq_suspended is true.

No, cpufreq_suspended is not sufficient for that, because the setting/clearing
of it is generally racy with respect to pretty much anything except for the
suspend process itself.  Checking it outside of the suspend process would be
a bug.

Moreover, runtime CPU offline *also* doesn't have to run the governor exit/init
for the same reason why the policy directory doesn't have to be removed on
CPU offline: it is just pointless to do that.  The governor has been stopped
already and it won't do anything more.  The only problem here is to prevent
governor tunable sysfs attributes from triggering actions in that state,
but that shouldn't be too difficult to arrange for.  If that's done,
cpufreq_suspended can be dropped, modulo changing cpufreq_start_governor()
to return immediately if the governor has been started already.

And if something else is needed to protect driver callbacks from being invoked
outside of the suspend-resume path, a more robust mechanism has to be added
for that.

But in the meantime, I'd like to address the fast switch problem first and
then you're free to clean up things on top of that.  Or I will clean them up
if I have the time.

Thanks,
Rafael

Re: [PATCH] cpufreq: Skip all governor-related actions for cpufreq_suspended set

2016-04-07 Thread Rafael J. Wysocki

On Thursday, April 07, 2016 05:35:03 PM Viresh Kumar wrote:
> On 07-04-16, 13:44, Rafael J. Wysocki wrote:
> > I'm not sure I'm following.
> > 
> > Without this patch fast switch is disabled when we offline the nonboot
> > CPUs during suspend, because cpufreq_exit_governor() runs then, but
> > the cpufreq_governor() called by it does nothing.  Also
> > cpufreq_governor() during nonboot CPUs online does nothing.
> > 
> > That has to be made consistent somehow.  This patch is one way.
> > Another way would be to disable fast switch from the governor ->exit
> > callback, but the net result would be the same.
> 
> Actually things are working fine today by chance IMO, because we don't
> free the policy structures anymore while we offline CPUs.

Yes, that's why they work.  And that's because the code has been written
that way.  Whether that happened by chance or by design, or because of
a favorable concentration of the Force, I don't care.

> Otherwise, policy->governor_data would have been lost together with
> the policy, and governor wouldn't have worked properly after resume.
> 
> What we are doing today is something like this:
> 
> Suspend
> ---
> 
> -> cpufreq_suspend()
>  -> STOP governor
>  -> cpufreq_suspended = true
> 
> -> Offline non-boot CPUs
>   -> cpufreq_offline()
> -> SKIP calling EXIT governor (governor had allocated few
> resources earlier)
> 
> Resume
> --
> 
> -> Bring back non-boot CPUs
>   -> cpufreq_online()
> -> SKIP calling INIT governor (policy->governor_data doesn't get
> reset, luckily)

policy->governor_data is not reset.  Period.

> 
> -> cpufreq_resume()
>  -> cpufreq_suspended = false
>  -> START governor

Yes, that's what the code flow is.

> 
> That's *ugly* and it works by chance, unless I am misreading it
> completely.

I'm assuming that what you mean by "ugly" here is "not really straightforward",
which I agree with, but then it is really disappointing to see comments like
that from you about the code that you helped to write.

But instead of going for a rant about how disappointed I am, let me focus on
the technical side of things.

As per the code today, the only legitimate role of cpufreq_suspended is to 
prevent
governor operations from being carried out when disabling nonboot CPUs during
system suspend.  My *interpretation* of that is that this is to avoid accessing
hardware resources that may not be available at that point, which is fair 
enough.
It also has a nice side effect that the disabling/enabling nonboot CPUs doesn't
run code that it doesn't have to run (like removing/creating governor tunables
directory in sysfs in the tunables-per-policy case).  That is good.

The bad thing is how that is different from the runtime CPU offline.

> One of the solutions to get this cleaned is to stop checking for
> cpufreq_suspended flag in cpufreq_governor() and put that *only* in
> places where we are trying to interact with the hardware. And that
> essentially is the callbacks provided by the cpufreq drivers. So,
> ignore calling cpufreq-driver callbacks if cpufreq_suspended is true.

No, cpufreq_suspended is not sufficient for that, because the setting/clearing
of it is generally racy with respect to pretty much anything except for the
suspend process itself.  Checking it outside of the suspend process would be
a bug.

Moreover, runtime CPU offline *also* doesn't have to run the governor exit/init
for the same reason why the policy directory doesn't have to be removed on
CPU offline: it is just pointless to do that.  The governor has been stopped
already and it won't do anything more.  The only problem here is to prevent
governor tunable sysfs attributes from triggering actions in that state,
but that shouldn't be too difficult to arrange for.  If that's done,
cpufreq_suspended can be dropped, modulo changing cpufreq_start_governor()
to return immediately if the governor has been started already.

And if something else is needed to protect driver callbacks from being invoked
outside of the suspend-resume path, a more robust mechanism has to be added
for that.

But in the meantime, I'd like to address the fast switch problem first and
then you're free to clean up things on top of that.  Or I will clean them up
if I have the time.

Thanks,
Rafael

Re: [PATCH V2 5/8] net: mediatek: fix mtk_pending_work

Hi John,

[auto build test ERROR on net-next/master]
[also build test ERROR on v4.6-rc2 next-20160407]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/John-Crispin/net-mediatek-make-the-driver-pass-stress-tests/20160408-033430
config: arm-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

Note: the 
linux-review/John-Crispin/net-mediatek-make-the-driver-pass-stress-tests/20160408-033430
 HEAD e648090f60723da77108430208b4b957c481048b builds fine.
  It only hurts bisectability.

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/list.h:8:0,
from include/linux/kobject.h:20,
from include/linux/device.h:17,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from include/linux/of_device.h:4,
from drivers/net/ethernet/mediatek/mtk_eth_soc.c:15:
   drivers/net/ethernet/mediatek/mtk_eth_soc.c: In function 'mtk_pending_work':
>> include/linux/kernel.h:824:27: error: 'struct mtk_eth' has no member named 
>> 'pending_work'
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
  ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   include/linux/kernel.h:824:48: warning: initialization from incompatible 
pointer type
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
   ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   include/linux/kernel.h:824:48: warning: (near initialization for 'eth')
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
   ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   In file included from include/linux/compiler.h:60:0,
from include/linux/ioport.h:12,
from include/linux/device.h:16,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from include/linux/of_device.h:4,
from drivers/net/ethernet/mediatek/mtk_eth_soc.c:15:
>> include/linux/compiler-gcc.h:158:2: error: 'struct mtk_eth' has no member 
>> named 'pending_work'
 __builtin_offsetof(a, b)
 ^
   include/linux/stddef.h:16:32: note: in expansion of macro 
'__compiler_offsetof'
#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER)
   ^
   include/linux/kernel.h:825:29: note: in expansion of macro 'offsetof'
 (type *)( (char *)__mptr - offsetof(type,member) );})
^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^

vim +824 include/linux/kernel.h

^1da177e Linus Torvalds 2005-04-16  818   * @ptr:   the pointer to the 
member.
^1da177e Linus Torvalds 2005-04-16  819   * @type:  the type of the 
container struct this is embedded in.
^1da177e Linus Torvalds 2005-04-16  820   * @member:the name of the member 
within the struct.
^1da177e Linus Torvalds 2005-04-16  821   *
^1da177e Linus Torvalds 2005-04-16  822   */
^1da177e Linus Torvalds 2005-04-16  823  #define container_of(ptr, type, 
member) ({ \
^1da177e Linus Torvalds 2005-04-16 @824 const typeof( ((type 
*)0)->member ) *__mptr = (ptr);\
^1da177e Linus Torvalds 2005-04-16  825 (type *)( (char *)__mptr - 
offsetof(type,member) );})
^1da177e Linus Torvalds 2005-04-16  826  
b9d4f426 Arnaud Lacombe 2011-07-25  827  /* Rebuild everything on 
CONFIG_FTRACE_MCOUNT_RECORD */

:: The code at line 824 was first introduced by commit
:: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:: TO: Linus Torvalds <torva...@ppc970.osdl.org>
:: CC: Linus Torvalds <torva...@ppc970.osdl.org>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data

Re: [PATCH V2 5/8] net: mediatek: fix mtk_pending_work

Hi John,

[auto build test ERROR on net-next/master]
[also build test ERROR on v4.6-rc2 next-20160407]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/John-Crispin/net-mediatek-make-the-driver-pass-stress-tests/20160408-033430
config: arm-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm 

Note: the 
linux-review/John-Crispin/net-mediatek-make-the-driver-pass-stress-tests/20160408-033430
 HEAD e648090f60723da77108430208b4b957c481048b builds fine.
  It only hurts bisectability.

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/list.h:8:0,
from include/linux/kobject.h:20,
from include/linux/device.h:17,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from include/linux/of_device.h:4,
from drivers/net/ethernet/mediatek/mtk_eth_soc.c:15:
   drivers/net/ethernet/mediatek/mtk_eth_soc.c: In function 'mtk_pending_work':
>> include/linux/kernel.h:824:27: error: 'struct mtk_eth' has no member named 
>> 'pending_work'
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
  ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   include/linux/kernel.h:824:48: warning: initialization from incompatible 
pointer type
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
   ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   include/linux/kernel.h:824:48: warning: (near initialization for 'eth')
 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
   ^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^
   In file included from include/linux/compiler.h:60:0,
from include/linux/ioport.h:12,
from include/linux/device.h:16,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from include/linux/of_device.h:4,
from drivers/net/ethernet/mediatek/mtk_eth_soc.c:15:
>> include/linux/compiler-gcc.h:158:2: error: 'struct mtk_eth' has no member 
>> named 'pending_work'
 __builtin_offsetof(a, b)
 ^
   include/linux/stddef.h:16:32: note: in expansion of macro 
'__compiler_offsetof'
#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER)
   ^
   include/linux/kernel.h:825:29: note: in expansion of macro 'offsetof'
 (type *)( (char *)__mptr - offsetof(type,member) );})
^
>> drivers/net/ethernet/mediatek/mtk_eth_soc.c:1433:24: note: in expansion of 
>> macro 'container_of'
 struct mtk_eth *eth = container_of(work, struct mtk_eth, pending_work);
   ^

vim +824 include/linux/kernel.h

^1da177e Linus Torvalds 2005-04-16  818   * @ptr:   the pointer to the 
member.
^1da177e Linus Torvalds 2005-04-16  819   * @type:  the type of the 
container struct this is embedded in.
^1da177e Linus Torvalds 2005-04-16  820   * @member:the name of the member 
within the struct.
^1da177e Linus Torvalds 2005-04-16  821   *
^1da177e Linus Torvalds 2005-04-16  822   */
^1da177e Linus Torvalds 2005-04-16  823  #define container_of(ptr, type, 
member) ({ \
^1da177e Linus Torvalds 2005-04-16 @824 const typeof( ((type 
*)0)->member ) *__mptr = (ptr);\
^1da177e Linus Torvalds 2005-04-16  825 (type *)( (char *)__mptr - 
offsetof(type,member) );})
^1da177e Linus Torvalds 2005-04-16  826  
b9d4f426 Arnaud Lacombe 2011-07-25  827  /* Rebuild everything on 
CONFIG_FTRACE_MCOUNT_RECORD */

:: The code at line 824 was first introduced by commit
:: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Linux-2.6.12-rc2

:: TO: Linus Torvalds 
:: CC: Linus Torvalds 

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


.config.gz
Description: Binary data

Re: [PATCH 5/5] max44000: Initial triggered buffer support

Hi Crestez,

[auto build test WARNING on iio/togreg]
[also build test WARNING on v4.6-rc2 next-20160407]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Crestez-Dan-Leonard/Support-for-max44000-Ambient-and-Infrared-Proximity-Sensor/20160408-003121
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git togreg
config: s390-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390 

All warnings (new ones prefixed by >>):

   drivers/iio/light/max44000.c: In function 'max44000_trigger_handler':
>> drivers/iio/light/max44000.c:513:1: warning: 'max44000_trigger_handler' uses 
>> dynamic stack allocation
}
^

vim +/max44000_trigger_handler +513 drivers/iio/light/max44000.c

   497  if (*indio_dev->active_scan_mask & (1 << 
MAX44000_SCAN_INDEX_PRX)) {
   498  ret = regmap_read(data->regmap, MAX44000_REG_PRX_DATA, 
&regval);
   499  if (ret < 0)
   500  goto out_unlock;
   501  *pos = regval;
   502  }
   503  mutex_unlock(&data->lock);
   504  
   505  iio_push_to_buffers_with_timestamp(indio_dev, buf, 
iio_get_time_ns());
   506  iio_trigger_notify_done(indio_dev->trig);
   507  return IRQ_HANDLED;
   508  
   509  out_unlock:
   510  mutex_unlock(&data->lock);
   511  iio_trigger_notify_done(indio_dev->trig);
   512  return IRQ_HANDLED;
 > 513  }
   514  
   515  static int max44000_probe(struct i2c_client *client,
   516                            const struct i2c_device_id *id)
   517  {
   518  struct max44000_data *data;
   519  struct iio_dev *indio_dev;
   520  int ret, reg;
   521  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


.config.gz
Description: Binary data

Re: [PATCH 5/5] max44000: Initial triggered buffer support

Hi Crestez,

[auto build test WARNING on iio/togreg]
[also build test WARNING on v4.6-rc2 next-20160407]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Crestez-Dan-Leonard/Support-for-max44000-Ambient-and-Infrared-Proximity-Sensor/20160408-003121
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git togreg
config: s390-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390 

All warnings (new ones prefixed by >>):

   drivers/iio/light/max44000.c: In function 'max44000_trigger_handler':
>> drivers/iio/light/max44000.c:513:1: warning: 'max44000_trigger_handler' uses 
>> dynamic stack allocation
}
^

vim +/max44000_trigger_handler +513 drivers/iio/light/max44000.c

   497  if (*indio_dev->active_scan_mask & (1 << 
MAX44000_SCAN_INDEX_PRX)) {
   498  ret = regmap_read(data->regmap, MAX44000_REG_PRX_DATA, 
&regval);
   499  if (ret < 0)
   500  goto out_unlock;
   501  *pos = regval;
   502  }
   503  mutex_unlock(&data->lock);
   504  
   505  iio_push_to_buffers_with_timestamp(indio_dev, buf, 
iio_get_time_ns());
   506  iio_trigger_notify_done(indio_dev->trig);
   507  return IRQ_HANDLED;
   508  
   509  out_unlock:
   510  mutex_unlock(&data->lock);
   511  iio_trigger_notify_done(indio_dev->trig);
   512  return IRQ_HANDLED;
 > 513  }
   514  
   515  static int max44000_probe(struct i2c_client *client,
   516                            const struct i2c_device_id *id)
   517  {
   518  struct max44000_data *data;
   519  struct iio_dev *indio_dev;
   520  int ret, reg;
   521  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


.config.gz
Description: Binary data

Re: [PATCH v5 30/46] regulator: pwm: retrieve correct voltage

Hi Mark,

On Wed, 30 Mar 2016 14:24:10 -0700
Mark Brown  wrote:

> On Wed, Mar 30, 2016 at 10:03:53PM +0200, Boris Brezillon wrote:
> > The continuous PWM voltage regulator is caching the voltage value in
> > the ->volt_uV field. While most of the time this value should reflect the
> > real voltage, sometimes it can be slightly different if the PWM device
> > rounded the set_duty_cycle request.
> > Moreover, this value is not valid until someone has modified the regulator
> > output.
> 
> Acked-by: Mark Brown 

Actually this patch introduces a bug (reported by Stephen):

"
I applied your patch series [PATCH v5 00/46] pwm: add support for
atomic update and found a null pointer dereference when probing a
pwm-regulator at boot. See the below stack trace:

[ 4.282374] [] pwm_regulator_get_voltage+0x78/0xa0
[ 4.289344] [] regulator_attr_is_visible+0x7c/0x264
[ 4.296408] [] internal_create_group+0x14c/0x280
[ 4.303184] [] sysfs_create_group+0x14/0x1c
[ 4.309483] [] sysfs_create_groups+0x30/0x78
[ 4.315881] [] device_add+0x224/0x4d8
[ 4.321609] [] device_register+0x1c/0x28
[ 4.327623] [] regulator_register+0x2e4/0xc14
[ 4.334112] [] devm_regulator_register+0x54/0x94
[ 4.340887] [] pwm_regulator_probe+0x278/0x2b8
[ 4.347473] [] platform_drv_probe+0x58/0xa4
[ 4.353772] [] driver_probe_device+0x114/0x2ac
[ 4.360358] [] __driver_attach+0x64/0x90
[ 4.366371] [] bus_for_each_dev+0x74/0x90
[ 4.372478] [] driver_attach+0x20/0x28
[ 4.378299] [] bus_add_driver+0xe8/0x1e0
[ 4.384312] [] driver_register+0x98/0xe4
[ 4.390326] [] __platform_driver_register+0x48/0x50
[ 4.397388] [] pwm_regulator_driver_init+0x18/0x20
[ 4.404356] [] do_one_initcall+0xf8/0x180
[ 4.410466] [] kernel_init_freeable+0x154/0x1f4
[ 4.417148] [] kernel_init+0x10/0xf8
[ 4.422782] [] ret_from_fork+0x10/0x40

It looks like the root cause is that regulator_attr_is_visible will
try to get the voltage, but at this point in regulator_register,
rdev->constraints is still null. So
pwm_duty_cycle_percentage_to_voltage will dereference a null
rdev->constraints pointer.
"

The problem is that we need to know the min and max voltage constraints
to calculate the current voltage. ->get_voltage() is called when the
sysfs attributes are created (part of device registration), and
set_machine_constraints() is called after device_register(), thus
leading to the NULL pointer dereference.

Is there any reason for calling set_machine_constraints() after
device_register() in regulator_register()?

Best Regards,

Boris

Re: [PATCH v5 30/46] regulator: pwm: retrieve correct voltage

Hi Mark,

On Wed, 30 Mar 2016 14:24:10 -0700
Mark Brown  wrote:

> On Wed, Mar 30, 2016 at 10:03:53PM +0200, Boris Brezillon wrote:
> > The continuous PWM voltage regulator is caching the voltage value in
> > the ->volt_uV field. While most of the time this value should reflect the
> > real voltage, sometimes it can be slightly different if the PWM device
> > rounded the set_duty_cycle request.
> > Moreover, this value is not valid until someone has modified the regulator
> > output.
> 
> Acked-by: Mark Brown 

Actually this patch introduces a bug (reported by Stephen):

"
I applied your patch series [PATCH v5 00/46] pwm: add support for
atomic update and found a null pointer dereference when probing a
pwm-regulator at boot. See the below stack trace:

[ 4.282374] [] pwm_regulator_get_voltage+0x78/0xa0
[ 4.289344] [] regulator_attr_is_visible+0x7c/0x264
[ 4.296408] [] internal_create_group+0x14c/0x280
[ 4.303184] [] sysfs_create_group+0x14/0x1c
[ 4.309483] [] sysfs_create_groups+0x30/0x78
[ 4.315881] [] device_add+0x224/0x4d8
[ 4.321609] [] device_register+0x1c/0x28
[ 4.327623] [] regulator_register+0x2e4/0xc14
[ 4.334112] [] devm_regulator_register+0x54/0x94
[ 4.340887] [] pwm_regulator_probe+0x278/0x2b8
[ 4.347473] [] platform_drv_probe+0x58/0xa4
[ 4.353772] [] driver_probe_device+0x114/0x2ac
[ 4.360358] [] __driver_attach+0x64/0x90
[ 4.366371] [] bus_for_each_dev+0x74/0x90
[ 4.372478] [] driver_attach+0x20/0x28
[ 4.378299] [] bus_add_driver+0xe8/0x1e0
[ 4.384312] [] driver_register+0x98/0xe4
[ 4.390326] [] __platform_driver_register+0x48/0x50
[ 4.397388] [] pwm_regulator_driver_init+0x18/0x20
[ 4.404356] [] do_one_initcall+0xf8/0x180
[ 4.410466] [] kernel_init_freeable+0x154/0x1f4
[ 4.417148] [] kernel_init+0x10/0xf8
[ 4.422782] [] ret_from_fork+0x10/0x40

It looks like the root cause is that regulator_attr_is_visible will
try to get the voltage, but at this point in regulator_register,
rdev->constraints is still null. So
pwm_duty_cycle_percentage_to_voltage will dereference a null
rdev->constraints pointer.
"

The problem is that we need to know the min and max voltage constraints
to calculate the current voltage. ->get_voltage() is called when the
sysfs attributes are created (part of device registration), and
set_machine_constraints() is called after device_register(), thus
leading to the NULL pointer dereference.

Is there any reason for calling set_machine_constraints() after
device_register() in regulator_register()?

Best Regards,

Boris

Re: [PATCH 17/19] perf tools: Build syscall table .c header from kernel's syscall_64.tbl

Em Thu, Apr 07, 2016 at 03:49:56PM -0600, David Ahern escreveu:
> Upon further review ...
> 
> On 4/7/16 2:58 PM, Arnaldo Carvalho de Melo wrote:
> >From: Arnaldo Carvalho de Melo 
> >
> >We used libaudit to map ids to syscall names and vice-versa, but that
> >imposes a delay in supporting new syscalls, having to wait for libaudit
> >to get those new syscalls on its tables.
> >
> >To remove that delay, for x86_64 initially, grab a copy of
> >arch/x86/entry/syscalls/syscall_64.tbl and use it to generate those
> >tables.
> 
> 
> >  tools/perf/Makefile.perf  |  11 +-
> >  tools/perf/arch/x86/Makefile  |  23 ++
> >  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 374 
> > ++
> >  tools/perf/arch/x86/entry/syscalls/syscalltbl.sh  |  39 +++
> 
> Why make copies of the files? Why can't perf reference the ones 2
> levels up?

We did that in the past, but then, after the build broke in tools/ due
to changes in the referenced files, we decided to use this "coherency
protocol" where we benefit from using the kernel files but don't use them
directly, being warned when changes happen so that we can do some
analysis before updating our copy.

- Arnaldo

Re: [PATCH 17/19] perf tools: Build syscall table .c header from kernel's syscall_64.tbl

Em Thu, Apr 07, 2016 at 03:49:56PM -0600, David Ahern escreveu:
> Upon further review ...
> 
> On 4/7/16 2:58 PM, Arnaldo Carvalho de Melo wrote:
> >From: Arnaldo Carvalho de Melo 
> >
> >We used libaudit to map ids to syscall names and vice-versa, but that
> >imposes a delay in supporting new syscalls, having to wait for libaudit
> >to get those new syscalls on its tables.
> >
> >To remove that delay, for x86_64 initially, grab a copy of
> >arch/x86/entry/syscalls/syscall_64.tbl and use it to generate those
> >tables.
> 
> 
> >  tools/perf/Makefile.perf  |  11 +-
> >  tools/perf/arch/x86/Makefile  |  23 ++
> >  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 374 
> > ++
> >  tools/perf/arch/x86/entry/syscalls/syscalltbl.sh  |  39 +++
> 
> Why make copies of the files? Why can't perf reference the ones 2
> levels up?

We did that in the past, but then, after the build broke in tools/ due
to changes in the referenced files, we decided to use this "coherency
protocol" where we benefit from using the kernel files but don't use them
directly, being warned when changes happen so that we can do some
analysis before updating our copy.

- Arnaldo

Re: [PATCH 17/19] perf tools: Build syscall table .c header from kernel's syscall_64.tbl

Em Thu, Apr 07, 2016 at 03:39:21PM -0600, David Ahern escreveu:
> On 4/7/16 2:58 PM, Arnaldo Carvalho de Melo wrote:
> >We used libaudit to map ids to syscall names and vice-versa, but that
> >imposes a delay in supporting new syscalls, having to wait for libaudit
> >to get those new syscalls on its tables.
 
> and for the distribution to get the new libaudit.

yeap
 
> >To remove that delay, for x86_64 initially, grab a copy of
> >arch/x86/entry/syscalls/syscall_64.tbl and use it to generate those
> >tables.
> 
> Now that's a benefit for perf in the kernel tree. What happens for out of
> tree perf builds? Are the requisite files included in the tarball?
> (apparently no, since the manifest file is not updated or perhaps I missed
> it)

Well, we're removing references to files outside tools/, so what was
done in this case was to copy the files:

[acme@jouet linux]$ ls -la tools/perf/arch/x86/entry/syscalls/
total 28
drwxrwxr-x. 2 acme acme  4096 Apr  7 16:59 .
drwxrwxr-x. 3 acme acme  4096 Apr  7 16:59 ..
-rw-rw-r--. 1 acme acme 12972 Apr  7 16:59 syscall_64.tbl
-rwxrwxr-x. 1 acme acme   596 Apr  7 16:59 syscalltbl.sh
[acme@jouet linux]$ 

Then we have logic for when the kernel one gets out of sync, to be
warned about, see:

tools/perf/arch/x86/Makefile

E.g.:

[acme@jouet linux]$ git diff
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
b/arch/x86/entry/syscalls/syscall_64.tbl
index 2e5b565adacc..e8af21d804f8 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -333,6 +333,7 @@
 324     common  membarrier              sys_membarrier
 325     common  mlock2                  sys_mlock2
 326     common  copy_file_range         sys_copy_file_range
+327     common  leftpad                 sys_leftpad
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
[acme@jouet linux]$
[acme@jouet linux]$ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; m
make: Entering directory '/home/acme/git/linux/tools/perf'

[acme@jouet linux]$ make -C tools/perf build-test
make: Entering directory '/home/acme/git/linux/tools/perf'
- tarpkg: ./tests/perf-targz-src-pkg .

  BUILD:   Doing 'make -j4' parallel build

Auto-detecting system features:
... dwarf: [ on  ]


  GEN  /tmp/build/perf/common-cmds.h
Warning: x86_64's syscall_64.tbl differs from kernel
  CC   /tmp/build/perf/fixdep.o
  LD   /tmp/build/perf/fixdep-in.o

---

Looking at it now it needs some more polish, i.e. it should have a GEN ...
line, but that can be done on top of this patchkit :-)

For completeness:

[acme@jouet linux]$ diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 
arch/x86/entry/syscalls/syscall_64.tbl
--- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   2016-04-07 
16:59:44.938061779 -0300
+++ arch/x86/entry/syscalls/syscall_64.tbl  2016-04-07 18:43:51.543992781 
-0300
@@ -333,6 +333,7 @@
 324     common  membarrier              sys_membarrier
 325     common  mlock2                  sys_mlock2
 326     common  copy_file_range         sys_copy_file_range
+327     common  leftpad                 sys_leftpad
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
[acme@jouet linux]$

And 'make -C tools/perf build-test' does the tarball creation and out-of-tree
build as its first step, to make sure it continues working, so all that is
needed is there :-)

[acme@jouet linux]$ make -C tools/perf build-test
make: Entering directory '/home/acme/git/linux/tools/perf'
- tarpkg: ./tests/perf-targz-src-pkg .
- /home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP: cd . && make 
FEATURE_DUMP_COPY=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP  
feature-dump
cd . && make 
FEATURE_DUMP_COPY=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP 
feature-dump
  make_minimal_O: cd . && make NO_LIBPERL=1 NO_LIBPYTHON=1 
NO_NEWT=1 NO_GTK2=1 NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 
NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 
NO_LIBBPF=1 NO_LIBCRYPTO=1 
FEATURES_DUMP=/home/acme/git/linux/tools/perf/BUILD_TEST_FEATURE_DUMP -j4 
O=/tmp/tmp.6KCw8TRjm7 DESTDIR=/tmp/tmp.6yw98RESmd


Best regards,

- Arnaldo

Re: [PATCH v3] ARM64: ACPI: Update documentation for latest specification version

2016-04-07 Thread Al Stone

On 03/28/2016 06:06 PM, Al Stone wrote:
> The ACPI 6.1 specification was recently released at the end of January
> 2016, but the arm64 kernel documentation for the use of ACPI was written
> for the 5.1 version of the spec.  There were significant additions to the
> spec that had not yet been mentioned -- for example, the 6.0 mechanisms
> added to make it easier to define processors and low power idle states,
> as well as the 6.1 addition allowing regular interrupts (not just from
> GPIO) be used to signal ACPI general purpose events.
> 
> This patch reflects going back through and examining the specs in detail
> and updating content appropriately.  Whilst there, a few odds and ends of
> typos were caught as well.  This brings the documentation up to date with
> ACPI 6.1 for arm64.
> 
> Changes for v3:
>-- Clarify use of _LPI/_RDI (Vikas Sajjan)
>-- Whitespace cleanup as pointed out by checkpatch
> 
> Changes for v2:
>-- Clean up white space (Harb Abdulhamid)
>-- Clarification on _CCA usage (Harb Abdulhamid)
>-- IORT moved to required from recommended (Hanjun Guo)
>-- Clarify IORT description (Hanjun Guo)
> 
> Signed-off-by: Al Stone 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Jonathan Corbet 
> ---
>  Documentation/arm64/acpi_object_usage.txt | 446 
> ++
>  Documentation/arm64/arm-acpi.txt  |  28 +-
>  2 files changed, 357 insertions(+), 117 deletions(-)
> [snip...]

Ping?  Any further comments or is this good to go?

Thanks.

-- 
ciao,
al
---
Al Stone
Software Engineer
Red Hat, Inc.
a...@redhat.com
---

Re: [PATCH 17/19] perf tools: Build syscall table .c header from kernel's syscall_64.tbl