linux-next: build warnings after merge of the dt-rh tree

2016-03-06 Thread Stephen Rothwell
Hi Rob,

After merging the dt-rh tree, today's linux-next build (powerpc allyesconfig)
produced these warnings:

Warning (unit_address_vs_reg): Node 
/testcase-data/interrupts/interrupts-extended0 has a reg or ranges property, 
but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest100 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest101 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest0 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest1 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest2 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest3 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest5 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest6 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest7 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest8 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus has a reg or ranges property, 
but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest12 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest13 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest14 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest14/i2c@0/test-mux-dev
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay0/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay1/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay2/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay3/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay4/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay4/fragment@0/__overlay__/test-unittest4 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay5/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay6/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay7/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay8/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay9/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay10/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay10/fragment@0/__overlay__/test-unittest10 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay10/fragment@0/__overlay__/test-unittest10/test-unittest101
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay11/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay11/fragment@0/__overlay__/test-unittest11 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay11/fragment@0/__overlay__/test-unittest11/test-unittest111
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay12/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay13/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay15/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay15/fragment@0/__overlay__/test-unittest15 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 

linux-next: build warnings after merge of the dt-rh tree

2016-03-06 Thread Stephen Rothwell
Hi Rob,

After merging the dt-rh tree, today's linux-next build (powerpc allyesconfig)
produced these warnings:

Warning (unit_address_vs_reg): Node 
/testcase-data/interrupts/interrupts-extended0 has a reg or ranges property, 
but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest100 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest101 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest0 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest1 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest2 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest3 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest5 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest6 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest7 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/test-unittest8 has a reg or ranges 
property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus has a reg or ranges property, 
but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest12 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest13 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest14 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay-node/test-bus/i2c-test-bus/test-unittest14/i2c@0/test-mux-dev
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay0/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay1/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay2/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay3/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay4/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay4/fragment@0/__overlay__/test-unittest4 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay5/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay6/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay7/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay8/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay9/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay10/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay10/fragment@0/__overlay__/test-unittest10 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay10/fragment@0/__overlay__/test-unittest10/test-unittest101
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay11/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay11/fragment@0/__overlay__/test-unittest11 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay11/fragment@0/__overlay__/test-unittest11/test-unittest111
 has a reg or ranges property, but no unit name
Warning (unit_address_vs_reg): Node /testcase-data/overlay12/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay13/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node /testcase-data/overlay15/fragment@0 has a 
unit name, but no reg property
Warning (unit_address_vs_reg): Node 
/testcase-data/overlay15/fragment@0/__overlay__/test-unittest15 has a reg or 
ranges property, but no unit name
Warning (unit_address_vs_reg): Node 

[PATCH 2/2 V2] staging: dgnc: use tty_alloc_driver instead of kcalloc

2016-03-06 Thread Daeseok Youn
the tty_alloc_driver() can allocate memory for ttys and termios.
And also it can release allocated memory easly with using
put_tty_driver().

Signed-off-by: Daeseok Youn 
---
V2: removes appended email header on top of commit log

 drivers/staging/dgnc/dgnc_tty.c | 86 +++--
 1 file changed, 31 insertions(+), 55 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c
index e91cf70..6437a81 100644
--- a/drivers/staging/dgnc/dgnc_tty.c
+++ b/drivers/staging/dgnc/dgnc_tty.c
@@ -176,9 +176,15 @@ int dgnc_tty_preinit(void)
  */
 int dgnc_tty_register(struct dgnc_board *brd)
 {
-   int rc = 0;
+   int rc;
+
+   brd->serial_driver = tty_alloc_driver(brd->maxports,
+ TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_DYNAMIC_DEV |
+ TTY_DRIVER_HARDWARE_BREAK);
 
-   brd->serial_driver->magic = TTY_DRIVER_MAGIC;
+   if (IS_ERR(brd->serial_driver))
+   return PTR_ERR(brd->serial_driver);
 
snprintf(brd->SerialName, MAXTTYNAMELEN, "tty_dgnc_%d_", brd->boardnum);
 
@@ -186,31 +192,10 @@ int dgnc_tty_register(struct dgnc_board *brd)
brd->serial_driver->name_base = 0;
brd->serial_driver->major = 0;
brd->serial_driver->minor_start = 0;
-   brd->serial_driver->num = brd->maxports;
brd->serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
brd->serial_driver->subtype = SERIAL_TYPE_NORMAL;
brd->serial_driver->init_termios = DgncDefaultTermios;
brd->serial_driver->driver_name = DRVSTR;
-   brd->serial_driver->flags = (TTY_DRIVER_REAL_RAW |
-  TTY_DRIVER_DYNAMIC_DEV |
-  TTY_DRIVER_HARDWARE_BREAK);
-
-   /*
-* The kernel wants space to store pointers to
-* tty_struct's and termios's.
-*/
-   brd->serial_driver->ttys = kcalloc(brd->maxports,
-sizeof(*brd->serial_driver->ttys),
-GFP_KERNEL);
-   if (!brd->serial_driver->ttys)
-   return -ENOMEM;
-
-   kref_init(>serial_driver->kref);
-   brd->serial_driver->termios = kcalloc(brd->maxports,
-   
sizeof(*brd->serial_driver->termios),
-   GFP_KERNEL);
-   if (!brd->serial_driver->termios)
-   return -ENOMEM;
 
/*
 * Entry points for driver.  Called by the kernel from
@@ -224,7 +209,7 @@ int dgnc_tty_register(struct dgnc_board *brd)
if (rc < 0) {
dev_dbg(>pdev->dev,
"Can't register tty device (%d)\n", rc);
-   return rc;
+   goto free_serial_driver;
}
brd->dgnc_Major_Serial_Registered = true;
}
@@ -234,38 +219,26 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * again, separately so we don't get the LD confused about what major
 * we are when we get into the dgnc_tty_open() routine.
 */
-   brd->print_driver->magic = TTY_DRIVER_MAGIC;
+   brd->print_driver = tty_alloc_driver(brd->maxports,
+TTY_DRIVER_REAL_RAW |
+TTY_DRIVER_DYNAMIC_DEV |
+TTY_DRIVER_HARDWARE_BREAK);
+
+   if (IS_ERR(brd->print_driver)) {
+   rc = PTR_ERR(brd->print_driver);
+   goto unregister_serial_driver;
+   }
+
snprintf(brd->PrintName, MAXTTYNAMELEN, "pr_dgnc_%d_", brd->boardnum);
 
brd->print_driver->name = brd->PrintName;
brd->print_driver->name_base = 0;
brd->print_driver->major = brd->serial_driver->major;
brd->print_driver->minor_start = 0x80;
-   brd->print_driver->num = brd->maxports;
brd->print_driver->type = TTY_DRIVER_TYPE_SERIAL;
brd->print_driver->subtype = SERIAL_TYPE_NORMAL;
brd->print_driver->init_termios = DgncDefaultTermios;
brd->print_driver->driver_name = DRVSTR;
-   brd->print_driver->flags = (TTY_DRIVER_REAL_RAW |
- TTY_DRIVER_DYNAMIC_DEV |
- TTY_DRIVER_HARDWARE_BREAK);
-
-   /*
-* The kernel wants space to store pointers to
-* tty_struct's and termios's.  Must be separated from
-* the Serial Driver so we don't get confused
-*/
-   brd->print_driver->ttys = kcalloc(brd->maxports,
-   sizeof(*brd->print_driver->ttys),
-   GFP_KERNEL);
-   if (!brd->print_driver->ttys)
-   return -ENOMEM;
-   kref_init(>print_driver->kref);
-   brd->print_driver->termios = 

[PATCH 2/2 V2] staging: dgnc: use tty_alloc_driver instead of kcalloc

2016-03-06 Thread Daeseok Youn
the tty_alloc_driver() can allocate memory for ttys and termios.
And also it can release allocated memory easly with using
put_tty_driver().

Signed-off-by: Daeseok Youn 
---
V2: removes appended email header on top of commit log

 drivers/staging/dgnc/dgnc_tty.c | 86 +++--
 1 file changed, 31 insertions(+), 55 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c
index e91cf70..6437a81 100644
--- a/drivers/staging/dgnc/dgnc_tty.c
+++ b/drivers/staging/dgnc/dgnc_tty.c
@@ -176,9 +176,15 @@ int dgnc_tty_preinit(void)
  */
 int dgnc_tty_register(struct dgnc_board *brd)
 {
-   int rc = 0;
+   int rc;
+
+   brd->serial_driver = tty_alloc_driver(brd->maxports,
+ TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_DYNAMIC_DEV |
+ TTY_DRIVER_HARDWARE_BREAK);
 
-   brd->serial_driver->magic = TTY_DRIVER_MAGIC;
+   if (IS_ERR(brd->serial_driver))
+   return PTR_ERR(brd->serial_driver);
 
snprintf(brd->SerialName, MAXTTYNAMELEN, "tty_dgnc_%d_", brd->boardnum);
 
@@ -186,31 +192,10 @@ int dgnc_tty_register(struct dgnc_board *brd)
brd->serial_driver->name_base = 0;
brd->serial_driver->major = 0;
brd->serial_driver->minor_start = 0;
-   brd->serial_driver->num = brd->maxports;
brd->serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
brd->serial_driver->subtype = SERIAL_TYPE_NORMAL;
brd->serial_driver->init_termios = DgncDefaultTermios;
brd->serial_driver->driver_name = DRVSTR;
-   brd->serial_driver->flags = (TTY_DRIVER_REAL_RAW |
-  TTY_DRIVER_DYNAMIC_DEV |
-  TTY_DRIVER_HARDWARE_BREAK);
-
-   /*
-* The kernel wants space to store pointers to
-* tty_struct's and termios's.
-*/
-   brd->serial_driver->ttys = kcalloc(brd->maxports,
-sizeof(*brd->serial_driver->ttys),
-GFP_KERNEL);
-   if (!brd->serial_driver->ttys)
-   return -ENOMEM;
-
-   kref_init(>serial_driver->kref);
-   brd->serial_driver->termios = kcalloc(brd->maxports,
-   
sizeof(*brd->serial_driver->termios),
-   GFP_KERNEL);
-   if (!brd->serial_driver->termios)
-   return -ENOMEM;
 
/*
 * Entry points for driver.  Called by the kernel from
@@ -224,7 +209,7 @@ int dgnc_tty_register(struct dgnc_board *brd)
if (rc < 0) {
dev_dbg(>pdev->dev,
"Can't register tty device (%d)\n", rc);
-   return rc;
+   goto free_serial_driver;
}
brd->dgnc_Major_Serial_Registered = true;
}
@@ -234,38 +219,26 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * again, separately so we don't get the LD confused about what major
 * we are when we get into the dgnc_tty_open() routine.
 */
-   brd->print_driver->magic = TTY_DRIVER_MAGIC;
+   brd->print_driver = tty_alloc_driver(brd->maxports,
+TTY_DRIVER_REAL_RAW |
+TTY_DRIVER_DYNAMIC_DEV |
+TTY_DRIVER_HARDWARE_BREAK);
+
+   if (IS_ERR(brd->print_driver)) {
+   rc = PTR_ERR(brd->print_driver);
+   goto unregister_serial_driver;
+   }
+
snprintf(brd->PrintName, MAXTTYNAMELEN, "pr_dgnc_%d_", brd->boardnum);
 
brd->print_driver->name = brd->PrintName;
brd->print_driver->name_base = 0;
brd->print_driver->major = brd->serial_driver->major;
brd->print_driver->minor_start = 0x80;
-   brd->print_driver->num = brd->maxports;
brd->print_driver->type = TTY_DRIVER_TYPE_SERIAL;
brd->print_driver->subtype = SERIAL_TYPE_NORMAL;
brd->print_driver->init_termios = DgncDefaultTermios;
brd->print_driver->driver_name = DRVSTR;
-   brd->print_driver->flags = (TTY_DRIVER_REAL_RAW |
- TTY_DRIVER_DYNAMIC_DEV |
- TTY_DRIVER_HARDWARE_BREAK);
-
-   /*
-* The kernel wants space to store pointers to
-* tty_struct's and termios's.  Must be separated from
-* the Serial Driver so we don't get confused
-*/
-   brd->print_driver->ttys = kcalloc(brd->maxports,
-   sizeof(*brd->print_driver->ttys),
-   GFP_KERNEL);
-   if (!brd->print_driver->ttys)
-   return -ENOMEM;
-   kref_init(>print_driver->kref);
-   brd->print_driver->termios = kcalloc(brd->maxports,
- 

[PATCH trivial] scripts/prune-kernel: remove kdump images

2016-03-06 Thread Xiong Zhou
Signed-off-by: Xiong Zhou 
---
 scripts/prune-kernel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/prune-kernel b/scripts/prune-kernel
index ab5034e..9c67be2 100755
--- a/scripts/prune-kernel
+++ b/scripts/prune-kernel
@@ -14,7 +14,7 @@ do
 echo "removing $f"
 rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
 rm -f "/boot/vmlinuz-$f"   "/boot/config-$f"
-rm -rf "/lib/modules/$f"
+rm -rf "/lib/modules/$f" "/boot/initramfs-${f}kdump.img"
 new-kernel-pkg --remove $f
 fi
 done
-- 
2.5.0


[PATCH trivial] scripts/prune-kernel: remove kdump images

2016-03-06 Thread Xiong Zhou
Signed-off-by: Xiong Zhou 
---
 scripts/prune-kernel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/prune-kernel b/scripts/prune-kernel
index ab5034e..9c67be2 100755
--- a/scripts/prune-kernel
+++ b/scripts/prune-kernel
@@ -14,7 +14,7 @@ do
 echo "removing $f"
 rm -f "/boot/initramfs-$f.img" "/boot/System.map-$f"
 rm -f "/boot/vmlinuz-$f"   "/boot/config-$f"
-rm -rf "/lib/modules/$f"
+rm -rf "/lib/modules/$f" "/boot/initramfs-${f}kdump.img"
 new-kernel-pkg --remove $f
 fi
 done
-- 
2.5.0


[PATCH 1/2 V2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread Daeseok Youn
For using tty_alloc_driver, SerialDriver has to be pointer type.
It also has checkpatch.pl warning about Camelcase, so
SerialDriver is changed to serial_driver.

Signed-off-by: Daeseok Youn 
---
V2: removes appended email header on top of commit log

 drivers/staging/dgnc/dgnc_driver.h |   4 +-
 drivers/staging/dgnc/dgnc_tty.c| 118 ++---
 2 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.h 
b/drivers/staging/dgnc/dgnc_driver.h
index ce7cd9b..1c7a8fa 100644
--- a/drivers/staging/dgnc/dgnc_driver.h
+++ b/drivers/staging/dgnc/dgnc_driver.h
@@ -205,9 +205,9 @@ struct dgnc_board {
 * to our channels.
 */
 
-   struct tty_driver   SerialDriver;
+   struct tty_driver *serial_driver;
charSerialName[200];
-   struct tty_driver   PrintDriver;
+   struct tty_driver *print_driver;
charPrintName[200];
 
booldgnc_Major_Serial_Registered;
diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c
index b79eab0..e91cf70 100644
--- a/drivers/staging/dgnc/dgnc_tty.c
+++ b/drivers/staging/dgnc/dgnc_tty.c
@@ -178,20 +178,20 @@ int dgnc_tty_register(struct dgnc_board *brd)
 {
int rc = 0;
 
-   brd->SerialDriver.magic = TTY_DRIVER_MAGIC;
+   brd->serial_driver->magic = TTY_DRIVER_MAGIC;
 
snprintf(brd->SerialName, MAXTTYNAMELEN, "tty_dgnc_%d_", brd->boardnum);
 
-   brd->SerialDriver.name = brd->SerialName;
-   brd->SerialDriver.name_base = 0;
-   brd->SerialDriver.major = 0;
-   brd->SerialDriver.minor_start = 0;
-   brd->SerialDriver.num = brd->maxports;
-   brd->SerialDriver.type = TTY_DRIVER_TYPE_SERIAL;
-   brd->SerialDriver.subtype = SERIAL_TYPE_NORMAL;
-   brd->SerialDriver.init_termios = DgncDefaultTermios;
-   brd->SerialDriver.driver_name = DRVSTR;
-   brd->SerialDriver.flags = (TTY_DRIVER_REAL_RAW |
+   brd->serial_driver->name = brd->SerialName;
+   brd->serial_driver->name_base = 0;
+   brd->serial_driver->major = 0;
+   brd->serial_driver->minor_start = 0;
+   brd->serial_driver->num = brd->maxports;
+   brd->serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+   brd->serial_driver->subtype = SERIAL_TYPE_NORMAL;
+   brd->serial_driver->init_termios = DgncDefaultTermios;
+   brd->serial_driver->driver_name = DRVSTR;
+   brd->serial_driver->flags = (TTY_DRIVER_REAL_RAW |
   TTY_DRIVER_DYNAMIC_DEV |
   TTY_DRIVER_HARDWARE_BREAK);
 
@@ -199,28 +199,28 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * The kernel wants space to store pointers to
 * tty_struct's and termios's.
 */
-   brd->SerialDriver.ttys = kcalloc(brd->maxports,
-sizeof(*brd->SerialDriver.ttys),
+   brd->serial_driver->ttys = kcalloc(brd->maxports,
+sizeof(*brd->serial_driver->ttys),
 GFP_KERNEL);
-   if (!brd->SerialDriver.ttys)
+   if (!brd->serial_driver->ttys)
return -ENOMEM;
 
-   kref_init(>SerialDriver.kref);
-   brd->SerialDriver.termios = kcalloc(brd->maxports,
-   sizeof(*brd->SerialDriver.termios),
+   kref_init(>serial_driver->kref);
+   brd->serial_driver->termios = kcalloc(brd->maxports,
+   
sizeof(*brd->serial_driver->termios),
GFP_KERNEL);
-   if (!brd->SerialDriver.termios)
+   if (!brd->serial_driver->termios)
return -ENOMEM;
 
/*
 * Entry points for driver.  Called by the kernel from
 * tty_io.c and n_tty.c.
 */
-   tty_set_operations(>SerialDriver, _tty_ops);
+   tty_set_operations(brd->serial_driver, _tty_ops);
 
if (!brd->dgnc_Major_Serial_Registered) {
/* Register tty devices */
-   rc = tty_register_driver(>SerialDriver);
+   rc = tty_register_driver(brd->serial_driver);
if (rc < 0) {
dev_dbg(>pdev->dev,
"Can't register tty device (%d)\n", rc);
@@ -234,19 +234,19 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * again, separately so we don't get the LD confused about what major
 * we are when we get into the dgnc_tty_open() routine.
 */
-   brd->PrintDriver.magic = TTY_DRIVER_MAGIC;
+   brd->print_driver->magic = TTY_DRIVER_MAGIC;
snprintf(brd->PrintName, MAXTTYNAMELEN, "pr_dgnc_%d_", brd->boardnum);
 
-   brd->PrintDriver.name = brd->PrintName;
-   brd->PrintDriver.name_base = 0;
-   brd->PrintDriver.major = 

[PATCH 1/2 V2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread Daeseok Youn
For using tty_alloc_driver, SerialDriver has to be pointer type.
It also has checkpatch.pl warning about Camelcase, so
SerialDriver is changed to serial_driver.

Signed-off-by: Daeseok Youn 
---
V2: removes appended email header on top of commit log

 drivers/staging/dgnc/dgnc_driver.h |   4 +-
 drivers/staging/dgnc/dgnc_tty.c| 118 ++---
 2 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.h 
b/drivers/staging/dgnc/dgnc_driver.h
index ce7cd9b..1c7a8fa 100644
--- a/drivers/staging/dgnc/dgnc_driver.h
+++ b/drivers/staging/dgnc/dgnc_driver.h
@@ -205,9 +205,9 @@ struct dgnc_board {
 * to our channels.
 */
 
-   struct tty_driver   SerialDriver;
+   struct tty_driver *serial_driver;
charSerialName[200];
-   struct tty_driver   PrintDriver;
+   struct tty_driver *print_driver;
charPrintName[200];
 
booldgnc_Major_Serial_Registered;
diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c
index b79eab0..e91cf70 100644
--- a/drivers/staging/dgnc/dgnc_tty.c
+++ b/drivers/staging/dgnc/dgnc_tty.c
@@ -178,20 +178,20 @@ int dgnc_tty_register(struct dgnc_board *brd)
 {
int rc = 0;
 
-   brd->SerialDriver.magic = TTY_DRIVER_MAGIC;
+   brd->serial_driver->magic = TTY_DRIVER_MAGIC;
 
snprintf(brd->SerialName, MAXTTYNAMELEN, "tty_dgnc_%d_", brd->boardnum);
 
-   brd->SerialDriver.name = brd->SerialName;
-   brd->SerialDriver.name_base = 0;
-   brd->SerialDriver.major = 0;
-   brd->SerialDriver.minor_start = 0;
-   brd->SerialDriver.num = brd->maxports;
-   brd->SerialDriver.type = TTY_DRIVER_TYPE_SERIAL;
-   brd->SerialDriver.subtype = SERIAL_TYPE_NORMAL;
-   brd->SerialDriver.init_termios = DgncDefaultTermios;
-   brd->SerialDriver.driver_name = DRVSTR;
-   brd->SerialDriver.flags = (TTY_DRIVER_REAL_RAW |
+   brd->serial_driver->name = brd->SerialName;
+   brd->serial_driver->name_base = 0;
+   brd->serial_driver->major = 0;
+   brd->serial_driver->minor_start = 0;
+   brd->serial_driver->num = brd->maxports;
+   brd->serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+   brd->serial_driver->subtype = SERIAL_TYPE_NORMAL;
+   brd->serial_driver->init_termios = DgncDefaultTermios;
+   brd->serial_driver->driver_name = DRVSTR;
+   brd->serial_driver->flags = (TTY_DRIVER_REAL_RAW |
   TTY_DRIVER_DYNAMIC_DEV |
   TTY_DRIVER_HARDWARE_BREAK);
 
@@ -199,28 +199,28 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * The kernel wants space to store pointers to
 * tty_struct's and termios's.
 */
-   brd->SerialDriver.ttys = kcalloc(brd->maxports,
-sizeof(*brd->SerialDriver.ttys),
+   brd->serial_driver->ttys = kcalloc(brd->maxports,
+sizeof(*brd->serial_driver->ttys),
 GFP_KERNEL);
-   if (!brd->SerialDriver.ttys)
+   if (!brd->serial_driver->ttys)
return -ENOMEM;
 
-   kref_init(>SerialDriver.kref);
-   brd->SerialDriver.termios = kcalloc(brd->maxports,
-   sizeof(*brd->SerialDriver.termios),
+   kref_init(>serial_driver->kref);
+   brd->serial_driver->termios = kcalloc(brd->maxports,
+   
sizeof(*brd->serial_driver->termios),
GFP_KERNEL);
-   if (!brd->SerialDriver.termios)
+   if (!brd->serial_driver->termios)
return -ENOMEM;
 
/*
 * Entry points for driver.  Called by the kernel from
 * tty_io.c and n_tty.c.
 */
-   tty_set_operations(>SerialDriver, _tty_ops);
+   tty_set_operations(brd->serial_driver, _tty_ops);
 
if (!brd->dgnc_Major_Serial_Registered) {
/* Register tty devices */
-   rc = tty_register_driver(>SerialDriver);
+   rc = tty_register_driver(brd->serial_driver);
if (rc < 0) {
dev_dbg(>pdev->dev,
"Can't register tty device (%d)\n", rc);
@@ -234,19 +234,19 @@ int dgnc_tty_register(struct dgnc_board *brd)
 * again, separately so we don't get the LD confused about what major
 * we are when we get into the dgnc_tty_open() routine.
 */
-   brd->PrintDriver.magic = TTY_DRIVER_MAGIC;
+   brd->print_driver->magic = TTY_DRIVER_MAGIC;
snprintf(brd->PrintName, MAXTTYNAMELEN, "pr_dgnc_%d_", brd->boardnum);
 
-   brd->PrintDriver.name = brd->PrintName;
-   brd->PrintDriver.name_base = 0;
-   brd->PrintDriver.major = brd->SerialDriver.major;
-   

[RFC PATCH v4 6/7] vfio-pci: Allow to mmap MSI-X table if IOMMU_CAP_INTR_REMAP was set

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap MSI-X
table in case that user get to touch this directly.

But we should allow to mmap these MSI-X tables if IOMMU
supports interrupt remapping which can ensure that a
given pci device can only shoot the MSIs assigned for it.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c  |8 +---
 drivers/vfio/pci/vfio_pci_rdwr.c |4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 49d7a69..d6f4788 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -592,13 +592,14 @@ static long vfio_pci_ioctl(void *device_data,
IORESOURCE_MEM && !pci_resources_share_page(pdev,
info.index)) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-   if (info.index == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus,
+   IOMMU_CAP_INTR_REMAP) &&
+   info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, );
if (ret)
return ret;
}
}
-
break;
case VFIO_PCI_ROM_REGION_INDEX:
{
@@ -1029,7 +1030,8 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
return -EINVAL;
 
-   if (index == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus, IOMMU_CAP_INTR_REMAP) &&
+   index == vdev->msix_bar) {
/*
 * Disallow mmaps overlapping the MSI-X table; users don't
 * get to touch this directly.  We could find somewhere
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 5ffd1d9..1c46c29 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vfio_pci_private.h"
 
@@ -164,7 +165,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char 
__user *buf,
} else
io = vdev->barmap[bar];
 
-   if (bar == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus, IOMMU_CAP_INTR_REMAP) &&
+   bar == vdev->msix_bar) {
x_start = vdev->msix_offset;
x_end = vdev->msix_offset + vdev->msix_size;
}
-- 
1.7.9.5



[RFC PATCH v4 3/7] PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set

2016-03-06 Thread Yongji Xie
The resource_alignment will releases memory resources
allocated by firmware so that kernel can reassign new
resources later on. But this will cause the problem
that no resources can be allocated by kernel if
PCI_PROBE_ONLY was set, e.g. on pSeries platform
because PCI_PROBE_ONLY force kernel to use firmware
setup and not to reassign any resources.

To solve this problem, this patch ignores
resource_alignment if PCI_PROBE_ONLY was set.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 ++
 drivers/pci/probe.c |3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index d8b29ab..8028631 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2922,6 +2922,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
windows need to be expanded.
noresize: Don't change the resources' sizes when
reassigning alignment.
+   Note that this option will not work if
+   PCI_PROBE_ONLY is set.
ecrc=   Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6d7ab9b..bc31cad 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1719,7 +1719,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus 
*bus)
pci_fixup_device(pci_fixup_header, dev);
 
/* moved out from quirk header fixup code */
-   pci_reassigndev_resource_alignment(dev);
+   if (!pci_has_flag(PCI_PROBE_ONLY))
+   pci_reassigndev_resource_alignment(dev);
 
/* Clear the state_saved flag. */
dev->state_saved = false;
-- 
1.7.9.5



[RFC PATCH v4 6/7] vfio-pci: Allow to mmap MSI-X table if IOMMU_CAP_INTR_REMAP was set

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap MSI-X
table in case that user get to touch this directly.

But we should allow to mmap these MSI-X tables if IOMMU
supports interrupt remapping which can ensure that a
given pci device can only shoot the MSIs assigned for it.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c  |8 +---
 drivers/vfio/pci/vfio_pci_rdwr.c |4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 49d7a69..d6f4788 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -592,13 +592,14 @@ static long vfio_pci_ioctl(void *device_data,
IORESOURCE_MEM && !pci_resources_share_page(pdev,
info.index)) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-   if (info.index == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus,
+   IOMMU_CAP_INTR_REMAP) &&
+   info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, );
if (ret)
return ret;
}
}
-
break;
case VFIO_PCI_ROM_REGION_INDEX:
{
@@ -1029,7 +1030,8 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
return -EINVAL;
 
-   if (index == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus, IOMMU_CAP_INTR_REMAP) &&
+   index == vdev->msix_bar) {
/*
 * Disallow mmaps overlapping the MSI-X table; users don't
 * get to touch this directly.  We could find somewhere
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 5ffd1d9..1c46c29 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "vfio_pci_private.h"
 
@@ -164,7 +165,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char 
__user *buf,
} else
io = vdev->barmap[bar];
 
-   if (bar == vdev->msix_bar) {
+   if (!iommu_capable(pdev->dev.bus, IOMMU_CAP_INTR_REMAP) &&
+   bar == vdev->msix_bar) {
x_start = vdev->msix_offset;
x_end = vdev->msix_offset + vdev->msix_size;
}
-- 
1.7.9.5



[RFC PATCH v4 3/7] PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set

2016-03-06 Thread Yongji Xie
The resource_alignment will releases memory resources
allocated by firmware so that kernel can reassign new
resources later on. But this will cause the problem
that no resources can be allocated by kernel if
PCI_PROBE_ONLY was set, e.g. on pSeries platform
because PCI_PROBE_ONLY force kernel to use firmware
setup and not to reassign any resources.

To solve this problem, this patch ignores
resource_alignment if PCI_PROBE_ONLY was set.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 ++
 drivers/pci/probe.c |3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index d8b29ab..8028631 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2922,6 +2922,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
windows need to be expanded.
noresize: Don't change the resources' sizes when
reassigning alignment.
+   Note that this option will not work if
+   PCI_PROBE_ONLY is set.
ecrc=   Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6d7ab9b..bc31cad 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1719,7 +1719,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus 
*bus)
pci_fixup_device(pci_fixup_header, dev);
 
/* moved out from quirk header fixup code */
-   pci_reassigndev_resource_alignment(dev);
+   if (!pci_has_flag(PCI_PROBE_ONLY))
+   pci_reassigndev_resource_alignment(dev);
 
/* Clear the state_saved flag. */
dev->state_saved = false;
-- 
1.7.9.5



Re: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread DaeSeok Youn
2016-03-07 16:10 GMT+09:00 Sudip Mukherjee :
>
> On Mon, Feb 29, 2016 at 11:15:51AM +0900, Daeseok Youn wrote:
> > From 70f8703b3bd73fa56f4ea91e98967b8925550aa6 Mon Sep 17 00:00:00 2001
> > From: Daeseok Youn 
> > Date: Thu, 25 Feb 2016 14:53:37 +0900
> > Subject: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct
>
> These lines above are not required in the commit log. Please send v2 of
> both your patches without these lines.

ok. I will send my patches again.

Thanks.

regards,
Daeseok.
>
> regards
> sudip


Re: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread DaeSeok Youn
2016-03-07 16:10 GMT+09:00 Sudip Mukherjee :
>
> On Mon, Feb 29, 2016 at 11:15:51AM +0900, Daeseok Youn wrote:
> > From 70f8703b3bd73fa56f4ea91e98967b8925550aa6 Mon Sep 17 00:00:00 2001
> > From: Daeseok Youn 
> > Date: Thu, 25 Feb 2016 14:53:37 +0900
> > Subject: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct
>
> These lines above are not required in the commit log. Please send v2 of
> both your patches without these lines.

ok. I will send my patches again.

Thanks.

regards,
Daeseok.
>
> regards
> sudip


[RFC PATCH v4 5/7] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs.

But we should allow to mmap these sub-page MMIO BARs if PCI
resource allocator can make sure these BARs' mmio page will
not be shared with other BARs.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1ce1d36..49d7a69 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -589,7 +589,8 @@ static long vfio_pci_ioctl(void *device_data,
 VFIO_REGION_INFO_FLAG_WRITE;
if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
pci_resource_flags(pdev, info.index) &
-   IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+   IORESOURCE_MEM && !pci_resources_share_page(pdev,
+   info.index)) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
if (info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, );
@@ -1016,6 +1017,10 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
return -EINVAL;
 
phys_len = pci_resource_len(pdev, index);
+
+   if (!pci_resources_share_page(pdev, index))
+   phys_len = PAGE_ALIGN(phys_len);
+
req_len = vma->vm_end - vma->vm_start;
pgoff = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
-- 
1.7.9.5



[RFC PATCH v4 7/7] powerpc/powernv/pci-ioda: Add IOMMU_CAP_INTR_REMAP for IODA host bridge

2016-03-06 Thread Yongji Xie
This patch adds IOMMU_CAP_INTR_REMAP for IODA host bridge so that
we can mmap MSI-X table in vfio driver.

Signed-off-by: Yongji Xie 
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f90dc04..f01b9ab 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1955,6 +1955,20 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.free = pnv_ioda2_table_free,
 };
 
+static bool pnv_ioda_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_INTR_REMAP:
+   return true;
+   default:
+   return false;
+   }
+}
+
+static struct iommu_ops pnv_ioda_iommu_ops = {
+   .capable = pnv_ioda_iommu_capable,
+};
+
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
  struct pnv_ioda_pe *pe, unsigned int base,
  unsigned int segs)
@@ -3078,6 +3092,9 @@ static void pnv_pci_ioda_fixup(void)
 
/* Link NPU IODA tables to their PCI devices. */
pnv_npu_ioda_fixup();
+
+   /* Add IOMMU_CAP_INTR_REMAP */
+   bus_set_iommu(_bus_type, _ioda_iommu_ops);
 }
 
 /*
-- 
1.7.9.5



[RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-06 Thread Yongji Xie
When vfio passthrough a PCI device of which MMIO BARs
are smaller than PAGE_SIZE, guest will not handle the
mmio accesses to the BARs which leads to mmio emulations
in host.

This is because vfio will not allow to passthrough one
BAR's mmio page which may be shared with other BARs.

To solve this performance issue, this patch modifies
resource_alignment to support syntax where multiple
devices get the same alignment. So we can use something
like "pci=resource_alignment=*:*:*.*:noresize" to
enforce the alignment of all MMIO BARs to be at least
PAGE_SIZE so that one BAR's mmio page would not be
shared with other BARs.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 +
 drivers/pci/pci.c   |   90 ++-
 include/linux/pci.h |4 ++
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 8028631..74b38ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
+   , ,  and  can be set to
+   "*" which means match all values.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 760cce5..44ab59f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -102,6 +102,8 @@ unsigned int pcibios_max_latency = 255;
 /* If set, the PCIe ARI capability will not be used. */
 static bool pcie_ari_disabled;
 
+bool pci_resources_page_aligned;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -4604,6 +4606,7 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
+   bool invalid = false;
 
spin_lock(_alignment_lock);
p = resource_alignment_param;
@@ -4615,16 +4618,49 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
} else {
align_order = -1;
}
-   if (sscanf(p, "%x:%x:%x.%x%n",
-   , , , , ) != 4) {
+   if (p[0] == '*' && p[1] == ':') {
+   seg = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != ':') {
+   invalid = true;
+   break;
+   }
+   p += count + 1;
+   if (*p == '*') {
+   bus = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1) {
+   invalid = true;
+   break;
+   }
+   p += count;
+   if (*p == '.') {
+   slot = bus;
+   bus = seg;
seg = 0;
-   if (sscanf(p, "%x:%x.%x%n",
-   , , , ) != 3) {
-   /* Invalid format */
-   printk(KERN_ERR "PCI: Can't parse 
resource_alignment parameter: %s\n",
-   p);
+   p++;
+   } else if (*p == ':') {
+   p++;
+   if (p[0] == '*' && p[1] == '.') {
+   slot = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != '.') {
+   invalid = true;
break;
}
+   p += count + 1;
+   } else {
+   invalid = true;
+   break;
+   }
+   if (*p == '*') {
+   func = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1) {
+   invalid = true;
+   break;
}
p += count;
if (!strncmp(p, ":noresize", 9)) {
@@ -4632,23 +4668,34 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
p += 9;
} else
*resize = true;
-

[RFC PATCH v4 5/7] vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
page may be shared with other BARs.

But we should allow to mmap these sub-page MMIO BARs if PCI
resource allocator can make sure these BARs' mmio page will
not be shared with other BARs.

Signed-off-by: Yongji Xie 
---
 drivers/vfio/pci/vfio_pci.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1ce1d36..49d7a69 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -589,7 +589,8 @@ static long vfio_pci_ioctl(void *device_data,
 VFIO_REGION_INFO_FLAG_WRITE;
if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
pci_resource_flags(pdev, info.index) &
-   IORESOURCE_MEM && info.size >= PAGE_SIZE) {
+   IORESOURCE_MEM && !pci_resources_share_page(pdev,
+   info.index)) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
if (info.index == vdev->msix_bar) {
ret = msix_sparse_mmap_cap(vdev, );
@@ -1016,6 +1017,10 @@ static int vfio_pci_mmap(void *device_data, struct 
vm_area_struct *vma)
return -EINVAL;
 
phys_len = pci_resource_len(pdev, index);
+
+   if (!pci_resources_share_page(pdev, index))
+   phys_len = PAGE_ALIGN(phys_len);
+
req_len = vma->vm_end - vma->vm_start;
pgoff = vma->vm_pgoff &
((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
-- 
1.7.9.5



[RFC PATCH v4 7/7] powerpc/powernv/pci-ioda: Add IOMMU_CAP_INTR_REMAP for IODA host bridge

2016-03-06 Thread Yongji Xie
This patch adds IOMMU_CAP_INTR_REMAP for IODA host bridge so that
we can mmap MSI-X table in vfio driver.

Signed-off-by: Yongji Xie 
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index f90dc04..f01b9ab 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1955,6 +1955,20 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.free = pnv_ioda2_table_free,
 };
 
+static bool pnv_ioda_iommu_capable(enum iommu_cap cap)
+{
+   switch (cap) {
+   case IOMMU_CAP_INTR_REMAP:
+   return true;
+   default:
+   return false;
+   }
+}
+
+static struct iommu_ops pnv_ioda_iommu_ops = {
+   .capable = pnv_ioda_iommu_capable,
+};
+
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
  struct pnv_ioda_pe *pe, unsigned int base,
  unsigned int segs)
@@ -3078,6 +3092,9 @@ static void pnv_pci_ioda_fixup(void)
 
/* Link NPU IODA tables to their PCI devices. */
pnv_npu_ioda_fixup();
+
+   /* Add IOMMU_CAP_INTR_REMAP */
+   bus_set_iommu(_bus_type, _ioda_iommu_ops);
 }
 
 /*
-- 
1.7.9.5



[RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-06 Thread Yongji Xie
When vfio passthrough a PCI device of which MMIO BARs
are smaller than PAGE_SIZE, guest will not handle the
mmio accesses to the BARs which leads to mmio emulations
in host.

This is because vfio will not allow to passthrough one
BAR's mmio page which may be shared with other BARs.

To solve this performance issue, this patch modifies
resource_alignment to support syntax where multiple
devices get the same alignment. So we can use something
like "pci=resource_alignment=*:*:*.*:noresize" to
enforce the alignment of all MMIO BARs to be at least
PAGE_SIZE so that one BAR's mmio page would not be
shared with other BARs.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 +
 drivers/pci/pci.c   |   90 ++-
 include/linux/pci.h |4 ++
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 8028631..74b38ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
+   , ,  and  can be set to
+   "*" which means match all values.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 760cce5..44ab59f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -102,6 +102,8 @@ unsigned int pcibios_max_latency = 255;
 /* If set, the PCIe ARI capability will not be used. */
 static bool pcie_ari_disabled;
 
+bool pci_resources_page_aligned;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -4604,6 +4606,7 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
+   bool invalid = false;
 
spin_lock(_alignment_lock);
p = resource_alignment_param;
@@ -4615,16 +4618,49 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
} else {
align_order = -1;
}
-   if (sscanf(p, "%x:%x:%x.%x%n",
-   , , , , ) != 4) {
+   if (p[0] == '*' && p[1] == ':') {
+   seg = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != ':') {
+   invalid = true;
+   break;
+   }
+   p += count + 1;
+   if (*p == '*') {
+   bus = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1) {
+   invalid = true;
+   break;
+   }
+   p += count;
+   if (*p == '.') {
+   slot = bus;
+   bus = seg;
seg = 0;
-   if (sscanf(p, "%x:%x.%x%n",
-   , , , ) != 3) {
-   /* Invalid format */
-   printk(KERN_ERR "PCI: Can't parse 
resource_alignment parameter: %s\n",
-   p);
+   p++;
+   } else if (*p == ':') {
+   p++;
+   if (p[0] == '*' && p[1] == '.') {
+   slot = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1 ||
+   p[count] != '.') {
+   invalid = true;
break;
}
+   p += count + 1;
+   } else {
+   invalid = true;
+   break;
+   }
+   if (*p == '*') {
+   func = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", , ) != 1) {
+   invalid = true;
+   break;
}
p += count;
if (!strncmp(p, ":noresize", 9)) {
@@ -4632,23 +4668,34 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
p += 9;
} else
*resize = true;
-   if (seg == 

[RFC PATCH v4 1/7] PCI: Add a new option for resource_alignment to reassign alignment

2016-03-06 Thread Yongji Xie
When using resource_alignment kernel parameter, the current
implement reassigns the alignment by changing resources' size
which can potentially break some drivers.

So this patch adds a new option "noresize" for the parameter
to solve this problem.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |5 -
 drivers/pci/pci.c   |   36 +--
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 9a53c92..d8b29ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2912,13 +2912,16 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
window. The default value is 64 megabytes.
resource_alignment=
Format:
-   [@][:]:.[; ...]
+   [@][:]:.
+   [:noresize][; ...]
Specifies alignment and device to reassign
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
+   noresize: Don't change the resources' sizes when
+   reassigning alignment.
ecrc=   Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..760cce5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4598,7 +4598,8 @@ static DEFINE_SPINLOCK(resource_alignment_lock);
  * RETURNS: Resource alignment if it is specified.
  *  Zero if it is not specified.
  */
-static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
+   bool *resize)
 {
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
@@ -4626,6 +4627,11 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev)
}
}
p += count;
+   if (!strncmp(p, ":noresize", 9)) {
+   *resize = false;
+   p += 9;
+   } else
+   *resize = true;
if (seg == pci_domain_nr(dev->bus) &&
bus == dev->bus->number &&
slot == PCI_SLOT(dev->devfn) &&
@@ -4658,11 +4664,12 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
 {
int i;
struct resource *r;
+   bool resize;
resource_size_t align, size;
u16 command;
 
/* check if specified PCI is target device to reassign */
-   align = pci_specified_resource_alignment(dev);
+   align = pci_specified_resource_alignment(dev, );
if (!align)
return;
 
@@ -4684,15 +4691,24 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
if (!(r->flags & IORESOURCE_MEM))
continue;
size = resource_size(r);
-   if (size < align) {
-   size = align;
-   dev_info(>dev,
-   "Rounding up size of resource #%d to %#llx.\n",
-   i, (unsigned long long)size);
+   if (resize) {
+   if (size < align) {
+   size = align;
+   dev_info(>dev,
+   "Rounding up size of resource #%d to 
%#llx.\n",
+   i, (unsigned long long)size);
+   }
+   r->flags |= IORESOURCE_UNSET;
+   r->end = size - 1;
+   r->start = 0;
+   } else {
+   if (size > align)
+   align = size;
+   r->flags &= ~IORESOURCE_SIZEALIGN;
+   r->flags |= IORESOURCE_STARTALIGN | IORESOURCE_UNSET;
+   r->start = align;
+   r->end = r->start + size - 1;
}
-   r->flags |= IORESOURCE_UNSET;
-   r->end = size - 1;
-   r->start = 0;
}
/* Need to disable bridge's resource window,
 * to enable the kernel to reassign new resource
-- 
1.7.9.5



[RFC PATCH v4 1/7] PCI: Add a new option for resource_alignment to reassign alignment

2016-03-06 Thread Yongji Xie
When using resource_alignment kernel parameter, the current
implement reassigns the alignment by changing resources' size
which can potentially break some drivers.

So this patch adds a new option "noresize" for the parameter
to solve this problem.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |5 -
 drivers/pci/pci.c   |   36 +--
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 9a53c92..d8b29ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2912,13 +2912,16 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
window. The default value is 64 megabytes.
resource_alignment=
Format:
-   [@][:]:.[; ...]
+   [@][:]:.
+   [:noresize][; ...]
Specifies alignment and device to reassign
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
+   noresize: Don't change the resources' sizes when
+   reassigning alignment.
ecrc=   Enable/disable PCIe ECRC (transaction layer
end-to-end CRC checking).
bios: Use BIOS/firmware settings. This is the
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 602eb42..760cce5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4598,7 +4598,8 @@ static DEFINE_SPINLOCK(resource_alignment_lock);
  * RETURNS: Resource alignment if it is specified.
  *  Zero if it is not specified.
  */
-static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev)
+static resource_size_t pci_specified_resource_alignment(struct pci_dev *dev,
+   bool *resize)
 {
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
@@ -4626,6 +4627,11 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev)
}
}
p += count;
+   if (!strncmp(p, ":noresize", 9)) {
+   *resize = false;
+   p += 9;
+   } else
+   *resize = true;
if (seg == pci_domain_nr(dev->bus) &&
bus == dev->bus->number &&
slot == PCI_SLOT(dev->devfn) &&
@@ -4658,11 +4664,12 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
 {
int i;
struct resource *r;
+   bool resize;
resource_size_t align, size;
u16 command;
 
/* check if specified PCI is target device to reassign */
-   align = pci_specified_resource_alignment(dev);
+   align = pci_specified_resource_alignment(dev, );
if (!align)
return;
 
@@ -4684,15 +4691,24 @@ void pci_reassigndev_resource_alignment(struct pci_dev 
*dev)
if (!(r->flags & IORESOURCE_MEM))
continue;
size = resource_size(r);
-   if (size < align) {
-   size = align;
-   dev_info(>dev,
-   "Rounding up size of resource #%d to %#llx.\n",
-   i, (unsigned long long)size);
+   if (resize) {
+   if (size < align) {
+   size = align;
+   dev_info(>dev,
+   "Rounding up size of resource #%d to 
%#llx.\n",
+   i, (unsigned long long)size);
+   }
+   r->flags |= IORESOURCE_UNSET;
+   r->end = size - 1;
+   r->start = 0;
+   } else {
+   if (size > align)
+   align = size;
+   r->flags &= ~IORESOURCE_SIZEALIGN;
+   r->flags |= IORESOURCE_STARTALIGN | IORESOURCE_UNSET;
+   r->start = align;
+   r->end = r->start + size - 1;
}
-   r->flags |= IORESOURCE_UNSET;
-   r->end = size - 1;
-   r->start = 0;
}
/* Need to disable bridge's resource window,
 * to enable the kernel to reassign new resource
-- 
1.7.9.5



[RFC PATCH v4 2/7] PCI: Use IORESOURCE_WINDOW to identify bridge resources

2016-03-06 Thread Yongji Xie
Now we use the IORESOURCE_STARTALIGN to identify bridge
resources in __assign_resources_sorted(). But there would
be some problems because some PCI devices' resources may
also use IORESOURCE_STARTALIGN, e.g. using "noresize"
option of resource_alignment kernel parameter.

So this patch replaces IORESOURCE_STARTALIGN with
IORESOURCE_WINDOW.

Signed-off-by: Yongji Xie 
---
 drivers/pci/setup-bus.c |   21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7796d0a..4ff10ca 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -411,11 +411,11 @@ static void __assign_resources_sorted(struct list_head 
*head,
 
/*
 * There are two kinds of additional resources in the list:
-* 1. bridge resource  -- IORESOURCE_STARTALIGN
-* 2. SR-IOV resource   -- IORESOURCE_SIZEALIGN
+* 1. bridge resource -- IORESOURCE_WINDOW
+* 2. SR-IOV resource
 * Here just fix the additional alignment for bridge
 */
-   if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+   if (!(dev_res->res->flags & IORESOURCE_WINDOW))
continue;
 
add_align = get_res_add_align(realloc_head, dev_res->res);
@@ -956,7 +956,7 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
 
b_res->start = min_align;
b_res->end = b_res->start + size0 - 1;
-   b_res->flags |= IORESOURCE_STARTALIGN;
+   b_res->flags |= IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (size1 > size0 && realloc_head) {
add_to_list(realloc_head, bus->self, b_res, size1-size0,
min_align);
@@ -1104,7 +1104,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned 
long mask,
}
b_res->start = min_align;
b_res->end = size0 + min_align - 1;
-   b_res->flags |= IORESOURCE_STARTALIGN;
+   b_res->flags |= IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (size1 > size0 && realloc_head) {
add_to_list(realloc_head, bus->self, b_res, size1-size0, 
add_align);
dev_printk(KERN_DEBUG, >self->dev, "bridge window %pR to 
%pR add_size %llx add_align %llx\n",
@@ -1140,7 +1140,8 @@ static void pci_bus_size_cardbus(struct pci_bus *bus,
 */
b_res[0].start = pci_cardbus_io_size;
b_res[0].end = b_res[0].start + pci_cardbus_io_size - 1;
-   b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+   b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[0].end -= pci_cardbus_io_size;
add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size,
@@ -1152,7 +1153,8 @@ handle_b_res_1:
goto handle_b_res_2;
b_res[1].start = pci_cardbus_io_size;
b_res[1].end = b_res[1].start + pci_cardbus_io_size - 1;
-   b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+   b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[1].end -= pci_cardbus_io_size;
add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size,
@@ -1190,7 +1192,7 @@ handle_b_res_2:
b_res[2].start = pci_cardbus_mem_size;
b_res[2].end = b_res[2].start + pci_cardbus_mem_size - 1;
b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH |
- IORESOURCE_STARTALIGN;
+ IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (realloc_head) {
b_res[2].end -= pci_cardbus_mem_size;
add_to_list(realloc_head, bridge, b_res+2,
@@ -1206,7 +1208,8 @@ handle_b_res_3:
goto handle_done;
b_res[3].start = pci_cardbus_mem_size;
b_res[3].end = b_res[3].start + b_res_3_size - 1;
-   b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN;
+   b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[3].end -= b_res_3_size;
add_to_list(realloc_head, bridge, b_res+3, b_res_3_size,
-- 
1.7.9.5



[RFC PATCH v4 2/7] PCI: Use IORESOURCE_WINDOW to identify bridge resources

2016-03-06 Thread Yongji Xie
Now we use the IORESOURCE_STARTALIGN to identify bridge
resources in __assign_resources_sorted(). But there would
be some problems because some PCI devices' resources may
also use IORESOURCE_STARTALIGN, e.g. using "noresize"
option of resource_alignment kernel parameter.

So this patch replaces IORESOURCE_STARTALIGN with
IORESOURCE_WINDOW.

Signed-off-by: Yongji Xie 
---
 drivers/pci/setup-bus.c |   21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 7796d0a..4ff10ca 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -411,11 +411,11 @@ static void __assign_resources_sorted(struct list_head 
*head,
 
/*
 * There are two kinds of additional resources in the list:
-* 1. bridge resource  -- IORESOURCE_STARTALIGN
-* 2. SR-IOV resource   -- IORESOURCE_SIZEALIGN
+* 1. bridge resource -- IORESOURCE_WINDOW
+* 2. SR-IOV resource
 * Here just fix the additional alignment for bridge
 */
-   if (!(dev_res->res->flags & IORESOURCE_STARTALIGN))
+   if (!(dev_res->res->flags & IORESOURCE_WINDOW))
continue;
 
add_align = get_res_add_align(realloc_head, dev_res->res);
@@ -956,7 +956,7 @@ static void pbus_size_io(struct pci_bus *bus, 
resource_size_t min_size,
 
b_res->start = min_align;
b_res->end = b_res->start + size0 - 1;
-   b_res->flags |= IORESOURCE_STARTALIGN;
+   b_res->flags |= IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (size1 > size0 && realloc_head) {
add_to_list(realloc_head, bus->self, b_res, size1-size0,
min_align);
@@ -1104,7 +1104,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned 
long mask,
}
b_res->start = min_align;
b_res->end = size0 + min_align - 1;
-   b_res->flags |= IORESOURCE_STARTALIGN;
+   b_res->flags |= IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (size1 > size0 && realloc_head) {
add_to_list(realloc_head, bus->self, b_res, size1-size0, 
add_align);
dev_printk(KERN_DEBUG, >self->dev, "bridge window %pR to 
%pR add_size %llx add_align %llx\n",
@@ -1140,7 +1140,8 @@ static void pci_bus_size_cardbus(struct pci_bus *bus,
 */
b_res[0].start = pci_cardbus_io_size;
b_res[0].end = b_res[0].start + pci_cardbus_io_size - 1;
-   b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+   b_res[0].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[0].end -= pci_cardbus_io_size;
add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size,
@@ -1152,7 +1153,8 @@ handle_b_res_1:
goto handle_b_res_2;
b_res[1].start = pci_cardbus_io_size;
b_res[1].end = b_res[1].start + pci_cardbus_io_size - 1;
-   b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN;
+   b_res[1].flags |= IORESOURCE_IO | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[1].end -= pci_cardbus_io_size;
add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size,
@@ -1190,7 +1192,7 @@ handle_b_res_2:
b_res[2].start = pci_cardbus_mem_size;
b_res[2].end = b_res[2].start + pci_cardbus_mem_size - 1;
b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH |
- IORESOURCE_STARTALIGN;
+ IORESOURCE_STARTALIGN | IORESOURCE_WINDOW;
if (realloc_head) {
b_res[2].end -= pci_cardbus_mem_size;
add_to_list(realloc_head, bridge, b_res+2,
@@ -1206,7 +1208,8 @@ handle_b_res_3:
goto handle_done;
b_res[3].start = pci_cardbus_mem_size;
b_res[3].end = b_res[3].start + b_res_3_size - 1;
-   b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN;
+   b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_STARTALIGN |
+ IORESOURCE_WINDOW;
if (realloc_head) {
b_res[3].end -= b_res_3_size;
add_to_list(realloc_head, bridge, b_res+3, b_res_3_size,
-- 
1.7.9.5



[RFC PATCH v4 0/7] vfio-pci: Allow to mmap sub-page MMIO BARs and MSI-X table

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs and MSI-X table. This is because
sub-page BARs' mmio page may be shared with other BARs and MSI-X table
should not be accessed directly from the guest for security reasons.

But these will easily cause some performance issues for mmio accesses
in guest when vfio passthrough sub-page BARs or BARs containing MSI-X
table on PPC64 platform. This is because PAGE_SIZE is 64KB by default
on PPC64 platform and the big page may easily hit the sub-page MMIO
BARs' unmmapping and cause the unmmaping of the mmio page which
MSI-X table locate in, which lead to mmio emulation in host.

For sub-page MMIO BARs' unmmapping, this patchset modifies
resource_alignment kernel parameter to enforce the alignment of all
MMIO BARs to be at least PAGE_SZIE so that sub-page BAR's mmio page
will not be shared with other BARs. Then we can mmap sub-page MMIO BARs
in vfio-pci driver with the modified resource_alignment.

For MSI-X table's unmmapping, we think MSI-X table is safe to access
directly from userspace if PCI host bridge support filtering of MSIs
which can ensure that a given pci device can only shoot the MSIs
assigned for it. So we allow to mmap MSI-X table if
IOMMU_CAP_INTR_REMAP was set. And we add IOMMU_CAP_INTR_REMAP for
IODA host bridge on PPC64 platform.

With this patchset applied, we can get almost 100% improvement on
performance for mmio accesses when we passthrough sub-page BARs to guest
in our test.

The two vfio related patches(patch 5 and patch 6) are based on the
proposed patchset[1].

Changelog v4:
- Rebase on v4.5-rc6 with patchset[1] applied.
- Remove resource_page_aligned kernel parameter
- Fix some problems with resource_alignment kernel parameter
- Modify resource_alignment kernel parameter to support multiple
  devices.
- Remove host bridge attribute: msi_filtered
- Use IOMMU_CAP_INTR_REMAP to check if MSI-X table can be mmapped
- Add IOMMU_CAP_INTR_REMAP for IODA host bridge on PPC64 platform

Changelog v3:
- Rebase on new linux kernel mainline with the patchset[1] applied.
- Add a function to check whether PCI BARs'mmio page is shared with
  other BARs.
- Add a host bridge attribute to indicate PCI host bridge support
  filtering of MSIs.
- Use the new host bridge attribute to check if MSI-X table can
  be mmapped instead of CONFIG_EEH.
- Remove Kconfig option VFIO_PCI_MMAP_MSIX

Changelog v2:
- Rebase on v4.4-rc6 with the patchset[1] applied.
- Use kernel parameter to enforce all MMIO BARs to be page aligned
  on PCI core code instead of doing it on PPC64 arch code.
- Remove flags: VFIO_DEVICE_FLAGS_PCI_PAGE_ALIGNED
VFIO_DEVICE_FLAGS_PCI_MSIX_MMAP
- Add a Kconfig option to support for mmapping MSI-X table.

[1] http://www.spinics.net/lists/kvm/msg127812.html

Yongji Xie (7):
  PCI: Add a new option for resource_alignment to reassign alignment
  PCI: Use IORESOURCE_WINDOW to identify bridge resources
  PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set
  PCI: Modify resource_alignment to support multiple devices
  vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  vfio-pci: Allow to mmap MSI-X table if IOMMU_CAP_INTR_REMAP was set
  powerpc/powernv/pci-ioda: Add IOMMU_CAP_INTR_REMAP for IODA host bridge

 Documentation/kernel-parameters.txt   |9 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c |   17 
 drivers/pci/pci.c |  126 -
 drivers/pci/probe.c   |3 +-
 drivers/pci/setup-bus.c   |   21 ++---
 drivers/vfio/pci/vfio_pci.c   |   15 +++-
 drivers/vfio/pci/vfio_pci_rdwr.c  |4 +-
 include/linux/pci.h   |4 +
 8 files changed, 162 insertions(+), 37 deletions(-)

-- 
1.7.9.5



[RFC PATCH v4 0/7] vfio-pci: Allow to mmap sub-page MMIO BARs and MSI-X table

2016-03-06 Thread Yongji Xie
Current vfio-pci implementation disallows to mmap
sub-page(size < PAGE_SIZE) MMIO BARs and MSI-X table. This is because
sub-page BARs' mmio page may be shared with other BARs and MSI-X table
should not be accessed directly from the guest for security reasons.

But these will easily cause some performance issues for mmio accesses
in guest when vfio passthrough sub-page BARs or BARs containing MSI-X
table on PPC64 platform. This is because PAGE_SIZE is 64KB by default
on PPC64 platform and the big page may easily hit the sub-page MMIO
BARs' unmmapping and cause the unmmaping of the mmio page which
MSI-X table locate in, which lead to mmio emulation in host.

For sub-page MMIO BARs' unmmapping, this patchset modifies
resource_alignment kernel parameter to enforce the alignment of all
MMIO BARs to be at least PAGE_SZIE so that sub-page BAR's mmio page
will not be shared with other BARs. Then we can mmap sub-page MMIO BARs
in vfio-pci driver with the modified resource_alignment.

For MSI-X table's unmmapping, we think MSI-X table is safe to access
directly from userspace if PCI host bridge support filtering of MSIs
which can ensure that a given pci device can only shoot the MSIs
assigned for it. So we allow to mmap MSI-X table if
IOMMU_CAP_INTR_REMAP was set. And we add IOMMU_CAP_INTR_REMAP for
IODA host bridge on PPC64 platform.

With this patchset applied, we can get almost 100% improvement on
performance for mmio accesses when we passthrough sub-page BARs to guest
in our test.

The two vfio related patches(patch 5 and patch 6) are based on the
proposed patchset[1].

Changelog v4:
- Rebase on v4.5-rc6 with patchset[1] applied.
- Remove resource_page_aligned kernel parameter
- Fix some problems with resource_alignment kernel parameter
- Modify resource_alignment kernel parameter to support multiple
  devices.
- Remove host bridge attribute: msi_filtered
- Use IOMMU_CAP_INTR_REMAP to check if MSI-X table can be mmapped
- Add IOMMU_CAP_INTR_REMAP for IODA host bridge on PPC64 platform

Changelog v3:
- Rebase on new linux kernel mainline with the patchset[1] applied.
- Add a function to check whether PCI BARs'mmio page is shared with
  other BARs.
- Add a host bridge attribute to indicate PCI host bridge support
  filtering of MSIs.
- Use the new host bridge attribute to check if MSI-X table can
  be mmapped instead of CONFIG_EEH.
- Remove Kconfig option VFIO_PCI_MMAP_MSIX

Changelog v2:
- Rebase on v4.4-rc6 with the patchset[1] applied.
- Use kernel parameter to enforce all MMIO BARs to be page aligned
  on PCI core code instead of doing it on PPC64 arch code.
- Remove flags: VFIO_DEVICE_FLAGS_PCI_PAGE_ALIGNED
VFIO_DEVICE_FLAGS_PCI_MSIX_MMAP
- Add a Kconfig option to support for mmapping MSI-X table.

[1] http://www.spinics.net/lists/kvm/msg127812.html

Yongji Xie (7):
  PCI: Add a new option for resource_alignment to reassign alignment
  PCI: Use IORESOURCE_WINDOW to identify bridge resources
  PCI: Ignore resource_alignment if PCI_PROBE_ONLY was set
  PCI: Modify resource_alignment to support multiple devices
  vfio-pci: Allow to mmap sub-page MMIO BARs if the mmio page is exclusive
  vfio-pci: Allow to mmap MSI-X table if IOMMU_CAP_INTR_REMAP was set
  powerpc/powernv/pci-ioda: Add IOMMU_CAP_INTR_REMAP for IODA host bridge

 Documentation/kernel-parameters.txt   |9 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c |   17 
 drivers/pci/pci.c |  126 -
 drivers/pci/probe.c   |3 +-
 drivers/pci/setup-bus.c   |   21 ++---
 drivers/vfio/pci/vfio_pci.c   |   15 +++-
 drivers/vfio/pci/vfio_pci_rdwr.c  |4 +-
 include/linux/pci.h   |4 +
 8 files changed, 162 insertions(+), 37 deletions(-)

-- 
1.7.9.5



Applied "regulator: pv88090: fix incorrect clear of event register" to the regulator tree

2016-03-06 Thread Mark Brown
The patch

   regulator: pv88090: fix incorrect clear of event register

has been applied to the regulator tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From 623f7b933968b159081559baca56d98ed4b60e33 Mon Sep 17 00:00:00 2001
From: James Ban 
Date: Mon, 7 Mar 2016 16:08:20 +0900
Subject: [PATCH] regulator: pv88090: fix incorrect clear of event register

This is a patch to fix incorrect clear of event register.

Signed-off-by: James Ban 
Signed-off-by: Mark Brown 
---
 drivers/regulator/pv88090-regulator.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/pv88090-regulator.c 
b/drivers/regulator/pv88090-regulator.c
index ac15f31b5fe0..0057c6740d6f 100644
--- a/drivers/regulator/pv88090-regulator.c
+++ b/drivers/regulator/pv88090-regulator.c
@@ -283,8 +283,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_VDD_FLT, PV88090_E_VDD_FLT);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_VDD_FLT);
if (err < 0)
goto error_i2c;
 
@@ -300,8 +300,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_OVER_TEMP, PV88090_E_OVER_TEMP);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_OVER_TEMP);
if (err < 0)
goto error_i2c;
 
-- 
2.7.0



Applied "regulator: pv88090: fix incorrect clear of event register" to the regulator tree

2016-03-06 Thread Mark Brown
The patch

   regulator: pv88090: fix incorrect clear of event register

has been applied to the regulator tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

>From 623f7b933968b159081559baca56d98ed4b60e33 Mon Sep 17 00:00:00 2001
From: James Ban 
Date: Mon, 7 Mar 2016 16:08:20 +0900
Subject: [PATCH] regulator: pv88090: fix incorrect clear of event register

This is a patch to fix incorrect clear of event register.

Signed-off-by: James Ban 
Signed-off-by: Mark Brown 
---
 drivers/regulator/pv88090-regulator.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/pv88090-regulator.c 
b/drivers/regulator/pv88090-regulator.c
index ac15f31b5fe0..0057c6740d6f 100644
--- a/drivers/regulator/pv88090-regulator.c
+++ b/drivers/regulator/pv88090-regulator.c
@@ -283,8 +283,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_VDD_FLT, PV88090_E_VDD_FLT);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_VDD_FLT);
if (err < 0)
goto error_i2c;
 
@@ -300,8 +300,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_OVER_TEMP, PV88090_E_OVER_TEMP);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_OVER_TEMP);
if (err < 0)
goto error_i2c;
 
-- 
2.7.0



Re: Applied "regulator: max8973: add support for junction thermal warning" to the regulator tree

2016-03-06 Thread Mark Brown
On Mon, Mar 07, 2016 at 12:06:46PM +0530, Laxman Dewangan wrote:

> Following will not help
> depends on THERMAL_OF if THERMAL_OF
> because THERMAL_OF is always "y" even if THERMAL is "m".

> Build error can by resolved by adding below in the Kconfig
> depends on THERMAL

> but the issue is if THERMAL is "m" and  REGULATOR_MAX8973 is "y" as per the

So that should be depends on THERMAL if THERMAL_OF

> failure rand config then REGULATOR_MAX8973 automatically become "m". This
> may break some existing platform.

That's an inevitable consequence of adding this support, you can't get
around it.

> Also this driver does not need hard dependency in the thermal as max8973
> does not support thermal but max77621 supports it which is again optional.

> Some of driver use
> drivers/power/charger-manager.c:#ifdef CONFIG_THERMAL
> drivers/power/power_supply_core.c:#ifdef CONFIG_THERMAL

> So can we give the similar try here and test for build?

This is still a hack; if this is causing real problems the thermal
subsystem should be doing something to avoid the issue (for example
providing an always built in stub) though I suspect in reality it's not
a practical issue.


signature.asc
Description: PGP signature


Re: Applied "regulator: max8973: add support for junction thermal warning" to the regulator tree

2016-03-06 Thread Mark Brown
On Mon, Mar 07, 2016 at 12:06:46PM +0530, Laxman Dewangan wrote:

> Following will not help
> depends on THERMAL_OF if THERMAL_OF
> because THERMAL_OF is always "y" even if THERMAL is "m".

> Build error can by resolved by adding below in the Kconfig
> depends on THERMAL

> but the issue is if THERMAL is "m" and  REGULATOR_MAX8973 is "y" as per the

So that should be depends on THERMAL if THERMAL_OF

> failure rand config then REGULATOR_MAX8973 automatically become "m". This
> may break some existing platform.

That's an inevitable consequence of adding this support, you can't get
around it.

> Also this driver does not need hard dependency in the thermal as max8973
> does not support thermal but max77621 supports it which is again optional.

> Some of driver use
> drivers/power/charger-manager.c:#ifdef CONFIG_THERMAL
> drivers/power/power_supply_core.c:#ifdef CONFIG_THERMAL

> So can we give the similar try here and test for build?

This is still a hack; if this is causing real problems the thermal
subsystem should be doing something to avoid the issue (for example
providing an always built in stub) though I suspect in reality it's not
a practical issue.


signature.asc
Description: PGP signature


Re: [PATCH] gpio: xgene: Fix kconfig for standby GIPO contoller

2016-03-06 Thread Linus Walleij
On Fri, Mar 4, 2016 at 6:42 PM, Matthias Brugger  wrote:

> The standby GPIO controller can be used as a interrupt controller.
> Select GPIOLIB_IRQCHIP when compiling this driver. Otherwise we get
> a compilation error:
>
> drivers/gpio/gpio-xgene-sb.c: In function 'xgene_gpio_sb_probe':
> drivers/gpio/gpio-xgene-sb.c:312:10: error: 'struct gpio_chip' has no member 
> named 'irqdomain'
>   priv->gc.irqdomain = priv->irq_domain;
>   ^
> scripts/Makefile.build:295: recipe for target 'drivers/gpio/gpio-xgene-sb.o' 
> failed
> make[2]: *** [drivers/gpio/gpio-xgene-sb.o] Error 1
>
> Apart if compiled as module, we get the following modpost errors:
> ERROR: "irq_chip_eoi_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_unmask_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_mask_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_domain_create_hierarchy" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "gpiochip_get_data" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_set_type_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_domain_alloc_irqs_parent" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "irq_domain_set_hwirq_and_chip" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "irq_domain_reset_irq_data" [drivers/gpio/gpio-xgene-sb.ko] undefined!
>
> This patch changes the kconfig so that the gpio controller can only be
> build-in and selects GPIOLIB_IRQCHIP.
>
> Fixes: 1013fc41 "gpio: xgene: Enable X-Gene standby GPIO as interrupt 
> controller"
> Cc: Quan Nguyen 
> Signed-off-by: Matthias Brugger 
> ---
>
> Changes for v2:
> - Add modprobe fix, changing tristate to bool
> - Add Cc tag

Quan has sent a patch exporting the offending functions, but no reply from
the irqchip maintainers yet.

If they don't react we'll have to merge this and handle the module loading
later.

Yours,
Linus Walleij


Re: [PATCH] gpio: xgene: Fix kconfig for standby GIPO contoller

2016-03-06 Thread Linus Walleij
On Fri, Mar 4, 2016 at 6:42 PM, Matthias Brugger  wrote:

> The standby GPIO controller can be used as a interrupt controller.
> Select GPIOLIB_IRQCHIP when compiling this driver. Otherwise we get
> a compilation error:
>
> drivers/gpio/gpio-xgene-sb.c: In function 'xgene_gpio_sb_probe':
> drivers/gpio/gpio-xgene-sb.c:312:10: error: 'struct gpio_chip' has no member 
> named 'irqdomain'
>   priv->gc.irqdomain = priv->irq_domain;
>   ^
> scripts/Makefile.build:295: recipe for target 'drivers/gpio/gpio-xgene-sb.o' 
> failed
> make[2]: *** [drivers/gpio/gpio-xgene-sb.o] Error 1
>
> Apart if compiled as module, we get the following modpost errors:
> ERROR: "irq_chip_eoi_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_unmask_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_mask_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_domain_create_hierarchy" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "gpiochip_get_data" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_chip_set_type_parent" [drivers/gpio/gpio-xgene-sb.ko] undefined!
> ERROR: "irq_domain_alloc_irqs_parent" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "irq_domain_set_hwirq_and_chip" [drivers/gpio/gpio-xgene-sb.ko] 
> undefined!
> ERROR: "irq_domain_reset_irq_data" [drivers/gpio/gpio-xgene-sb.ko] undefined!
>
> This patch changes the kconfig so that the gpio controller can only be
> build-in and selects GPIOLIB_IRQCHIP.
>
> Fixes: 1013fc41 "gpio: xgene: Enable X-Gene standby GPIO as interrupt 
> controller"
> Cc: Quan Nguyen 
> Signed-off-by: Matthias Brugger 
> ---
>
> Changes for v2:
> - Add modprobe fix, changing tristate to bool
> - Add Cc tag

Quan has sent a patch exporting the offending functions, but no reply from
the irqchip maintainers yet.

If they don't react we'll have to merge this and handle the module loading
later.

Yours,
Linus Walleij


Re: [PATCH] genirq: Export IRQ functions for module use

2016-03-06 Thread Linus Walleij
On Thu, Mar 3, 2016 at 9:56 PM, Quan Nguyen  wrote:

> Export list of functions below to build gpio-xgene-sb driver as module
> + irq_chip_*_parent()
> + irq_domain_create_hierarchy(),
> + irq_domain_set_hwirq_and_chip(),
> + irq_domain_reset_irq_data(),
> + irq_domain_alloc/free_irqs_parent()
>
> Signed-off-by: Quan Nguyen 
> Link: https://lists.01.org/pipermail/kbuild-all/2016-February/017914.html

Seems like the right solution if we should have run-time loaded
IRQchips.

Acked-by: Linus Walleij 

This causes a build error in -next so if it's not solved before the merge window
we may have to mark the xgene standby driver as bool to overcome the problem.

Yours,
Linus Walleij


Re: [PATCH] genirq: Export IRQ functions for module use

2016-03-06 Thread Linus Walleij
On Thu, Mar 3, 2016 at 9:56 PM, Quan Nguyen  wrote:

> Export list of functions below to build gpio-xgene-sb driver as module
> + irq_chip_*_parent()
> + irq_domain_create_hierarchy(),
> + irq_domain_set_hwirq_and_chip(),
> + irq_domain_reset_irq_data(),
> + irq_domain_alloc/free_irqs_parent()
>
> Signed-off-by: Quan Nguyen 
> Link: https://lists.01.org/pipermail/kbuild-all/2016-February/017914.html

Seems like the right solution if we should have run-time loaded
IRQchips.

Acked-by: Linus Walleij 

This causes a build error in -next so if it's not solved before the merge window
we may have to mark the xgene standby driver as bool to overcome the problem.

Yours,
Linus Walleij


Re: [PATCH 5/5] usb: gadget: f_midi: updated copyright

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:13:05 AM GMT+00:00, Felipe Balbi  wrote:
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> Signed-off-by: Felipe F. Tonello 
>>
>>no commit log == no commit
>
> Got it. 
>
>>
>>> ---
>>>  drivers/usb/gadget/function/f_midi.c | 3 +++
>>>  1 file changed, 3 insertions(+)
>>>
>>> diff --git a/drivers/usb/gadget/function/f_midi.c
>>b/drivers/usb/gadget/function/f_midi.c
>>> index 9a9e6112e224..5c7f5c780fda 100644
>>> --- a/drivers/usb/gadget/function/f_midi.c
>>> +++ b/drivers/usb/gadget/function/f_midi.c
>>> @@ -5,6 +5,9 @@
>>>   * Developed for Thumtronics by Grey Innovation
>>>   * Ben Williamson 
>>>   *
>>> + * Copyright (C) 2015,2016 ROLI Ltd.
>>> + * Felipe F. Tonello 
>>
>>Did you check with your company's lawyer that your changes are enough
>>to
>>justify a copyright ?
>
> Yes. Specially if that state machine refractor gets approved. TBH I
> can't see it won't.

okay, so did that same lawyer tell you to change the driver's license ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 5/5] usb: gadget: f_midi: updated copyright

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:13:05 AM GMT+00:00, Felipe Balbi  wrote:
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> Signed-off-by: Felipe F. Tonello 
>>
>>no commit log == no commit
>
> Got it. 
>
>>
>>> ---
>>>  drivers/usb/gadget/function/f_midi.c | 3 +++
>>>  1 file changed, 3 insertions(+)
>>>
>>> diff --git a/drivers/usb/gadget/function/f_midi.c
>>b/drivers/usb/gadget/function/f_midi.c
>>> index 9a9e6112e224..5c7f5c780fda 100644
>>> --- a/drivers/usb/gadget/function/f_midi.c
>>> +++ b/drivers/usb/gadget/function/f_midi.c
>>> @@ -5,6 +5,9 @@
>>>   * Developed for Thumtronics by Grey Innovation
>>>   * Ben Williamson 
>>>   *
>>> + * Copyright (C) 2015,2016 ROLI Ltd.
>>> + * Felipe F. Tonello 
>>
>>Did you check with your company's lawyer that your changes are enough
>>to
>>justify a copyright ?
>
> Yes. Specially if that state machine refractor gets approved. TBH I
> can't see it won't.

okay, so did that same lawyer tell you to change the driver's license ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 4/5] usb: gadget: f_midi: cleanups and typos fixes

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Michal, 
>
> On March 5, 2016 4:28:45 PM GMT+00:00, Michal Nazarewicz  
> wrote:
 On Wed, Mar 02 2016, Felipe F. Tonello wrote:
> @@ -16,7 +16,7 @@
>   *   Copyright (C) 2006 Thumtronics Pty Ltd.
>   *   Ben Williamson 
>   *
> - * Licensed under the GPL-2 or later.
> + * Licensed under the GPLv2.
>>
>>> On March 4, 2016 7:17:31 PM GMT+00:00, Michal Nazarewicz
>> wrote:
 Any particular reason to do that?
>>
>>On Fri, Mar 04 2016, Felipe Ferreri Tonello wrote:
>>> Because the kernel is v2 only and not later. 
>>
>>Linux as a whole is GPLv2 only, but that doesn’t necessarily mean that
>>parts of it cannot be dual licensed (or GPLv2+).  It’s safer to leave
>>copyright noticed clear unless you explicitly want your contribution be
>>GPLv2 only which brings the whole file GPLv2 only.
>>
>>> I just tried to make this driver more consistent with the coding
>>style
>>> used across the kernel. That's it.
>>
>>Column alignment of field names or RHS of assignment operators is quite
>>inconsistent already within drivers/usb/gadget/ which is why I’m
>>concerned whether this is really helping.
>>
>>Anyway, I actually don’t care much, just adding my two rappen.
>
> Right, I am ok with Balbi completely ignoring this patch. But I prefer
> to have at least this driver consistent than nothing. Of course I'll
> remove the license change I made.

consistent in what way ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 4/5] usb: gadget: f_midi: cleanups and typos fixes

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Michal, 
>
> On March 5, 2016 4:28:45 PM GMT+00:00, Michal Nazarewicz  
> wrote:
 On Wed, Mar 02 2016, Felipe F. Tonello wrote:
> @@ -16,7 +16,7 @@
>   *   Copyright (C) 2006 Thumtronics Pty Ltd.
>   *   Ben Williamson 
>   *
> - * Licensed under the GPL-2 or later.
> + * Licensed under the GPLv2.
>>
>>> On March 4, 2016 7:17:31 PM GMT+00:00, Michal Nazarewicz
>> wrote:
 Any particular reason to do that?
>>
>>On Fri, Mar 04 2016, Felipe Ferreri Tonello wrote:
>>> Because the kernel is v2 only and not later. 
>>
>>Linux as a whole is GPLv2 only, but that doesn’t necessarily mean that
>>parts of it cannot be dual licensed (or GPLv2+).  It’s safer to leave
>>copyright noticed clear unless you explicitly want your contribution be
>>GPLv2 only which brings the whole file GPLv2 only.
>>
>>> I just tried to make this driver more consistent with the coding
>>style
>>> used across the kernel. That's it.
>>
>>Column alignment of field names or RHS of assignment operators is quite
>>inconsistent already within drivers/usb/gadget/ which is why I’m
>>concerned whether this is really helping.
>>
>>Anyway, I actually don’t care much, just adding my two rappen.
>
> Right, I am ok with Balbi completely ignoring this patch. But I prefer
> to have at least this driver consistent than nothing. Of course I'll
> remove the license change I made.

consistent in what way ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 2/5] usb: gadget: f_midi: added spinlock on transmit function

2016-03-06 Thread Felipe Balbi

Hi,

(please break your lines at 80-characters, have a look at
Documentation/email-clients.txt if needed)

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:20:10 AM GMT+00:00, Felipe Balbi  wrote:
>>
>>Hi,
>>
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> Since f_midi_transmit is called by both ALSA and USB frameworks, it
>>can
>>> potentially cause a race condition between both calls. This is bad
>>because the
>>> way f_midi_transmit is implemented can't handle concurrent calls.
>>This is due
>>> to the fact that the usb request fifo looks for the next element and
>>only if
>>> it has data to process it enqueues the request, otherwise re-uses it.
>>If both
>>> (ALSA and USB) frameworks calls this function at the same time, the
>>> kfifo_seek() will return the same usb_request, which will cause a
>>race
>>> condition.
>>>
>>> To solve this problem a syncronization mechanism is necessary. In
>>this case it
>>> is used a spinlock since f_midi_transmit is also called by
>>usb_request->complete
>>> callback in interrupt context.
>>>
>>> On benchmarks realized by me, spinlocks were more efficient then
>>scheduling
>>> the f_midi_transmit tasklet in process context and using a mutex to
>>> synchronize. Also it performs better then previous implementation
>>that
>>> allocated a usb_request for every new transmit made.
>>
>>behaves better in what way ? Also, previous implementation would not
>>suffer from this concurrency problem, right ?
>
> The spin lock is faster than allocating usb requests all the time,
> even if the udc uses da for it.

did you measure ? Is the extra speed really necessary ? How did you
benchmark this ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 2/5] usb: gadget: f_midi: added spinlock on transmit function

2016-03-06 Thread Felipe Balbi

Hi,

(please break your lines at 80-characters, have a look at
Documentation/email-clients.txt if needed)

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:20:10 AM GMT+00:00, Felipe Balbi  wrote:
>>
>>Hi,
>>
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> Since f_midi_transmit is called by both ALSA and USB frameworks, it
>>can
>>> potentially cause a race condition between both calls. This is bad
>>because the
>>> way f_midi_transmit is implemented can't handle concurrent calls.
>>This is due
>>> to the fact that the usb request fifo looks for the next element and
>>only if
>>> it has data to process it enqueues the request, otherwise re-uses it.
>>If both
>>> (ALSA and USB) frameworks calls this function at the same time, the
>>> kfifo_seek() will return the same usb_request, which will cause a
>>race
>>> condition.
>>>
>>> To solve this problem a syncronization mechanism is necessary. In
>>this case it
>>> is used a spinlock since f_midi_transmit is also called by
>>usb_request->complete
>>> callback in interrupt context.
>>>
>>> On benchmarks realized by me, spinlocks were more efficient then
>>scheduling
>>> the f_midi_transmit tasklet in process context and using a mutex to
>>> synchronize. Also it performs better then previous implementation
>>that
>>> allocated a usb_request for every new transmit made.
>>
>>behaves better in what way ? Also, previous implementation would not
>>suffer from this concurrency problem, right ?
>
> The spin lock is faster than allocating usb requests all the time,
> even if the udc uses da for it.

did you measure ? Is the extra speed really necessary ? How did you
benchmark this ?

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 3/5] usb: gadget: gmidi: remove bus powered requirement on bmAttributes

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:16:42 AM GMT+00:00, Felipe Balbi  wrote:
>>
>>Hi,
>>
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> This gadget uses a bmAttributes and MaxPower that requires the USB
>>bus to be
>>> powered from the host, which is not correct because this
>>configuration is device
>>> specific, not a USB-MIDI requirement.
>>>
>>> This patch adds two modules parameters, bmAttributes and MaxPower,
>>that allows
>>> the user to set those flags. The default values are what the gadget
>>used to use
>>> for backward compatibility.
>>>
>>> Signed-off-by: Felipe F. Tonello 
>>> ---
>>>  drivers/usb/gadget/legacy/gmidi.c | 14 --
>>>  1 file changed, 12 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/usb/gadget/legacy/gmidi.c
>>b/drivers/usb/gadget/legacy/gmidi.c
>>> index fc2ac150f5ff..0553435cc360 100644
>>> --- a/drivers/usb/gadget/legacy/gmidi.c
>>> +++ b/drivers/usb/gadget/legacy/gmidi.c
>>> @@ -63,6 +63,14 @@ static unsigned int out_ports = 1;
>>>  module_param(out_ports, uint, S_IRUGO);
>>>  MODULE_PARM_DESC(out_ports, "Number of MIDI output ports");
>>>  
>>> +static unsigned int bmAttributes = USB_CONFIG_ATT_ONE;
>>> +module_param(bmAttributes, uint, S_IRUGO);
>>> +MODULE_PARM_DESC(bmAttributes, "Configuration Descriptor's
>>bmAttributes parameter");
>>> +
>>> +static unsigned int MaxPower = CONFIG_USB_GADGET_VBUS_DRAW;
>>> +module_param(MaxPower, uint, S_IRUGO);
>>> +MODULE_PARM_DESC(MaxPower, "Used to calculate Configuration
>>Descriptor's bMaxPower parameter");
>>
>>you didn't run checkpatch, did you ? Also, are you sure you will need
>>to
>>change this by simply reloading the module ? I'm not convinced.
>
> Yes I always run checkpatch :)

do you really ? Why do you have a 98-character line, then ?

> What do you mean by reloading the module?

modprobe g_midi MaxPower=4
modprobe -r g_midi
modprobe g_midi MaxPower=10

I'm not convinced this is a valid use-case. Specially since you can just
provide several configurations and let the host choose the one it suits
it best.

>>> @@ -119,8 +127,8 @@ static struct usb_configuration midi_config = {
>>> .label  = "MIDI Gadget",
>>> .bConfigurationValue = 1,
>>> /* .iConfiguration = DYNAMIC */
>>> -   .bmAttributes   = USB_CONFIG_ATT_ONE,
>>
>>nack, nackety nack nack nack :-)
>>
>>USB_CONFIG_ATT_ONE *must* always be set. With your module parameter you
>>give users the oportunity to violate USB spec.
>
> You are right. It needs to check the value before it assigns to
> bmAttributes.

why check ? You can just unconditionally or USB_CONFIG_ATT_ONE. In any
case, I'm not convinced this is necessary at all.

-- 
balbi


signature.asc
Description: PGP signature


Re: [PATCH 3/5] usb: gadget: gmidi: remove bus powered requirement on bmAttributes

2016-03-06 Thread Felipe Balbi

Hi,

Felipe Ferreri Tonello  writes:
> [ text/plain ]
> Hi Balbi, 
>
> On March 4, 2016 7:16:42 AM GMT+00:00, Felipe Balbi  wrote:
>>
>>Hi,
>>
>>"Felipe F. Tonello"  writes:
>>> [ text/plain ]
>>> This gadget uses a bmAttributes and MaxPower that requires the USB
>>bus to be
>>> powered from the host, which is not correct because this
>>configuration is device
>>> specific, not a USB-MIDI requirement.
>>>
>>> This patch adds two modules parameters, bmAttributes and MaxPower,
>>that allows
>>> the user to set those flags. The default values are what the gadget
>>used to use
>>> for backward compatibility.
>>>
>>> Signed-off-by: Felipe F. Tonello 
>>> ---
>>>  drivers/usb/gadget/legacy/gmidi.c | 14 --
>>>  1 file changed, 12 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/usb/gadget/legacy/gmidi.c
>>b/drivers/usb/gadget/legacy/gmidi.c
>>> index fc2ac150f5ff..0553435cc360 100644
>>> --- a/drivers/usb/gadget/legacy/gmidi.c
>>> +++ b/drivers/usb/gadget/legacy/gmidi.c
>>> @@ -63,6 +63,14 @@ static unsigned int out_ports = 1;
>>>  module_param(out_ports, uint, S_IRUGO);
>>>  MODULE_PARM_DESC(out_ports, "Number of MIDI output ports");
>>>  
>>> +static unsigned int bmAttributes = USB_CONFIG_ATT_ONE;
>>> +module_param(bmAttributes, uint, S_IRUGO);
>>> +MODULE_PARM_DESC(bmAttributes, "Configuration Descriptor's
>>bmAttributes parameter");
>>> +
>>> +static unsigned int MaxPower = CONFIG_USB_GADGET_VBUS_DRAW;
>>> +module_param(MaxPower, uint, S_IRUGO);
>>> +MODULE_PARM_DESC(MaxPower, "Used to calculate Configuration
>>Descriptor's bMaxPower parameter");
>>
>>you didn't run checkpatch, did you ? Also, are you sure you will need
>>to
>>change this by simply reloading the module ? I'm not convinced.
>
> Yes I always run checkpatch :)

do you really ? Why do you have a 98-character line, then ?

> What do you mean by reloading the module?

modprobe g_midi MaxPower=4
modprobe -r g_midi
modprobe g_midi MaxPower=10

I'm not convinced this is a valid use-case. Specially since you can just
provide several configurations and let the host choose the one it suits
it best.

>>> @@ -119,8 +127,8 @@ static struct usb_configuration midi_config = {
>>> .label  = "MIDI Gadget",
>>> .bConfigurationValue = 1,
>>> /* .iConfiguration = DYNAMIC */
>>> -   .bmAttributes   = USB_CONFIG_ATT_ONE,
>>
>>nack, nackety nack nack nack :-)
>>
>>USB_CONFIG_ATT_ONE *must* always be set. With your module parameter you
>>give users the oportunity to violate USB spec.
>
> You are right. It needs to check the value before it assigns to
> bmAttributes.

why check ? You can just unconditionally or USB_CONFIG_ATT_ONE. In any
case, I'm not convinced this is necessary at all.

-- 
balbi


signature.asc
Description: PGP signature


[PATCH 2/2] net: thunderx: Adjust nicvf structure to reduce cache misses

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Adjusted nicvf structure such that all elements used in hot
path like napi, xmit e.t.c fall into same cache line. This reduced
no of cache misses and resulted in ~2% increase in no of packets
handled on a core.

Also modified elements with :1 notation to boolean, to be
consistent with other element definitions.

Signed-off-by: Sunil Goutham 
---
 drivers/net/ethernet/cavium/thunder/nic.h |   52 
 1 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h 
b/drivers/net/ethernet/cavium/thunder/nic.h
index 5628aea..c063d92 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -263,46 +263,54 @@ struct nicvf {
struct nicvf*pnicvf;
struct net_device   *netdev;
struct pci_dev  *pdev;
+   void __iomem*reg_base;
+   struct queue_set*qs;
+   struct nicvf_cq_poll*napi[8];
u8  vf_id;
-   u8  node;
-   u8  tns_mode:1;
-   u8  sqs_mode:1;
-   u8  loopback_supported:1;
+   u8  sqs_id;
+   boolsqs_mode;
boolhw_tso;
-   u16 mtu;
-   struct queue_set*qs;
+
+   /* Receive buffer alloc */
+   u32 rb_page_offset;
+   u16 rb_pageref;
+   boolrb_alloc_fail;
+   boolrb_work_scheduled;
+   struct page *rb_page;
+   struct delayed_work rbdr_work;
+   struct tasklet_struct   rbdr_task;
+
+   /* Secondary Qset */
+   u8  sqs_count;
 #defineMAX_SQS_PER_VF_SINGLE_NODE  5
 #defineMAX_SQS_PER_VF  11
-   u8  sqs_id;
-   u8  sqs_count; /* Secondary Qset count */
struct nicvf*snicvf[MAX_SQS_PER_VF];
+
+   /* Queue count */
u8  rx_queues;
u8  tx_queues;
u8  max_queues;
-   void __iomem*reg_base;
+
+   u8  node;
+   u8  cpi_alg;
+   u16 mtu;
boollink_up;
u8  duplex;
u32 speed;
-   struct page *rb_page;
-   u32 rb_page_offset;
-   u16 rb_pageref;
-   boolrb_alloc_fail;
-   boolrb_work_scheduled;
-   struct delayed_work rbdr_work;
-   struct tasklet_struct   rbdr_task;
-   struct tasklet_struct   qs_err_task;
-   struct tasklet_struct   cq_task;
-   struct nicvf_cq_poll*napi[8];
+   booltns_mode;
+   boolloopback_supported;
struct nicvf_rss_info   rss_info;
-   u8  cpi_alg;
+   struct tasklet_struct   qs_err_task;
+   struct work_struct  reset_task;
+
/* Interrupt coalescing settings */
u32 cq_coalesce_usecs;
-
u32 msg_enable;
+
+   /* Stats */
struct nicvf_hw_stats   hw_stats;
struct nicvf_drv_stats  drv_stats;
struct bgx_statsbgx_stats;
-   struct work_struct  reset_task;
 
/* MSI-X  */
boolmsix_enabled;
-- 
1.7.1



[PATCH 2/2] net: thunderx: Adjust nicvf structure to reduce cache misses

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Adjusted nicvf structure such that all elements used in hot
path like napi, xmit e.t.c fall into same cache line. This reduced
no of cache misses and resulted in ~2% increase in no of packets
handled on a core.

Also modified elements with :1 notation to boolean, to be
consistent with other element definitions.

Signed-off-by: Sunil Goutham 
---
 drivers/net/ethernet/cavium/thunder/nic.h |   52 
 1 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h 
b/drivers/net/ethernet/cavium/thunder/nic.h
index 5628aea..c063d92 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -263,46 +263,54 @@ struct nicvf {
struct nicvf*pnicvf;
struct net_device   *netdev;
struct pci_dev  *pdev;
+   void __iomem*reg_base;
+   struct queue_set*qs;
+   struct nicvf_cq_poll*napi[8];
u8  vf_id;
-   u8  node;
-   u8  tns_mode:1;
-   u8  sqs_mode:1;
-   u8  loopback_supported:1;
+   u8  sqs_id;
+   boolsqs_mode;
boolhw_tso;
-   u16 mtu;
-   struct queue_set*qs;
+
+   /* Receive buffer alloc */
+   u32 rb_page_offset;
+   u16 rb_pageref;
+   boolrb_alloc_fail;
+   boolrb_work_scheduled;
+   struct page *rb_page;
+   struct delayed_work rbdr_work;
+   struct tasklet_struct   rbdr_task;
+
+   /* Secondary Qset */
+   u8  sqs_count;
 #defineMAX_SQS_PER_VF_SINGLE_NODE  5
 #defineMAX_SQS_PER_VF  11
-   u8  sqs_id;
-   u8  sqs_count; /* Secondary Qset count */
struct nicvf*snicvf[MAX_SQS_PER_VF];
+
+   /* Queue count */
u8  rx_queues;
u8  tx_queues;
u8  max_queues;
-   void __iomem*reg_base;
+
+   u8  node;
+   u8  cpi_alg;
+   u16 mtu;
boollink_up;
u8  duplex;
u32 speed;
-   struct page *rb_page;
-   u32 rb_page_offset;
-   u16 rb_pageref;
-   boolrb_alloc_fail;
-   boolrb_work_scheduled;
-   struct delayed_work rbdr_work;
-   struct tasklet_struct   rbdr_task;
-   struct tasklet_struct   qs_err_task;
-   struct tasklet_struct   cq_task;
-   struct nicvf_cq_poll*napi[8];
+   booltns_mode;
+   boolloopback_supported;
struct nicvf_rss_info   rss_info;
-   u8  cpi_alg;
+   struct tasklet_struct   qs_err_task;
+   struct work_struct  reset_task;
+
/* Interrupt coalescing settings */
u32 cq_coalesce_usecs;
-
u32 msg_enable;
+
+   /* Stats */
struct nicvf_hw_stats   hw_stats;
struct nicvf_drv_stats  drv_stats;
struct bgx_statsbgx_stats;
-   struct work_struct  reset_task;
 
/* MSI-X  */
boolmsix_enabled;
-- 
1.7.1



[PATCH 1/2] net: thunderx: Set recevie buffer page usage count in bulk

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Instead of calling get_page() for every receive buffer carved out
of page, set page's usage count at the end, to reduce no of atomic
calls.

Signed-off-by: Sunil Goutham 
---
 drivers/net/ethernet/cavium/thunder/nic.h  |1 +
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |   31 ++-
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h 
b/drivers/net/ethernet/cavium/thunder/nic.h
index 00cc915..5628aea 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -285,6 +285,7 @@ struct nicvf {
u32 speed;
struct page *rb_page;
u32 rb_page_offset;
+   u16 rb_pageref;
boolrb_alloc_fail;
boolrb_work_scheduled;
struct delayed_work rbdr_work;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 0dd1abf..fa05e34 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -18,6 +18,15 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+static void nicvf_get_page(struct nicvf *nic)
+{
+   if (!nic->rb_pageref || !nic->rb_page)
+   return;
+
+   atomic_add(nic->rb_pageref, >rb_page->_count);
+   nic->rb_pageref = 0;
+}
+
 /* Poll a register for a specific value */
 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
  u64 reg, int bit_pos, int bits, int val)
@@ -81,16 +90,15 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, 
gfp_t gfp,
int order = (PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0;
 
/* Check if request can be accomodated in previous allocated page */
-   if (nic->rb_page) {
-   if ((nic->rb_page_offset + buf_len + buf_len) >
-   (PAGE_SIZE << order)) {
-   nic->rb_page = NULL;
-   } else {
-   nic->rb_page_offset += buf_len;
-   get_page(nic->rb_page);
-   }
+   if (nic->rb_page &&
+   ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) {
+   nic->rb_pageref++;
+   goto ret;
}
 
+   nicvf_get_page(nic);
+   nic->rb_page = NULL;
+
/* Allocate a new page */
if (!nic->rb_page) {
nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
@@ -102,7 +110,9 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, 
gfp_t gfp,
nic->rb_page_offset = 0;
}
 
+ret:
*rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+   nic->rb_page_offset += buf_len;
 
return 0;
 }
@@ -158,6 +168,9 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr 
*rbdr,
desc = GET_RBDR_DESC(rbdr, idx);
desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
}
+
+   nicvf_get_page(nic);
+
return 0;
 }
 
@@ -241,6 +254,8 @@ refill:
new_rb++;
}
 
+   nicvf_get_page(nic);
+
/* make sure all memory stores are done before ringing doorbell */
smp_wmb();
 
-- 
1.7.1



[PATCH 1/2] net: thunderx: Set recevie buffer page usage count in bulk

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Instead of calling get_page() for every receive buffer carved out
of page, set page's usage count at the end, to reduce no of atomic
calls.

Signed-off-by: Sunil Goutham 
---
 drivers/net/ethernet/cavium/thunder/nic.h  |1 +
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |   31 ++-
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h 
b/drivers/net/ethernet/cavium/thunder/nic.h
index 00cc915..5628aea 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -285,6 +285,7 @@ struct nicvf {
u32 speed;
struct page *rb_page;
u32 rb_page_offset;
+   u16 rb_pageref;
boolrb_alloc_fail;
boolrb_work_scheduled;
struct delayed_work rbdr_work;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 0dd1abf..fa05e34 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -18,6 +18,15 @@
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+static void nicvf_get_page(struct nicvf *nic)
+{
+   if (!nic->rb_pageref || !nic->rb_page)
+   return;
+
+   atomic_add(nic->rb_pageref, >rb_page->_count);
+   nic->rb_pageref = 0;
+}
+
 /* Poll a register for a specific value */
 static int nicvf_poll_reg(struct nicvf *nic, int qidx,
  u64 reg, int bit_pos, int bits, int val)
@@ -81,16 +90,15 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, 
gfp_t gfp,
int order = (PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0;
 
/* Check if request can be accomodated in previous allocated page */
-   if (nic->rb_page) {
-   if ((nic->rb_page_offset + buf_len + buf_len) >
-   (PAGE_SIZE << order)) {
-   nic->rb_page = NULL;
-   } else {
-   nic->rb_page_offset += buf_len;
-   get_page(nic->rb_page);
-   }
+   if (nic->rb_page &&
+   ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) {
+   nic->rb_pageref++;
+   goto ret;
}
 
+   nicvf_get_page(nic);
+   nic->rb_page = NULL;
+
/* Allocate a new page */
if (!nic->rb_page) {
nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
@@ -102,7 +110,9 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, 
gfp_t gfp,
nic->rb_page_offset = 0;
}
 
+ret:
*rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+   nic->rb_page_offset += buf_len;
 
return 0;
 }
@@ -158,6 +168,9 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr 
*rbdr,
desc = GET_RBDR_DESC(rbdr, idx);
desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
}
+
+   nicvf_get_page(nic);
+
return 0;
 }
 
@@ -241,6 +254,8 @@ refill:
new_rb++;
}
 
+   nicvf_get_page(nic);
+
/* make sure all memory stores are done before ringing doorbell */
smp_wmb();
 
-- 
1.7.1



[PATCH 0/2] net: thunderx: Performance enhancement changes

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Below patches attempts to improve performance by reducing
no of atomic operations while allocating new receive buffers
and reducing cache misses by adjusting nicvf structure elements.

Sunil Goutham (2):
  net: thunderx: Set recevie buffer page usage count in bulk
  net: thunderx: Adjust nicvf structure to reduce cache misses

 drivers/net/ethernet/cavium/thunder/nic.h  |   51 
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |   31 +---
 2 files changed, 53 insertions(+), 29 deletions(-)



[PATCH 0/2] net: thunderx: Performance enhancement changes

2016-03-06 Thread sunil . kovvuri
From: Sunil Goutham 

Below patches attempts to improve performance by reducing
no of atomic operations while allocating new receive buffers
and reducing cache misses by adjusting nicvf structure elements.

Sunil Goutham (2):
  net: thunderx: Set recevie buffer page usage count in bulk
  net: thunderx: Adjust nicvf structure to reduce cache misses

 drivers/net/ethernet/cavium/thunder/nic.h  |   51 
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |   31 +---
 2 files changed, 53 insertions(+), 29 deletions(-)



[PATCH V1] regulator: pv88090: fix incorrect clear of event register

2016-03-06 Thread James Ban

From: James Ban 

This is a patch to fix incorrect clear of event register.

Signed-off-by: James Ban 

---
This patch applies against linux-next and next-20160304 


 drivers/regulator/pv88090-regulator.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/pv88090-regulator.c 
b/drivers/regulator/pv88090-regulator.c
index ac15f31..0057c67 100644
--- a/drivers/regulator/pv88090-regulator.c
+++ b/drivers/regulator/pv88090-regulator.c
@@ -283,8 +283,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_VDD_FLT, PV88090_E_VDD_FLT);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_VDD_FLT);
if (err < 0)
goto error_i2c;
 
@@ -300,8 +300,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_OVER_TEMP, PV88090_E_OVER_TEMP);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_OVER_TEMP);
if (err < 0)
goto error_i2c;
 
-- 
end-of-patch for PATCH V1



Re: [PATCH 1/2] Document: i2c: Add a dt binding for mediatek MT2701 soc

2016-03-06 Thread liguo zhang
On Mon, 2016-01-04 at 10:55 +0100, Wolfram Sang wrote:
> On Mon, Jan 04, 2016 at 02:15:37PM +0800, Liguo Zhang wrote:
> > Add a dt binding for the MT2701 soc.
> > 
> > Signed-off-by: Liguo Zhang 
> 
> This should probably go with the other patch via arm-soc, or?

The patch about i2c dts info for MT2701 depends on the MT2701 clock
patch. After the MT2701 clock patch is accepted, the i2c patch for
MT2701 may be accepted.

The MT2701 clock patch:
http://lists.infradead.org/pipermail/linux-mediatek/2016-January/003489.html

> 
> In that case:
> 
> Acked-by: Wolfram Sang 
> 












[PATCH V1] regulator: pv88090: fix incorrect clear of event register

2016-03-06 Thread James Ban

From: James Ban 

This is a patch to fix incorrect clear of event register.

Signed-off-by: James Ban 

---
This patch applies against linux-next and next-20160304 


 drivers/regulator/pv88090-regulator.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/pv88090-regulator.c 
b/drivers/regulator/pv88090-regulator.c
index ac15f31..0057c67 100644
--- a/drivers/regulator/pv88090-regulator.c
+++ b/drivers/regulator/pv88090-regulator.c
@@ -283,8 +283,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_VDD_FLT, PV88090_E_VDD_FLT);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_VDD_FLT);
if (err < 0)
goto error_i2c;
 
@@ -300,8 +300,8 @@ static irqreturn_t pv88090_irq_handler(int irq, void *data)
}
}
 
-   err = regmap_update_bits(chip->regmap, PV88090_REG_EVENT_A,
-   PV88090_E_OVER_TEMP, PV88090_E_OVER_TEMP);
+   err = regmap_write(chip->regmap, PV88090_REG_EVENT_A,
+   PV88090_E_OVER_TEMP);
if (err < 0)
goto error_i2c;
 
-- 
end-of-patch for PATCH V1



Re: [PATCH 1/2] Document: i2c: Add a dt binding for mediatek MT2701 soc

2016-03-06 Thread liguo zhang
On Mon, 2016-01-04 at 10:55 +0100, Wolfram Sang wrote:
> On Mon, Jan 04, 2016 at 02:15:37PM +0800, Liguo Zhang wrote:
> > Add a dt binding for the MT2701 soc.
> > 
> > Signed-off-by: Liguo Zhang 
> 
> This should probably go with the other patch via arm-soc, or?

The patch about i2c dts info for MT2701 depends on the MT2701 clock
patch. After the MT2701 clock patch is accepted, the i2c patch for
MT2701 may be accepted.

The MT2701 clock patch:
http://lists.infradead.org/pipermail/linux-mediatek/2016-January/003489.html

> 
> In that case:
> 
> Acked-by: Wolfram Sang 
> 












Re: [PATCH resend] PCI: QEMU top-level IDs for (sub)vendor & device

2016-03-06 Thread Daniel Vetter
On Mon, Mar 07, 2016 at 12:09:51AM +0200, Michael S. Tsirkin wrote:
> On Sun, Mar 06, 2016 at 10:02:30PM +, Robin H. Johnson wrote:
> > Introduce PCI_VENDOR/PCI_SUBVENDOR/PCI_SUBDEVICE defines to replace the
> > constants scattered in the kernel already used to detect QEMU.
> > 
> > They are defined in the QEMU codebase per docs/specs/pci-ids.txt.
> > 
> > Signed-off-by: Robin H. Johnson 
> > Reviewed-by: Takashi Iwai 
> > Reviewed-by: Gerd Hoffmann 
> > ---
> 
> Acked-by: Michael S. Tsirkin 
> 
> PCI tree seems to make the most sense.
> Bjorn, can you merge this please?

Yeah, ack for merging through pci.

Acked-by: Daniel Vetter 
-Daniel

> 
> > This change prompted by a near-miss in the review of recent change:
> > 'drm/i915: refine qemu south bridge detection'
> > 
> > This patch was previously sent to LKML 25 Jan 2016; and got some
> > reviews, but otherwise slipped through the cracks.
> > 
> > ---
> >  drivers/gpu/drm/bochs/bochs_drv.c   | 4 ++--
> >  drivers/gpu/drm/cirrus/cirrus_drv.c | 5 +++--
> >  drivers/virtio/virtio_pci_common.c  | 2 +-
> >  include/linux/pci_ids.h | 4 
> >  sound/pci/intel8x0.c| 4 ++--
> >  5 files changed, 12 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/bochs/bochs_drv.c 
> > b/drivers/gpu/drm/bochs/bochs_drv.c
> > index 7f1a360..b332b4d3 100644
> > --- a/drivers/gpu/drm/bochs/bochs_drv.c
> > +++ b/drivers/gpu/drm/bochs/bochs_drv.c
> > @@ -182,8 +182,8 @@ static const struct pci_device_id bochs_pci_tbl[] = {
> > {
> > .vendor  = 0x1234,
> > .device  = 0x,
> > -   .subvendor   = 0x1af4,
> > -   .subdevice   = 0x1100,
> > +   .subvendor   = PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
> > +   .subdevice   = PCI_SUBDEVICE_ID_QEMU,
> > .driver_data = BOCHS_QEMU_STDVGA,
> > },
> > {
> > diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c 
> > b/drivers/gpu/drm/cirrus/cirrus_drv.c
> > index b1619e2..7bc394e 100644
> > --- a/drivers/gpu/drm/cirrus/cirrus_drv.c
> > +++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
> > @@ -33,8 +33,9 @@ static struct drm_driver driver;
> >  
> >  /* only bind to the cirrus chip in qemu */
> >  static const struct pci_device_id pciidlist[] = {
> > -   { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
> > - 0, 0 },
> > +   { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446,
> > + PCI_SUBVENDOR_ID_REDHAT_QUMRANET, PCI_SUBDEVICE_ID_QEMU,
> > + 0, 0, 0 },
> > { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
> >   0x0001, 0, 0, 0 },
> > {0,}
> > diff --git a/drivers/virtio/virtio_pci_common.c 
> > b/drivers/virtio/virtio_pci_common.c
> > index 36205c2..127dfe4 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -467,7 +467,7 @@ static const struct dev_pm_ops virtio_pci_pm_ops = {
> >  
> >  /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
> >  static const struct pci_device_id virtio_pci_id_table[] = {
> > -   { PCI_DEVICE(0x1af4, PCI_ANY_ID) },
> > +   { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
> > { 0 }
> >  };
> >  
> > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> > index 37f05cb..6d249d3 100644
> > --- a/include/linux/pci_ids.h
> > +++ b/include/linux/pci_ids.h
> > @@ -2506,6 +2506,10 @@
> >  
> >  #define PCI_VENDOR_ID_AZWAVE   0x1a3b
> >  
> > +#define PCI_VENDOR_ID_REDHAT_QUMRANET0x1af4
> > +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4
> > +#define PCI_SUBDEVICE_ID_QEMU0x1100
> > +
> >  #define PCI_VENDOR_ID_ASMEDIA  0x1b21
> >  
> >  #define PCI_VENDOR_ID_CIRCUITCO0x1cc8
> > diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
> > index 42bcbac..12c2c18 100644
> > --- a/sound/pci/intel8x0.c
> > +++ b/sound/pci/intel8x0.c
> > @@ -2980,8 +2980,8 @@ static int snd_intel8x0_inside_vm(struct pci_dev *pci)
> > goto fini;
> >  
> > /* check for known (emulated) devices */
> > -   if (pci->subsystem_vendor == 0x1af4 &&
> > -   pci->subsystem_device == 0x1100) {
> > +   if (pci->subsystem_vendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET &&
> > +   pci->subsystem_device == PCI_SUBDEVICE_ID_QEMU) {
> > /* KVM emulated sound, PCI SSID: 1af4:1100 */
> > msg = "enable KVM";
> > } else if (pci->subsystem_vendor == 0x1ab8) {
> > -- 
> > 2.3.0
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 3/4] arm64: dts: Add dts files for LG Electronics's lg1312 SoC

2016-03-06 Thread Mark Rutland
Hi,

On Mon, Mar 07, 2016 at 01:09:59PM +0900, Chanho Min wrote:
> Add initial dtsi file to support lg1312 SoC which based on
> Cortex-A53. Also add dts file to support lg1312 reference board
> which based on lg1312 SoC.
> 
> Signed-off-by: Chanho Min 

I have a few comments on this patch below.

> +/ {
> + #address-cells = <2>;
> + #size-cells = <1>;

Is there definitely no reason to require a 64-bit size? e.g. ranges?

Generally I'd expect both address and size to be described as 64 bit quantities
in the root node, to make it less painful to extend in future.

> +
> + model = "LG Electronics, DTV SoC LG1312 Reference Board";
> + compatible = "lge,lg1312-ref", "lge,lg1312";
> +
> + memory {
> + device_type = "memory";
> + reg = <0x0 0x 0x2000>;
> + };
> +
> + chosen {
> + bootargs = "root=/dev/nfs ip=dhcp";
> + };
> +};

Drop these bootargs. This is specific to a particular developer's
configuration, and they make no sense alone given the lack of an nfsroot, so
they're evidently being overwritten anyway.

> + psci {
> + compatible  = "arm,psci";
> + method = "smc";
> + cpu_suspend = <0x8401>;
> + cpu_off = <0x8402>;
> + cpu_on = <0x8403>;
> + };

What are you using as your PSCI implementation?

Is it not PSCI 0.2+ compliant?

Which exception level are you booting at?

> + gic: interrupt-controller@c0001000 {
> + #interrupt-cells = <3>;
> +
> + compatible = "arm,gic-400";
> + interrupt-controller;
> + reg = <0x0 0xc0001000 0x1000>,
> +   <0x0 0xc0002000 0x1000>;
> + };

I believe the CPU interface is too short (as GICC_DIR lives at 0x1000).

What about GICH and GICV?

> + pmu {
> + compatible = "arm,armv8-pmuv3";

Use "arm,cortex-a53-pmu".

> + timer {
> + compatible = "arm,armv8-timer";
> + interrupts = ,
> +  ;
> + clock-frequency = <2400>;
> + };

Please fix your firmware to program CNTFRQ. 

The clock-frequency property is at best a workaround for a broken system, and
is not sufficient in general.

> + clocks {
> + clk_bus: clk_bus {
> + #clock-cells = <0>;
> +
> + compatible = "fixed-clock";
> + clock-frequency = <19800>;
> + clock-output-names = "BUSCLK";
> + };
> + };

Just put this fixed-clock under the root node. There is nothing special about
/clocks; it is not required to be probed and serves no purpose.

Thanks,
Mark.


Re: [PATCH resend] PCI: QEMU top-level IDs for (sub)vendor & device

2016-03-06 Thread Daniel Vetter
On Mon, Mar 07, 2016 at 12:09:51AM +0200, Michael S. Tsirkin wrote:
> On Sun, Mar 06, 2016 at 10:02:30PM +, Robin H. Johnson wrote:
> > Introduce PCI_VENDOR/PCI_SUBVENDOR/PCI_SUBDEVICE defines to replace the
> > constants scattered in the kernel already used to detect QEMU.
> > 
> > They are defined in the QEMU codebase per docs/specs/pci-ids.txt.
> > 
> > Signed-off-by: Robin H. Johnson 
> > Reviewed-by: Takashi Iwai 
> > Reviewed-by: Gerd Hoffmann 
> > ---
> 
> Acked-by: Michael S. Tsirkin 
> 
> PCI tree seems to make the most sense.
> Bjorn, can you merge this please?

Yeah, ack for merging through pci.

Acked-by: Daniel Vetter 
-Daniel

> 
> > This change prompted by a near-miss in the review of recent change:
> > 'drm/i915: refine qemu south bridge detection'
> > 
> > This patch was previously sent to LKML 25 Jan 2016; and got some
> > reviews, but otherwise slipped through the cracks.
> > 
> > ---
> >  drivers/gpu/drm/bochs/bochs_drv.c   | 4 ++--
> >  drivers/gpu/drm/cirrus/cirrus_drv.c | 5 +++--
> >  drivers/virtio/virtio_pci_common.c  | 2 +-
> >  include/linux/pci_ids.h | 4 
> >  sound/pci/intel8x0.c| 4 ++--
> >  5 files changed, 12 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/bochs/bochs_drv.c 
> > b/drivers/gpu/drm/bochs/bochs_drv.c
> > index 7f1a360..b332b4d3 100644
> > --- a/drivers/gpu/drm/bochs/bochs_drv.c
> > +++ b/drivers/gpu/drm/bochs/bochs_drv.c
> > @@ -182,8 +182,8 @@ static const struct pci_device_id bochs_pci_tbl[] = {
> > {
> > .vendor  = 0x1234,
> > .device  = 0x,
> > -   .subvendor   = 0x1af4,
> > -   .subdevice   = 0x1100,
> > +   .subvendor   = PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
> > +   .subdevice   = PCI_SUBDEVICE_ID_QEMU,
> > .driver_data = BOCHS_QEMU_STDVGA,
> > },
> > {
> > diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c 
> > b/drivers/gpu/drm/cirrus/cirrus_drv.c
> > index b1619e2..7bc394e 100644
> > --- a/drivers/gpu/drm/cirrus/cirrus_drv.c
> > +++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
> > @@ -33,8 +33,9 @@ static struct drm_driver driver;
> >  
> >  /* only bind to the cirrus chip in qemu */
> >  static const struct pci_device_id pciidlist[] = {
> > -   { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
> > - 0, 0 },
> > +   { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446,
> > + PCI_SUBVENDOR_ID_REDHAT_QUMRANET, PCI_SUBDEVICE_ID_QEMU,
> > + 0, 0, 0 },
> > { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
> >   0x0001, 0, 0, 0 },
> > {0,}
> > diff --git a/drivers/virtio/virtio_pci_common.c 
> > b/drivers/virtio/virtio_pci_common.c
> > index 36205c2..127dfe4 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -467,7 +467,7 @@ static const struct dev_pm_ops virtio_pci_pm_ops = {
> >  
> >  /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
> >  static const struct pci_device_id virtio_pci_id_table[] = {
> > -   { PCI_DEVICE(0x1af4, PCI_ANY_ID) },
> > +   { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
> > { 0 }
> >  };
> >  
> > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> > index 37f05cb..6d249d3 100644
> > --- a/include/linux/pci_ids.h
> > +++ b/include/linux/pci_ids.h
> > @@ -2506,6 +2506,10 @@
> >  
> >  #define PCI_VENDOR_ID_AZWAVE   0x1a3b
> >  
> > +#define PCI_VENDOR_ID_REDHAT_QUMRANET0x1af4
> > +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4
> > +#define PCI_SUBDEVICE_ID_QEMU0x1100
> > +
> >  #define PCI_VENDOR_ID_ASMEDIA  0x1b21
> >  
> >  #define PCI_VENDOR_ID_CIRCUITCO0x1cc8
> > diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
> > index 42bcbac..12c2c18 100644
> > --- a/sound/pci/intel8x0.c
> > +++ b/sound/pci/intel8x0.c
> > @@ -2980,8 +2980,8 @@ static int snd_intel8x0_inside_vm(struct pci_dev *pci)
> > goto fini;
> >  
> > /* check for known (emulated) devices */
> > -   if (pci->subsystem_vendor == 0x1af4 &&
> > -   pci->subsystem_device == 0x1100) {
> > +   if (pci->subsystem_vendor == PCI_SUBVENDOR_ID_REDHAT_QUMRANET &&
> > +   pci->subsystem_device == PCI_SUBDEVICE_ID_QEMU) {
> > /* KVM emulated sound, PCI SSID: 1af4:1100 */
> > msg = "enable KVM";
> > } else if (pci->subsystem_vendor == 0x1ab8) {
> > -- 
> > 2.3.0
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 3/4] arm64: dts: Add dts files for LG Electronics's lg1312 SoC

2016-03-06 Thread Mark Rutland
Hi,

On Mon, Mar 07, 2016 at 01:09:59PM +0900, Chanho Min wrote:
> Add initial dtsi file to support lg1312 SoC which based on
> Cortex-A53. Also add dts file to support lg1312 reference board
> which based on lg1312 SoC.
> 
> Signed-off-by: Chanho Min 

I have a few comments on this patch below.

> +/ {
> + #address-cells = <2>;
> + #size-cells = <1>;

Is there definitely no reason to require a 64-bit size? e.g. ranges?

Generally I'd expect both address and size to be described as 64 bit quantities
in the root node, to make it less painful to extend in future.

> +
> + model = "LG Electronics, DTV SoC LG1312 Reference Board";
> + compatible = "lge,lg1312-ref", "lge,lg1312";
> +
> + memory {
> + device_type = "memory";
> + reg = <0x0 0x 0x2000>;
> + };
> +
> + chosen {
> + bootargs = "root=/dev/nfs ip=dhcp";
> + };
> +};

Drop these bootargs. This is specific to a particular developer's
configuration, and they make no sense alone given the lack of an nfsroot, so
they're evidently being overwritten anyway.

> + psci {
> + compatible  = "arm,psci";
> + method = "smc";
> + cpu_suspend = <0x8401>;
> + cpu_off = <0x8402>;
> + cpu_on = <0x8403>;
> + };

What are you using as your PSCI implementation?

Is it not PSCI 0.2+ compliant?

Which exception level are you booting at?

> + gic: interrupt-controller@c0001000 {
> + #interrupt-cells = <3>;
> +
> + compatible = "arm,gic-400";
> + interrupt-controller;
> + reg = <0x0 0xc0001000 0x1000>,
> +   <0x0 0xc0002000 0x1000>;
> + };

I believe the CPU interface is too short (as GICC_DIR lives at 0x1000).

What about GICH and GICV?

> + pmu {
> + compatible = "arm,armv8-pmuv3";

Use "arm,cortex-a53-pmu".

> + timer {
> + compatible = "arm,armv8-timer";
> + interrupts = ,
> +  ;
> + clock-frequency = <2400>;
> + };

Please fix your firmware to program CNTFRQ. 

The clock-frequency property is at best a workaround for a broken system, and
is not sufficient in general.

> + clocks {
> + clk_bus: clk_bus {
> + #clock-cells = <0>;
> +
> + compatible = "fixed-clock";
> + clock-frequency = <19800>;
> + clock-output-names = "BUSCLK";
> + };
> + };

Just put this fixed-clock under the root node. There is nothing special about
/clocks; it is not required to be probed and serves no purpose.

Thanks,
Mark.


Re: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread Sudip Mukherjee
On Mon, Feb 29, 2016 at 11:15:51AM +0900, Daeseok Youn wrote:
> From 70f8703b3bd73fa56f4ea91e98967b8925550aa6 Mon Sep 17 00:00:00 2001
> From: Daeseok Youn 
> Date: Thu, 25 Feb 2016 14:53:37 +0900
> Subject: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

These lines above are not required in the commit log. Please send v2 of
both your patches without these lines.

regards
sudip


Re: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

2016-03-06 Thread Sudip Mukherjee
On Mon, Feb 29, 2016 at 11:15:51AM +0900, Daeseok Youn wrote:
> From 70f8703b3bd73fa56f4ea91e98967b8925550aa6 Mon Sep 17 00:00:00 2001
> From: Daeseok Youn 
> Date: Thu, 25 Feb 2016 14:53:37 +0900
> Subject: [PATCH 1/2] staging: dgnc: use pointer type of tty_struct

These lines above are not required in the commit log. Please send v2 of
both your patches without these lines.

regards
sudip


[RESENT PATCH] mmc: block: fix ABI regression of mmc_blk_ioctl

2016-03-06 Thread Shawn Lin
We should return -EINVAL if cmd is not MMC_IOC_CMD or MMC_IOC_MULTI_CMD,
otherwise blkdev_roset will return -EPERM.

Android-adb calls make_block_device_writable with ioctl(BLKROSET), which
will return error, make remount failed:
remount of /system failed;
couldn't make block device writable: Operation not permitted

openat(AT_FDCWD, "/dev/block/platform/ff42.dwmmc/by-name/system", O_RDONLY) 
= 3
ioctl(3, BLKROSET, 0)  = -1 EPERM (Operation not permitted)

Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands")
Cc: sta...@vger.kernel.org
Signed-off-by: Shawn Lin 
---

 drivers/mmc/card/block.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 47bc87d..170f099 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -688,6 +688,9 @@ cmd_err:
 static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
 {
+   if (cmd != MMC_IOC_CMD && cmd != MMC_IOC_MULTI_CMD)
+   return -EINVAL;
+
/*
 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
 * whole block device, not on a partition.  This prevents overspray
-- 
2.3.7




[RESENT PATCH] mmc: block: fix ABI regression of mmc_blk_ioctl

2016-03-06 Thread Shawn Lin
We should return -EINVAL if cmd is not MMC_IOC_CMD or MMC_IOC_MULTI_CMD,
otherwise blkdev_roset will return -EPERM.

Android-adb calls make_block_device_writable with ioctl(BLKROSET), which
will return error, make remount failed:
remount of /system failed;
couldn't make block device writable: Operation not permitted

openat(AT_FDCWD, "/dev/block/platform/ff42.dwmmc/by-name/system", O_RDONLY) 
= 3
ioctl(3, BLKROSET, 0)  = -1 EPERM (Operation not permitted)

Fixes: a5f5774c55a2 ("mmc: block: Add new ioctl to send multi commands")
Cc: sta...@vger.kernel.org
Signed-off-by: Shawn Lin 
---

 drivers/mmc/card/block.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 47bc87d..170f099 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -688,6 +688,9 @@ cmd_err:
 static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
 {
+   if (cmd != MMC_IOC_CMD && cmd != MMC_IOC_MULTI_CMD)
+   return -EINVAL;
+
/*
 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
 * whole block device, not on a partition.  This prevents overspray
-- 
2.3.7




Re: [PATCH v2] locktorture: Fix NULL pointer when torture_type is invalid

2016-03-06 Thread Kefeng Wang


On 2016/3/7 13:40, Davidlohr Bueso wrote:
> On Mon, 07 Mar 2016, Kefeng Wang wrote:
>> On 2016/3/3 16:36, Davidlohr Bueso wrote:
> 
>>> +/*
>>> + * Indicates early cleanup, meaning that the test has not run,
>>> + * such as when passing bogus args when loading the module. As
>>> + * such, only perform the underlying torture-specific cleanups,
>>> + * and avoid anything related to locktorture.
>>> + */
>>> +if (!cxt.lwsa)
>>> +goto end;
>>
>> Sorry for the late response, the cxt.lrsa should be taken into account too.
> 
> I am taking it into account, note that we kfree lwsa if lrsa fails memory
> allocation. Of course we should be defensive, so go ahead and explicitly set
> it to nil. v2 below, otherwise same patch.

This one looks good, and tested on my board.


> 
> -8<--
> Subject: [PATCH v2] locktorture: Fix nil pointer dereferencing for cleanup 
> paths
> 
> It has been found that paths that invoke cleanups through
> lock_torture_cleanup() can incur in nil pointer dereferencing
> bugs during the statistics printing phase. This is mainly
> because we should not be calling into statistics before we are
> sure things have been setup correctly.
> 
> Specifically, early checks (and the need for handling this in
> the cleanup call) only include parameter checks and basic
> statistics allocation. Once we start write/read kthreads
> we then consider the test as started. As such, update the func
> in question to check for cxt.lwsa writer stats, if not set,
> we either have a bogus parameter or ENOMEM situation and
> therefore only need to deal with general torture calls
> 
> Reported-by: Kefeng Wang 
> Signed-off-by: Davidlohr Bueso 
> ---
>  kernel/locking/locktorture.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
> index 8ef1919..b5bc243 100644
> --- a/kernel/locking/locktorture.c
> +++ b/kernel/locking/locktorture.c
> @@ -748,6 +748,15 @@ static void lock_torture_cleanup(void)
>  if (torture_cleanup_begin())
>  return;
>  
> +/*
> + * Indicates early cleanup, meaning that the test has not run,
> + * such as when passing bogus args when loading the module. As
> + * such, only perform the underlying torture-specific cleanups,
> + * and avoid anything related to locktorture.
> + */
> +if (!cxt.lwsa)
> +goto end;
> +
>  if (writer_tasks) {
>  for (i = 0; i < cxt.nrealwriters_stress; i++)
>  torture_stop_kthread(lock_torture_writer,
> @@ -776,6 +785,7 @@ static void lock_torture_cleanup(void)
>  else
>  lock_torture_print_module_parms(cxt.cur_ops,
>  "End of test: SUCCESS");
> +end:
>  torture_cleanup_end();
>  }
>  
> @@ -870,6 +880,7 @@ static int __init lock_torture_init(void)
>  VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
>  firsterr = -ENOMEM;
>  kfree(cxt.lwsa);
> +cxt.lwsa = NULL;
>  goto unwind;
>  }
>  
> @@ -878,6 +889,7 @@ static int __init lock_torture_init(void)
>  cxt.lrsa[i].n_lock_acquired = 0;
>  }
>  }
> +
>  lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
>  
>  /* Prepare torture context. */



Re: [PATCH v2] locktorture: Fix NULL pointer when torture_type is invalid

2016-03-06 Thread Kefeng Wang


On 2016/3/7 13:40, Davidlohr Bueso wrote:
> On Mon, 07 Mar 2016, Kefeng Wang wrote:
>> On 2016/3/3 16:36, Davidlohr Bueso wrote:
> 
>>> +/*
>>> + * Indicates early cleanup, meaning that the test has not run,
>>> + * such as when passing bogus args when loading the module. As
>>> + * such, only perform the underlying torture-specific cleanups,
>>> + * and avoid anything related to locktorture.
>>> + */
>>> +if (!cxt.lwsa)
>>> +goto end;
>>
>> Sorry for the late response, the cxt.lrsa should be taken into account too.
> 
> I am taking it into account, note that we kfree lwsa if lrsa fails memory
> allocation. Of course we should be defensive, so go ahead and explicitly set
> it to nil. v2 below, otherwise same patch.

This one looks good, and tested on my board.


> 
> -8<--
> Subject: [PATCH v2] locktorture: Fix nil pointer dereferencing for cleanup 
> paths
> 
> It has been found that paths that invoke cleanups through
> lock_torture_cleanup() can incur in nil pointer dereferencing
> bugs during the statistics printing phase. This is mainly
> because we should not be calling into statistics before we are
> sure things have been setup correctly.
> 
> Specifically, early checks (and the need for handling this in
> the cleanup call) only include parameter checks and basic
> statistics allocation. Once we start write/read kthreads
> we then consider the test as started. As such, update the func
> in question to check for cxt.lwsa writer stats, if not set,
> we either have a bogus parameter or ENOMEM situation and
> therefore only need to deal with general torture calls
> 
> Reported-by: Kefeng Wang 
> Signed-off-by: Davidlohr Bueso 
> ---
>  kernel/locking/locktorture.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
> index 8ef1919..b5bc243 100644
> --- a/kernel/locking/locktorture.c
> +++ b/kernel/locking/locktorture.c
> @@ -748,6 +748,15 @@ static void lock_torture_cleanup(void)
>  if (torture_cleanup_begin())
>  return;
>  
> +/*
> + * Indicates early cleanup, meaning that the test has not run,
> + * such as when passing bogus args when loading the module. As
> + * such, only perform the underlying torture-specific cleanups,
> + * and avoid anything related to locktorture.
> + */
> +if (!cxt.lwsa)
> +goto end;
> +
>  if (writer_tasks) {
>  for (i = 0; i < cxt.nrealwriters_stress; i++)
>  torture_stop_kthread(lock_torture_writer,
> @@ -776,6 +785,7 @@ static void lock_torture_cleanup(void)
>  else
>  lock_torture_print_module_parms(cxt.cur_ops,
>  "End of test: SUCCESS");
> +end:
>  torture_cleanup_end();
>  }
>  
> @@ -870,6 +880,7 @@ static int __init lock_torture_init(void)
>  VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
>  firsterr = -ENOMEM;
>  kfree(cxt.lwsa);
> +cxt.lwsa = NULL;
>  goto unwind;
>  }
>  
> @@ -878,6 +889,7 @@ static int __init lock_torture_init(void)
>  cxt.lrsa[i].n_lock_acquired = 0;
>  }
>  }
> +
>  lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
>  
>  /* Prepare torture context. */



Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2016-03-06 Thread Holger Schurig
I know that in Germany a good amount of land-line telephone line are
still using ISDN. Some telco company try to move people to IP only, but
this is currently still a process.

Especially company line are using ISDN still, and there are some Linux
programs that act on then, e.g. Asterisk and derived PBX software has
ISDN support which is actively used.


Re: [PATCH 2/2] isdn: i4l: move active-isdn drivers to staging

2016-03-06 Thread Holger Schurig
I know that in Germany a good amount of land-line telephone line are
still using ISDN. Some telco company try to move people to IP only, but
this is currently still a process.

Especially company line are using ISDN still, and there are some Linux
programs that act on then, e.g. Asterisk and derived PBX software has
ISDN support which is actively used.


RE: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > No. And it's exactly what I mean. The ballooned memory is still
> > processed during live migration without skipping. The live migration code is
> in migration/ram.c.
> 
> So if guest acknowledged VIRTIO_BALLOON_F_MUST_TELL_HOST, we can
> teach qemu to skip these pages.
> Want to write a patch to do this?
> 

Yes, we really can teach qemu to skip these pages and it's not hard.  
The problem is the poor performance, this PV solution is aimed to make it more
efficient and reduce the performance impact on guest.

> > >
> > > > > > The only advantage of ' inflating the balloon before live
> > > > > > migration' is simple,
> > > > > nothing more.
> > > > >
> > > > > That's a big advantage.  Another one is that it does something
> > > > > useful in real- world scenarios.
> > > > >
> > > >
> > > > I don't think the heave performance impaction is something useful
> > > > in real
> > > world scenarios.
> > > >
> > > > Liang
> > > > > Roman.
> > >
> > > So fix the performance then. You will have to try harder if you want
> > > to convince people that the performance is due to bad host/guest
> > > interface, and so we have to change *that*.
> > >
> >
> > Actually, the PV solution is irrelevant with the balloon mechanism, I
> > just use it to transfer information between host and guest.
> > I am not sure if I should implement a new virtio device, and I want to
> > get the answer from the community.
> > In this RFC patch, to make things simple, I choose to extend the
> > virtio-balloon and use the extended interface to transfer the request and
> free_page_bimap content.
> >
> > I am not intend to change the current virtio-balloon implementation.
> >
> > Liang
> 
> And the answer would depend on the answer to my question above.
> Does balloon need an interface passing page bitmaps around?

Yes, I need a new interface.

> Does this speed up any operations?

No, a new interface will not speed up anything, but it is the easiest way to 
solve the compatibility issue.

> OTOH what if you use the regular balloon interface with your patches?
>

The regular balloon interfaces have their specific function and I can't use 
them in my patches.
If using these regular interface, I have to do a lot of changes to keep the 
compatibility. 

> 
> > > --
> > > MST


Re: Applied "regulator: max8973: add support for junction thermal warning" to the regulator tree

2016-03-06 Thread Laxman Dewangan


On Sunday 06 March 2016 05:05 PM, Mark Brown wrote:

* PGP Signed by an unknown key

On Sun, Mar 06, 2016 at 01:17:37PM +0530, Laxman Dewangan wrote:


Here driver is built in binary and THERMAL is the loadable module.
Do we really have THERMAL as module i.e. basic framework?

If randconfig can generate it it's valid.


-#ifdef CONFIG_THERMAL_OF
+#ifdef CONFIG_THERMAL
  static int max8973_thermal_read_temp(void *data, int *temp)
  {
 struct max8973_chip *mchip = data;

That looks like a hack that might break, I'd not expect it to help here
and probably has some other config that can generate issues.  What I
think should be happening here is something like

depends on THERMAL_OF if THERMAL_OF

or similar (ie, there's a direct dependency once the config is enabled).


Following will not help
depends on THERMAL_OF if THERMAL_OF
because THERMAL_OF is always "y" even if THERMAL is "m".

Build error can by resolved by adding below in the Kconfig
depends on THERMAL

but the issue is if THERMAL is "m" and  REGULATOR_MAX8973 is "y" as per 
the failure rand config then REGULATOR_MAX8973 automatically become "m". 
This may break some existing platform.


Also this driver does not need hard dependency in the thermal as max8973 
does not support thermal but max77621 supports it which is again optional.


Some of driver use
drivers/power/charger-manager.c:#ifdef CONFIG_THERMAL
drivers/power/power_supply_core.c:#ifdef CONFIG_THERMAL


So can we give the similar try here and test for build?





RE: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > No. And it's exactly what I mean. The ballooned memory is still
> > processed during live migration without skipping. The live migration code is
> in migration/ram.c.
> 
> So if guest acknowledged VIRTIO_BALLOON_F_MUST_TELL_HOST, we can
> teach qemu to skip these pages.
> Want to write a patch to do this?
> 

Yes, we really can teach qemu to skip these pages and it's not hard.  
The problem is the poor performance, this PV solution is aimed to make it more
efficient and reduce the performance impact on guest.

> > >
> > > > > > The only advantage of ' inflating the balloon before live
> > > > > > migration' is simple,
> > > > > nothing more.
> > > > >
> > > > > That's a big advantage.  Another one is that it does something
> > > > > useful in real- world scenarios.
> > > > >
> > > >
> > > > I don't think the heave performance impaction is something useful
> > > > in real
> > > world scenarios.
> > > >
> > > > Liang
> > > > > Roman.
> > >
> > > So fix the performance then. You will have to try harder if you want
> > > to convince people that the performance is due to bad host/guest
> > > interface, and so we have to change *that*.
> > >
> >
> > Actually, the PV solution is irrelevant with the balloon mechanism, I
> > just use it to transfer information between host and guest.
> > I am not sure if I should implement a new virtio device, and I want to
> > get the answer from the community.
> > In this RFC patch, to make things simple, I choose to extend the
> > virtio-balloon and use the extended interface to transfer the request and
> free_page_bimap content.
> >
> > I am not intend to change the current virtio-balloon implementation.
> >
> > Liang
> 
> And the answer would depend on the answer to my question above.
> Does balloon need an interface passing page bitmaps around?

Yes, I need a new interface.

> Does this speed up any operations?

No, a new interface will not speed up anything, but it is the easiest way to 
solve the compatibility issue.

> OTOH what if you use the regular balloon interface with your patches?
>

The regular balloon interfaces have their specific function and I can't use 
them in my patches.
If using these regular interface, I have to do a lot of changes to keep the 
compatibility. 

> 
> > > --
> > > MST


Re: Applied "regulator: max8973: add support for junction thermal warning" to the regulator tree

2016-03-06 Thread Laxman Dewangan


On Sunday 06 March 2016 05:05 PM, Mark Brown wrote:

* PGP Signed by an unknown key

On Sun, Mar 06, 2016 at 01:17:37PM +0530, Laxman Dewangan wrote:


Here driver is built in binary and THERMAL is the loadable module.
Do we really have THERMAL as module i.e. basic framework?

If randconfig can generate it it's valid.


-#ifdef CONFIG_THERMAL_OF
+#ifdef CONFIG_THERMAL
  static int max8973_thermal_read_temp(void *data, int *temp)
  {
 struct max8973_chip *mchip = data;

That looks like a hack that might break, I'd not expect it to help here
and probably has some other config that can generate issues.  What I
think should be happening here is something like

depends on THERMAL_OF if THERMAL_OF

or similar (ie, there's a direct dependency once the config is enabled).


Following will not help
depends on THERMAL_OF if THERMAL_OF
because THERMAL_OF is always "y" even if THERMAL is "m".

Build error can by resolved by adding below in the Kconfig
depends on THERMAL

but the issue is if THERMAL is "m" and  REGULATOR_MAX8973 is "y" as per 
the failure rand config then REGULATOR_MAX8973 automatically become "m". 
This may break some existing platform.


Also this driver does not need hard dependency in the thermal as max8973 
does not support thermal but max77621 supports it which is again optional.


Some of driver use
drivers/power/charger-manager.c:#ifdef CONFIG_THERMAL
drivers/power/power_supply_core.c:#ifdef CONFIG_THERMAL


So can we give the similar try here and test for build?





RE: [PATCH 0/4] ARM64:SoC add a new platform, LG Electronics's lg1k

2016-03-06 Thread Chanho Min
> 
> The patches look ok in principle, but the timing is unfortunate: we are at -
> rc7 now, just before the merge window, and we need to give this a little extra
> time for review, so I think we should merge this for 4.7, not 4.6.

It's ok for me.

Chanho



RE: [PATCH 0/4] ARM64:SoC add a new platform, LG Electronics's lg1k

2016-03-06 Thread Chanho Min
> 
> The patches look ok in principle, but the timing is unfortunate: we are at -
> rc7 now, just before the merge window, and we need to give this a little extra
> time for review, so I think we should merge this for 4.7, not 4.6.

It's ok for me.

Chanho



RE: [PATCH] ACPICA: Events: Execute some _REG methods in early boot

2016-03-06 Thread Zheng, Lv
Hi,

First of all, why don't you respond on the kernel bugzilla?
Posting a fix here directly without communication doesn't look like a 
constructive help but more like a destructive attack to me.

> From: linux-acpi-ow...@vger.kernel.org [mailto:linux-acpi-
> Subject: [PATCH] ACPICA: Events: Execute some _REG methods in early boot
> 
> The regression caused _REG methods to not be run in early boot for all
> space IDs, but a removed comment stated that _REG methods should be
> executed for IDs like embedded_controller. Before the regression this
> was the case:
> 
> [0.230469] Executing  Method   \_SB.PCI0.LPCB.EC._REG
> [0.230531] Initializing Region   \GNVS
> [0.230607] Initializing Region   \_SB.PCI0.LPCB.EC.ECOR
> [0.231043] Initializing Region   \_SB.PCI0.IGPU.IGDM
[Lv Zheng] 
As I said in the previous reply, this is the known issue and can be fixed by 
applying the whole series.
Especially this commit:
https://patchwork.kernel.org/patch/8241421/
That's why I asked you to test again by applying the whole series.
And that's why after having not seen your response for so long time, we 
prepared a test branch and was waiting for your response.

You need to post acpidump there to have the issue root caused so that more 
accurate fix can be generated.

> 
> After the regression the initialisation is not done and ODEBUG warnings
> are shown at boot and/or shutdown:
> 
> [6.676570] WARNING: CPU: 0 PID: 3317 at lib/debugobjects.c:263
> debug_print_object+0x85/0xa0()
> [6.676576] ODEBUG: assert_init not available (active state 0) object type:
> timer_list hint: stub_timer+0x0/0x20
> [6.676578] Modules linked in:
> [6.676582] CPU: 0 PID: 3317 Comm: ccpd Not tainted 4.5.0-rc6 #509
> [6.676584] Hardware name: Apple Inc. MacBookPro10,2/Mac-
> AFD8A9D944EA4843, BIOS MBP102.88Z.0106.B0A.1509130955 09/13/2015
> [6.676586]   880256543db0 814802b5
> 880256543df8
> [6.676590]  81f40dbd 880256543de8 810bd0ec
> 880256543e90
> [6.676594]  822532c0 81f40e63 83138c88
> 01fdf040
> [6.676598] Call Trace:
> [6.676603]  [] dump_stack+0x67/0x92
> [6.676608]  [] warn_slowpath_common+0x7c/0xb0
> [6.676611]  [] warn_slowpath_fmt+0x47/0x50
> [6.676614]  [] debug_print_object+0x85/0xa0
> [6.676616]  [] ? cascade+0x70/0x70
> [6.676620]  [] debug_object_assert_init+0xf8/0x130
> [6.676624]  [] ? trace_hardirqs_on+0xd/0x10
> [6.676626]  [] del_timer+0x1f/0x70
> [6.676631]  [] laptop_sync_completion+0x61/0x100
> [6.676633]  [] ? laptop_io_completion+0x30/0x30
> [6.676637]  [] sys_sync+0x7f/0x90
> [6.676641]  [] entry_SYSCALL_64_fastpath+0x12/0x6f
> 
> Fixes: efaed9be998b ("ACPICA: Events: Enhance acpi_ev_execute_reg_method()
> to ensure no _REG evaluations can happen during OS early boot stages")
> Link: https://lkml.org/lkml/2016/2/3/273
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112911
> Signed-off-by: Chris Bainbridge 
> ---
>  drivers/acpi/acpica/evregion.c | 22 +-
>  1 file changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
> index 47092b4d633c..d0b02ef0effa 100644
> --- a/drivers/acpi/acpica/evregion.c
> +++ b/drivers/acpi/acpica/evregion.c
> @@ -590,6 +590,7 @@ acpi_ev_execute_reg_method(union
> acpi_operand_object *region_obj, u32 function)
>   union acpi_operand_object *args[3];
>   union acpi_operand_object *region_obj2;
>   acpi_status status;
> + bool sp;
> 
>   ACPI_FUNCTION_TRACE(ev_execute_reg_method);
> 
> @@ -598,9 +599,28 @@ acpi_ev_execute_reg_method(union
> acpi_operand_object *region_obj, u32 function)
>   return_ACPI_STATUS(AE_NOT_EXIST);
>   }
> 
> + /*
> +  * For the default space_IDs, (the IDs for which there are default 
> region
> handlers
> +  * installed) Only execute the _REG methods if the global initialization
> _REG
> +  * methods have already been run (via acpi_initialize_objects). In other
> words,
> +  * we will defer the execution of the _REG methods for these
> space_IDs until
> +  * execution of acpi_initialize_objects. This is done because we need
> the handlers
> +  * for the default spaces (mem/io/pci/table) to be installed before we
> can run
> +  * any control methods (or _REG methods). There is known BIOS code
> that depends
> +  * on this.
> +  *
> +  * For all other space_IDs, we can safely execute the _REG methods
> immediately.
> +  * This means that for IDs like embedded_controller, this function
> should be called
> +  * only after acpi_enable_subsystem has been called.
> +  */
> +
> + sp = (region_obj->region.space_id ==
> ACPI_ADR_SPACE_SYSTEM_MEMORY ||
> +   region_obj->region.space_id == ACPI_ADR_SPACE_SYSTEM_IO ||
> +   

RE: [PATCH] ACPICA: Events: Execute some _REG methods in early boot

2016-03-06 Thread Zheng, Lv
Hi,

First of all, why don't you respond on the kernel bugzilla?
Posting a fix here directly without communication doesn't look like a 
constructive help but more like a destructive attack to me.

> From: linux-acpi-ow...@vger.kernel.org [mailto:linux-acpi-
> Subject: [PATCH] ACPICA: Events: Execute some _REG methods in early boot
> 
> The regression caused _REG methods to not be run in early boot for all
> space IDs, but a removed comment stated that _REG methods should be
> executed for IDs like embedded_controller. Before the regression this
> was the case:
> 
> [0.230469] Executing  Method   \_SB.PCI0.LPCB.EC._REG
> [0.230531] Initializing Region   \GNVS
> [0.230607] Initializing Region   \_SB.PCI0.LPCB.EC.ECOR
> [0.231043] Initializing Region   \_SB.PCI0.IGPU.IGDM
[Lv Zheng] 
As I said in the previous reply, this is the known issue and can be fixed by 
applying the whole series.
Especially this commit:
https://patchwork.kernel.org/patch/8241421/
That's why I asked you to test again by applying the whole series.
And that's why after having not seen your response for so long time, we 
prepared a test branch and was waiting for your response.

You need to post acpidump there to have the issue root caused so that more 
accurate fix can be generated.

> 
> After the regression the initialisation is not done and ODEBUG warnings
> are shown at boot and/or shutdown:
> 
> [6.676570] WARNING: CPU: 0 PID: 3317 at lib/debugobjects.c:263
> debug_print_object+0x85/0xa0()
> [6.676576] ODEBUG: assert_init not available (active state 0) object type:
> timer_list hint: stub_timer+0x0/0x20
> [6.676578] Modules linked in:
> [6.676582] CPU: 0 PID: 3317 Comm: ccpd Not tainted 4.5.0-rc6 #509
> [6.676584] Hardware name: Apple Inc. MacBookPro10,2/Mac-
> AFD8A9D944EA4843, BIOS MBP102.88Z.0106.B0A.1509130955 09/13/2015
> [6.676586]   880256543db0 814802b5
> 880256543df8
> [6.676590]  81f40dbd 880256543de8 810bd0ec
> 880256543e90
> [6.676594]  822532c0 81f40e63 83138c88
> 01fdf040
> [6.676598] Call Trace:
> [6.676603]  [] dump_stack+0x67/0x92
> [6.676608]  [] warn_slowpath_common+0x7c/0xb0
> [6.676611]  [] warn_slowpath_fmt+0x47/0x50
> [6.676614]  [] debug_print_object+0x85/0xa0
> [6.676616]  [] ? cascade+0x70/0x70
> [6.676620]  [] debug_object_assert_init+0xf8/0x130
> [6.676624]  [] ? trace_hardirqs_on+0xd/0x10
> [6.676626]  [] del_timer+0x1f/0x70
> [6.676631]  [] laptop_sync_completion+0x61/0x100
> [6.676633]  [] ? laptop_io_completion+0x30/0x30
> [6.676637]  [] sys_sync+0x7f/0x90
> [6.676641]  [] entry_SYSCALL_64_fastpath+0x12/0x6f
> 
> Fixes: efaed9be998b ("ACPICA: Events: Enhance acpi_ev_execute_reg_method()
> to ensure no _REG evaluations can happen during OS early boot stages")
> Link: https://lkml.org/lkml/2016/2/3/273
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112911
> Signed-off-by: Chris Bainbridge 
> ---
>  drivers/acpi/acpica/evregion.c | 22 +-
>  1 file changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
> index 47092b4d633c..d0b02ef0effa 100644
> --- a/drivers/acpi/acpica/evregion.c
> +++ b/drivers/acpi/acpica/evregion.c
> @@ -590,6 +590,7 @@ acpi_ev_execute_reg_method(union
> acpi_operand_object *region_obj, u32 function)
>   union acpi_operand_object *args[3];
>   union acpi_operand_object *region_obj2;
>   acpi_status status;
> + bool sp;
> 
>   ACPI_FUNCTION_TRACE(ev_execute_reg_method);
> 
> @@ -598,9 +599,28 @@ acpi_ev_execute_reg_method(union
> acpi_operand_object *region_obj, u32 function)
>   return_ACPI_STATUS(AE_NOT_EXIST);
>   }
> 
> + /*
> +  * For the default space_IDs, (the IDs for which there are default 
> region
> handlers
> +  * installed) Only execute the _REG methods if the global initialization
> _REG
> +  * methods have already been run (via acpi_initialize_objects). In other
> words,
> +  * we will defer the execution of the _REG methods for these
> space_IDs until
> +  * execution of acpi_initialize_objects. This is done because we need
> the handlers
> +  * for the default spaces (mem/io/pci/table) to be installed before we
> can run
> +  * any control methods (or _REG methods). There is known BIOS code
> that depends
> +  * on this.
> +  *
> +  * For all other space_IDs, we can safely execute the _REG methods
> immediately.
> +  * This means that for IDs like embedded_controller, this function
> should be called
> +  * only after acpi_enable_subsystem has been called.
> +  */
> +
> + sp = (region_obj->region.space_id ==
> ACPI_ADR_SPACE_SYSTEM_MEMORY ||
> +   region_obj->region.space_id == ACPI_ADR_SPACE_SYSTEM_IO ||
> +   region_obj->region.space_id == 

Re: [PATCH v1 02/11] mm: thp: introduce CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION

2016-03-06 Thread Naoya Horiguchi
On Mon, Mar 07, 2016 at 11:58:04AM +1100, Balbir Singh wrote:
> On Thu, Mar 03, 2016 at 04:41:49PM +0900, Naoya Horiguchi wrote:
> > Introduces CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION to limit thp migration
> > functionality to x86_64, which should be safer at the first step.
> >
> 
> The changelog is not helpful. Could you please describe what is
> architecture specific in these changes? What do other arches need to do
> to port these changes over?

The arch specific parts are pmd_present() and swap entry format. Currently
pmd_present() in x86_64 is not simple enough to easily determine pmd's state
(none, normal pmd entry pointing to pte page, pmd for thp, or pmd migration 
entry ...)
That requires me to assume in this version that pmd migration entry should
have_PAGE_PSE set, which should not be necessary if the complexity is fixed.
So I will mention this pmd_present() problem in the next version.

So if it's fixed, what developers need to do to port this feature to their
architectures is just to enable CONFIG_ARCH_ENABLE_THP_MIGRATION (and test it.)

Thanks,
Naoya Horiguchi


Re: [PATCH v1 02/11] mm: thp: introduce CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION

2016-03-06 Thread Naoya Horiguchi
On Mon, Mar 07, 2016 at 11:58:04AM +1100, Balbir Singh wrote:
> On Thu, Mar 03, 2016 at 04:41:49PM +0900, Naoya Horiguchi wrote:
> > Introduces CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION to limit thp migration
> > functionality to x86_64, which should be safer at the first step.
> >
> 
> The changelog is not helpful. Could you please describe what is
> architecture specific in these changes? What do other arches need to do
> to port these changes over?

The arch specific parts are pmd_present() and swap entry format. Currently
pmd_present() in x86_64 is not simple enough to easily determine pmd's state
(none, normal pmd entry pointing to pte page, pmd for thp, or pmd migration 
entry ...)
That requires me to assume in this version that pmd migration entry should
have_PAGE_PSE set, which should not be necessary if the complexity is fixed.
So I will mention this pmd_present() problem in the next version.

So if it's fixed, what developers need to do to port this feature to their
architectures is just to enable CONFIG_ARCH_ENABLE_THP_MIGRATION (and test it.)

Thanks,
Naoya Horiguchi


Re: [PATCH v4 6/8] dmaengine: rcar-dmac: add iommu support for slave transfers

2016-03-06 Thread Vinod Koul
On Tue, Feb 16, 2016 at 09:39:42PM +0100, Niklas Söderlund wrote:
> Enable slave transfers to devices behind IPMMU:s by mapping the slave

IPMMU:s ?

> addresses using the dma-mapping API.
> 
> Signed-off-by: Niklas Söderlund 
> Reviewed-by: Laurent Pinchart 
> ---
>  drivers/dma/sh/rcar-dmac.c | 52 
> +-
>  1 file changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 743873c..6a24847 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -1106,21 +1106,63 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, 
> dma_addr_t buf_addr,
>   return desc;
>  }
>  
> +static int rcar_dmac_set_slave_addr(struct dma_chan *chan,
> +  struct rcar_dmac_chan_slave *slave,
> +  phys_addr_t addr, size_t size)
> +{
> + enum dma_data_direction dir;
> +
> + /*
> +  * We can't know the direction at this time, see documentation for
> +  * 'direction' in struct dma_slave_config.
> +  */

Okay so we are mapping on the device config, which doesn't seem intutive.
Why is this not done during prepare calls?

> + dir = DMA_BIDIRECTIONAL;
> +
> + if (slave->xfer_size) {
> + dma_unmap_resource(chan->device->dev, slave->slave_addr,
> +slave->xfer_size, dir, NULL);
> + slave->slave_addr = 0;
> + slave->xfer_size = 0;
> + }
> +
> + if (size) {
> + slave->slave_addr = dma_map_resource(chan->device->dev, addr,
> + size, dir, NULL);
> +
> + if (dma_mapping_error(chan->device->dev, slave->slave_addr)) {
> + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +
> + dev_err(chan->device->dev,
> + "chan%u: failed to map %zx@%pap", rchan->index,
> + size, );
> + return -EIO;
> + }
> +
> + slave->xfer_size = size;
> + }
> +
> + return 0;
> +}
> +
>  static int rcar_dmac_device_config(struct dma_chan *chan,
>  struct dma_slave_config *cfg)
>  {
>   struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> + int ret;
>  
>   /*
>* We could lock this, but you shouldn't be configuring the
>* channel, while using it...
>*/
> - rchan->src.slave_addr = cfg->src_addr;
> - rchan->dst.slave_addr = cfg->dst_addr;
> - rchan->src.xfer_size = cfg->src_addr_width;
> - rchan->dst.xfer_size = cfg->dst_addr_width;
>  
> - return 0;
> + ret = rcar_dmac_set_slave_addr(chan, >src, cfg->src_addr,
> +cfg->src_addr_width);
> + if (ret)
> + return ret;
> +
> + ret = rcar_dmac_set_slave_addr(chan, >dst, cfg->dst_addr,
> +cfg->dst_addr_width);
> + return ret;
>  }
>  
>  static int rcar_dmac_chan_terminate_all(struct dma_chan *chan)
> -- 
> 2.7.1
> 

-- 
~Vinod


Re: [PATCH v4 6/8] dmaengine: rcar-dmac: add iommu support for slave transfers

2016-03-06 Thread Vinod Koul
On Tue, Feb 16, 2016 at 09:39:42PM +0100, Niklas Söderlund wrote:
> Enable slave transfers to devices behind IPMMU:s by mapping the slave

IPMMU:s ?

> addresses using the dma-mapping API.
> 
> Signed-off-by: Niklas Söderlund 
> Reviewed-by: Laurent Pinchart 
> ---
>  drivers/dma/sh/rcar-dmac.c | 52 
> +-
>  1 file changed, 47 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
> index 743873c..6a24847 100644
> --- a/drivers/dma/sh/rcar-dmac.c
> +++ b/drivers/dma/sh/rcar-dmac.c
> @@ -1106,21 +1106,63 @@ rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, 
> dma_addr_t buf_addr,
>   return desc;
>  }
>  
> +static int rcar_dmac_set_slave_addr(struct dma_chan *chan,
> +  struct rcar_dmac_chan_slave *slave,
> +  phys_addr_t addr, size_t size)
> +{
> + enum dma_data_direction dir;
> +
> + /*
> +  * We can't know the direction at this time, see documentation for
> +  * 'direction' in struct dma_slave_config.
> +  */

Okay so we are mapping on the device config, which doesn't seem intutive.
Why is this not done during prepare calls?

> + dir = DMA_BIDIRECTIONAL;
> +
> + if (slave->xfer_size) {
> + dma_unmap_resource(chan->device->dev, slave->slave_addr,
> +slave->xfer_size, dir, NULL);
> + slave->slave_addr = 0;
> + slave->xfer_size = 0;
> + }
> +
> + if (size) {
> + slave->slave_addr = dma_map_resource(chan->device->dev, addr,
> + size, dir, NULL);
> +
> + if (dma_mapping_error(chan->device->dev, slave->slave_addr)) {
> + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +
> + dev_err(chan->device->dev,
> + "chan%u: failed to map %zx@%pap", rchan->index,
> + size, );
> + return -EIO;
> + }
> +
> + slave->xfer_size = size;
> + }
> +
> + return 0;
> +}
> +
>  static int rcar_dmac_device_config(struct dma_chan *chan,
>  struct dma_slave_config *cfg)
>  {
>   struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> + int ret;
>  
>   /*
>* We could lock this, but you shouldn't be configuring the
>* channel, while using it...
>*/
> - rchan->src.slave_addr = cfg->src_addr;
> - rchan->dst.slave_addr = cfg->dst_addr;
> - rchan->src.xfer_size = cfg->src_addr_width;
> - rchan->dst.xfer_size = cfg->dst_addr_width;
>  
> - return 0;
> + ret = rcar_dmac_set_slave_addr(chan, >src, cfg->src_addr,
> +cfg->src_addr_width);
> + if (ret)
> + return ret;
> +
> + ret = rcar_dmac_set_slave_addr(chan, >dst, cfg->dst_addr,
> +cfg->dst_addr_width);
> + return ret;
>  }
>  
>  static int rcar_dmac_chan_terminate_all(struct dma_chan *chan)
> -- 
> 2.7.1
> 

-- 
~Vinod


Re: linux-next: manual merge of the watchdog tree with the arm-soc tree

2016-03-06 Thread Olof Johansson
Hi Wim,

It's much easier for us if all DTS changes go in through the arm-soc
trees, to avoid these kind of conflicts. Is this on a branch where you
can easily drop it and we pick it up instead, or is it on a now-stable
branch?


-Olof

On Sun, Mar 6, 2016 at 8:04 PM, Stephen Rothwell  wrote:
> Hi Wim,
>
> Today's linux-next merge of the watchdog tree got a conflict in:
>
>   arch/arm64/boot/dts/arm/foundation-v8.dts
>
> between commit:
>
>   d11a89796678 ("arm64: dts: split Foundation model dts to put the GIC 
> separately")
>
> from the arm-soc tree and commit:
>
>   fe3a97e8ed02 ("ARM64: add SBSA Generic Watchdog device node in 
> foundation-v8.dts")
>
> from the watchdog tree.
>
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).
>
> --
> Cheers,
> Stephen Rothwell
>
> diff --cc arch/arm64/boot/dts/arm/foundation-v8.dts
> index 71168077312d,66cb9aaa9135..
> --- a/arch/arm64/boot/dts/arm/foundation-v8.dts
> +++ b/arch/arm64/boot/dts/arm/foundation-v8.dts
> @@@ -18,4 -83,165 +18,11 @@@
>   <0x0 0x2c006000 0 0x2000>;
> interrupts = <1 9 0xf04>;
> };
>  -
>  -  timer {
>  -  compatible = "arm,armv8-timer";
>  -  interrupts = <1 13 0xf08>,
>  -   <1 14 0xf08>,
>  -   <1 11 0xf08>,
>  -   <1 10 0xf08>;
>  -  clock-frequency = <1>;
>  -  };
>  -
>  -  pmu {
>  -  compatible = "arm,armv8-pmuv3";
>  -  interrupts = <0 60 4>,
>  -   <0 61 4>,
>  -   <0 62 4>,
>  -   <0 63 4>;
>  -  };
>  -
>  -  smb {
>  -  compatible = "arm,vexpress,v2m-p1", "simple-bus";
>  -  arm,v2m-memory-map = "rs1";
>  -  #address-cells = <2>; /* SMB chipselect number and offset */
>  -  #size-cells = <1>;
>  -
>  -  ranges = <0 0 0 0x0800 0x0400>,
>  -   <1 0 0 0x1400 0x0400>,
>  -   <2 0 0 0x1800 0x0400>,
>  -   <3 0 0 0x1c00 0x0400>,
>  -   <4 0 0 0x0c00 0x0400>,
>  -   <5 0 0 0x1000 0x0400>;
>  -
>  -  #interrupt-cells = <1>;
>  -  interrupt-map-mask = <0 0 63>;
>  -  interrupt-map = <0 0  0  0  0 4>,
>  -  <0 0  1  0  1 4>,
>  -  <0 0  2  0  2 4>,
>  -  <0 0  3  0  3 4>,
>  -  <0 0  4  0  4 4>,
>  -  <0 0  5  0  5 4>,
>  -  <0 0  6  0  6 4>,
>  -  <0 0  7  0  7 4>,
>  -  <0 0  8  0  8 4>,
>  -  <0 0  9  0  9 4>,
>  -  <0 0 10  0 10 4>,
>  -  <0 0 11  0 11 4>,
>  -  <0 0 12  0 12 4>,
>  -  <0 0 13  0 13 4>,
>  -  <0 0 14  0 14 4>,
>  -  <0 0 15  0 15 4>,
>  -  <0 0 16  0 16 4>,
>  -  <0 0 17  0 17 4>,
>  -  <0 0 18  0 18 4>,
>  -  <0 0 19  0 19 4>,
>  -  <0 0 20  0 20 4>,
>  -  <0 0 21  0 21 4>,
>  -  <0 0 22  0 22 4>,
>  -  <0 0 23  0 23 4>,
>  -  <0 0 24  0 24 4>,
>  -  <0 0 25  0 25 4>,
>  -  <0 0 26  0 26 4>,
>  -  <0 0 27  0 27 4>,
>  -  <0 0 28  0 28 4>,
>  -  <0 0 29  0 29 4>,
>  -  <0 0 30  0 30 4>,
>  -  <0 0 31  0 31 4>,
>  -  <0 0 32  0 32 4>,
>  -  <0 0 33  0 33 4>,
>  -  <0 0 34  0 34 4>,
>  -  <0 0 35  0 35 4>,
>  -  <0 0 36  0 36 4>,
>  -  <0 0 37  0 37 4>,
>  -  <0 0 38  0 38 4>,
>  -  <0 0 39  0 39 4>,
>  -  <0 0 40  0 40 4>,
>  -  <0 0 41  0 41 4>,
>  -  <0 0 42  0 42 4>;
>  -
>  -  ethernet@2,0200 {
>  -  compatible = "smsc,lan91c111";
>  -  reg = <2 0x0200 0x1>;
>  -  interrupts = <15>;
>  -  };
>  -
>  -  v2m_clk24mhz: clk24mhz {
>  -  compatible = 

Re: linux-next: manual merge of the watchdog tree with the arm-soc tree

2016-03-06 Thread Olof Johansson
Hi Wim,

It's much easier for us if all DTS changes go in through the arm-soc
trees, to avoid these kind of conflicts. Is this on a branch where you
can easily drop it and we pick it up instead, or is it on a now-stable
branch?


-Olof

On Sun, Mar 6, 2016 at 8:04 PM, Stephen Rothwell  wrote:
> Hi Wim,
>
> Today's linux-next merge of the watchdog tree got a conflict in:
>
>   arch/arm64/boot/dts/arm/foundation-v8.dts
>
> between commit:
>
>   d11a89796678 ("arm64: dts: split Foundation model dts to put the GIC 
> separately")
>
> from the arm-soc tree and commit:
>
>   fe3a97e8ed02 ("ARM64: add SBSA Generic Watchdog device node in 
> foundation-v8.dts")
>
> from the watchdog tree.
>
> I fixed it up (see below) and can carry the fix as necessary (no action
> is required).
>
> --
> Cheers,
> Stephen Rothwell
>
> diff --cc arch/arm64/boot/dts/arm/foundation-v8.dts
> index 71168077312d,66cb9aaa9135..
> --- a/arch/arm64/boot/dts/arm/foundation-v8.dts
> +++ b/arch/arm64/boot/dts/arm/foundation-v8.dts
> @@@ -18,4 -83,165 +18,11 @@@
>   <0x0 0x2c006000 0 0x2000>;
> interrupts = <1 9 0xf04>;
> };
>  -
>  -  timer {
>  -  compatible = "arm,armv8-timer";
>  -  interrupts = <1 13 0xf08>,
>  -   <1 14 0xf08>,
>  -   <1 11 0xf08>,
>  -   <1 10 0xf08>;
>  -  clock-frequency = <1>;
>  -  };
>  -
>  -  pmu {
>  -  compatible = "arm,armv8-pmuv3";
>  -  interrupts = <0 60 4>,
>  -   <0 61 4>,
>  -   <0 62 4>,
>  -   <0 63 4>;
>  -  };
>  -
>  -  smb {
>  -  compatible = "arm,vexpress,v2m-p1", "simple-bus";
>  -  arm,v2m-memory-map = "rs1";
>  -  #address-cells = <2>; /* SMB chipselect number and offset */
>  -  #size-cells = <1>;
>  -
>  -  ranges = <0 0 0 0x0800 0x0400>,
>  -   <1 0 0 0x1400 0x0400>,
>  -   <2 0 0 0x1800 0x0400>,
>  -   <3 0 0 0x1c00 0x0400>,
>  -   <4 0 0 0x0c00 0x0400>,
>  -   <5 0 0 0x1000 0x0400>;
>  -
>  -  #interrupt-cells = <1>;
>  -  interrupt-map-mask = <0 0 63>;
>  -  interrupt-map = <0 0  0  0  0 4>,
>  -  <0 0  1  0  1 4>,
>  -  <0 0  2  0  2 4>,
>  -  <0 0  3  0  3 4>,
>  -  <0 0  4  0  4 4>,
>  -  <0 0  5  0  5 4>,
>  -  <0 0  6  0  6 4>,
>  -  <0 0  7  0  7 4>,
>  -  <0 0  8  0  8 4>,
>  -  <0 0  9  0  9 4>,
>  -  <0 0 10  0 10 4>,
>  -  <0 0 11  0 11 4>,
>  -  <0 0 12  0 12 4>,
>  -  <0 0 13  0 13 4>,
>  -  <0 0 14  0 14 4>,
>  -  <0 0 15  0 15 4>,
>  -  <0 0 16  0 16 4>,
>  -  <0 0 17  0 17 4>,
>  -  <0 0 18  0 18 4>,
>  -  <0 0 19  0 19 4>,
>  -  <0 0 20  0 20 4>,
>  -  <0 0 21  0 21 4>,
>  -  <0 0 22  0 22 4>,
>  -  <0 0 23  0 23 4>,
>  -  <0 0 24  0 24 4>,
>  -  <0 0 25  0 25 4>,
>  -  <0 0 26  0 26 4>,
>  -  <0 0 27  0 27 4>,
>  -  <0 0 28  0 28 4>,
>  -  <0 0 29  0 29 4>,
>  -  <0 0 30  0 30 4>,
>  -  <0 0 31  0 31 4>,
>  -  <0 0 32  0 32 4>,
>  -  <0 0 33  0 33 4>,
>  -  <0 0 34  0 34 4>,
>  -  <0 0 35  0 35 4>,
>  -  <0 0 36  0 36 4>,
>  -  <0 0 37  0 37 4>,
>  -  <0 0 38  0 38 4>,
>  -  <0 0 39  0 39 4>,
>  -  <0 0 40  0 40 4>,
>  -  <0 0 41  0 41 4>,
>  -  <0 0 42  0 42 4>;
>  -
>  -  ethernet@2,0200 {
>  -  compatible = "smsc,lan91c111";
>  -  reg = <2 0x0200 0x1>;
>  -  interrupts = <15>;
>  -  };
>  -
>  -  v2m_clk24mhz: clk24mhz {
>  -  compatible = "fixed-clock";
>  - 

Re: [PATCH v2] locktorture: Fix NULL pointer when torture_type is invalid

2016-03-06 Thread Davidlohr Bueso

On Mon, 07 Mar 2016, Kefeng Wang wrote:

On 2016/3/3 16:36, Davidlohr Bueso wrote:



+/*
+ * Indicates early cleanup, meaning that the test has not run,
+ * such as when passing bogus args when loading the module. As
+ * such, only perform the underlying torture-specific cleanups,
+ * and avoid anything related to locktorture.
+ */
+if (!cxt.lwsa)
+goto end;


Sorry for the late response, the cxt.lrsa should be taken into account too.


I am taking it into account, note that we kfree lwsa if lrsa fails memory
allocation. Of course we should be defensive, so go ahead and explicitly set
it to nil. v2 below, otherwise same patch.

-8<--
Subject: [PATCH v2] locktorture: Fix nil pointer dereferencing for cleanup paths

It has been found that paths that invoke cleanups through
lock_torture_cleanup() can incur in nil pointer dereferencing
bugs during the statistics printing phase. This is mainly
because we should not be calling into statistics before we are
sure things have been setup correctly.

Specifically, early checks (and the need for handling this in
the cleanup call) only include parameter checks and basic
statistics allocation. Once we start write/read kthreads
we then consider the test as started. As such, update the func
in question to check for cxt.lwsa writer stats, if not set,
we either have a bogus parameter or ENOMEM situation and
therefore only need to deal with general torture calls

Reported-by: Kefeng Wang 
Signed-off-by: Davidlohr Bueso 
---
 kernel/locking/locktorture.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919..b5bc243 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -748,6 +748,15 @@ static void lock_torture_cleanup(void)
if (torture_cleanup_begin())
return;
 
+	/*

+* Indicates early cleanup, meaning that the test has not run,
+* such as when passing bogus args when loading the module. As
+* such, only perform the underlying torture-specific cleanups,
+* and avoid anything related to locktorture.
+*/
+   if (!cxt.lwsa)
+   goto end;
+
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
torture_stop_kthread(lock_torture_writer,
@@ -776,6 +785,7 @@ static void lock_torture_cleanup(void)
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+end:
torture_cleanup_end();
 }
 
@@ -870,6 +880,7 @@ static int __init lock_torture_init(void)

VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
kfree(cxt.lwsa);
+   cxt.lwsa = NULL;
goto unwind;
}
 
@@ -878,6 +889,7 @@ static int __init lock_torture_init(void)

cxt.lrsa[i].n_lock_acquired = 0;
}
}
+
lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
 
 	/* Prepare torture context. */

--
2.1.4



Re: [PATCH v2] locktorture: Fix NULL pointer when torture_type is invalid

2016-03-06 Thread Davidlohr Bueso

On Mon, 07 Mar 2016, Kefeng Wang wrote:

On 2016/3/3 16:36, Davidlohr Bueso wrote:



+/*
+ * Indicates early cleanup, meaning that the test has not run,
+ * such as when passing bogus args when loading the module. As
+ * such, only perform the underlying torture-specific cleanups,
+ * and avoid anything related to locktorture.
+ */
+if (!cxt.lwsa)
+goto end;


Sorry for the late response, the cxt.lrsa should be taken into account too.


I am taking it into account, note that we kfree lwsa if lrsa fails memory
allocation. Of course we should be defensive, so go ahead and explicitly set
it to nil. v2 below, otherwise same patch.

-8<--
Subject: [PATCH v2] locktorture: Fix nil pointer dereferencing for cleanup paths

It has been found that paths that invoke cleanups through
lock_torture_cleanup() can incur in nil pointer dereferencing
bugs during the statistics printing phase. This is mainly
because we should not be calling into statistics before we are
sure things have been setup correctly.

Specifically, early checks (and the need for handling this in
the cleanup call) only include parameter checks and basic
statistics allocation. Once we start write/read kthreads
we then consider the test as started. As such, update the func
in question to check for cxt.lwsa writer stats, if not set,
we either have a bogus parameter or ENOMEM situation and
therefore only need to deal with general torture calls

Reported-by: Kefeng Wang 
Signed-off-by: Davidlohr Bueso 
---
 kernel/locking/locktorture.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919..b5bc243 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -748,6 +748,15 @@ static void lock_torture_cleanup(void)
if (torture_cleanup_begin())
return;
 
+	/*

+* Indicates early cleanup, meaning that the test has not run,
+* such as when passing bogus args when loading the module. As
+* such, only perform the underlying torture-specific cleanups,
+* and avoid anything related to locktorture.
+*/
+   if (!cxt.lwsa)
+   goto end;
+
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
torture_stop_kthread(lock_torture_writer,
@@ -776,6 +785,7 @@ static void lock_torture_cleanup(void)
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+end:
torture_cleanup_end();
 }
 
@@ -870,6 +880,7 @@ static int __init lock_torture_init(void)

VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
kfree(cxt.lwsa);
+   cxt.lwsa = NULL;
goto unwind;
}
 
@@ -878,6 +889,7 @@ static int __init lock_torture_init(void)

cxt.lrsa[i].n_lock_acquired = 0;
}
}
+
lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
 
 	/* Prepare torture context. */

--
2.1.4



RE: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > On 04/03/2016 15:26, Li, Liang Z wrote:
> > >> >
> > >> > The memory usage will keep increasing due to ever growing caches,
> > >> > etc, so you'll be left with very little free memory fairly soon.
> > >> >
> > > I don't think so.
> > >
> >
> > Roman is right.  For example, here I am looking at a 64 GB (physical)
> > machine which was booted about 30 minutes ago, and which is running
> > disk-heavy workloads (installing VMs).
> >
> > Since I have started writing this email (2 minutes?), the amount of
> > free memory has already gone down from 37 GB to 33 GB.  I expect that
> > by the time I have finished running the workload, in two hours, it
> > will not have any free memory.
> 
> But what about a VM sitting idle, or that just has more RAM assigned to it
> than is currently using.
>  I've got a host here that's been up for 46 days and has been doing some
> heavy VM debugging a few days ago, but today:
> 
> # free -m
>   totalusedfree  shared  buff/cache   
> available
> Mem:  965361146   44834 184   50555   
> 94735
> 
> I very rarely use all it's RAM, so it's got a big chunk of free RAM, and yes 
> it's
> got a big chunk of cache as well.
> 
> Dave
> 
> >
> > Paolo

I begin to realize Roman's opinions. The PV solution can't handle the cache 
memory while inflating balloon could.
Inflating balloon so as to skipping the cache memory is no good for guest's 
performance.

How much of the free memory in the guest depends on the workload in the VM  and 
the time VM has already run
before live migration. Even the memory usage will keep increasing due to ever 
growing caches, but we don't know
when the live migration will happen, assuming there are no or very little free 
pages in the guest is not quite right.

The advantage of the pv solution is the smaller performance impact, comparing 
with inflating the balloon.

Liang





RE: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > On 04/03/2016 15:26, Li, Liang Z wrote:
> > >> >
> > >> > The memory usage will keep increasing due to ever growing caches,
> > >> > etc, so you'll be left with very little free memory fairly soon.
> > >> >
> > > I don't think so.
> > >
> >
> > Roman is right.  For example, here I am looking at a 64 GB (physical)
> > machine which was booted about 30 minutes ago, and which is running
> > disk-heavy workloads (installing VMs).
> >
> > Since I have started writing this email (2 minutes?), the amount of
> > free memory has already gone down from 37 GB to 33 GB.  I expect that
> > by the time I have finished running the workload, in two hours, it
> > will not have any free memory.
> 
> But what about a VM sitting idle, or that just has more RAM assigned to it
> than is currently using.
>  I've got a host here that's been up for 46 days and has been doing some
> heavy VM debugging a few days ago, but today:
> 
> # free -m
>   totalusedfree  shared  buff/cache   
> available
> Mem:  965361146   44834 184   50555   
> 94735
> 
> I very rarely use all it's RAM, so it's got a big chunk of free RAM, and yes 
> it's
> got a big chunk of cache as well.
> 
> Dave
> 
> >
> > Paolo

I begin to realize Roman's opinions. The PV solution can't handle the cache 
memory while inflating balloon could.
Inflating balloon so as to skipping the cache memory is no good for guest's 
performance.

How much of the free memory in the guest depends on the workload in the VM  and 
the time VM has already run
before live migration. Even the memory usage will keep increasing due to ever 
growing caches, but we don't know
when the live migration will happen, assuming there are no or very little free 
pages in the guest is not quite right.

The advantage of the pv solution is the smaller performance impact, comparing 
with inflating the balloon.

Liang





Re: [PATCH 0/3] OOM detection rework v4

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 04:15:58PM +0100, Michal Hocko wrote:
> On Fri 04-03-16 14:23:27, Joonsoo Kim wrote:
> > On Thu, Mar 03, 2016 at 04:25:15PM +0100, Michal Hocko wrote:
> > > On Thu 03-03-16 23:10:09, Joonsoo Kim wrote:
> > > > 2016-03-03 18:26 GMT+09:00 Michal Hocko :
> [...]
> > > > >> I guess that usual case for high order allocation failure has enough 
> > > > >> freepage.
> > > > >
> > > > > Not sure I understand you mean here but I wouldn't be surprised if 
> > > > > high
> > > > > order failed even with enough free pages. And that is exactly why I am
> > > > > claiming that reclaiming more pages is no free ticket to high order
> > > > > pages.
> > > > 
> > > > I didn't say that it's free ticket. OOM kill would be the most 
> > > > expensive ticket
> > > > that we have. Why do you want to kill something?
> > > 
> > > Because all the attempts so far have failed and we should rather not
> > > retry endlessly. With the band-aid we know we will retry
> > > MAX_RECLAIM_RETRIES at most. So compaction had that many attempts to
> > > resolve the situation along with the same amount of reclaim rounds to
> > > help and get over watermarks.
> > > 
> > > > It also doesn't guarantee to make high order pages. It is just another
> > > > way of reclaiming memory. What is the difference between plain reclaim
> > > > and OOM kill? Why do we use OOM kill in this case?
> > > 
> > > What is our alternative other than keep looping endlessly?
> > 
> > Loop as long as free memory or estimated available memory (free +
> > reclaimable) increases. This means that we did some progress. And,
> > they will not grow forever because we have just limited reclaimable
> > memory and limited memory. You can reset no_progress_loops = 0 when
> > those metric increases than before.
> 
> Hmm, why is this any better than taking the feedback from the reclaim
> (did_some_progress)?

My suggestion could be only applied to high order case. In this case,
free page and reclaimable page is already sufficient and parallel
free page consumer would re-generate reclaimable page endlessly so
positive did_some_progress will be returned endlessy. We need to stop
retry at some point so we need some metric that ensures finite retry
in any case.

>  
> > With this bound, we can do our best to try to solve this unpleasant
> > situation before OOM.
> > 
> > Unconditional 16 looping and then OOM kill really doesn't make any
> > sense, because it doesn't mean that we already do our best.
> 
> 16 is not really that important. We can change that if that doesn't
> sounds sufficient. But please note that each reclaim round means
> that we have scanned all eligible LRUs to find and reclaim something
> and asked direct compaction to prepare a high order page.
> This sounds like "do our best" to me.

AFAIK, each reclaim round doesn't reclaim all reclaimable page. It has
a limit to reclaim. It looks not our best to me and N retry only
multipies that limit N times. It also doesn't look like our best to
me and will lead to premature OOM kill.

> Now it seems that we need more changes at least in the compaction area
> because the code doesn't seem to fit the nature of !costly allocation
> requests. I am also not satisfied with the fixed MAX_RECLAIM_RETRIES for
> high order pages, I would much rather see some feedback mechanism which
> would measurable and evaluated in some way but is this really necessary
> for the initial version?

I don't know. My analysis is just based on my guess and background knowledge,
not practical usecase, so I'm not sure it is necessary for the initial
version or not. It's up to you.

Thanks.


Re: [PATCH 0/3] OOM detection rework v4

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 04:15:58PM +0100, Michal Hocko wrote:
> On Fri 04-03-16 14:23:27, Joonsoo Kim wrote:
> > On Thu, Mar 03, 2016 at 04:25:15PM +0100, Michal Hocko wrote:
> > > On Thu 03-03-16 23:10:09, Joonsoo Kim wrote:
> > > > 2016-03-03 18:26 GMT+09:00 Michal Hocko :
> [...]
> > > > >> I guess that usual case for high order allocation failure has enough 
> > > > >> freepage.
> > > > >
> > > > > Not sure I understand you mean here but I wouldn't be surprised if 
> > > > > high
> > > > > order failed even with enough free pages. And that is exactly why I am
> > > > > claiming that reclaiming more pages is no free ticket to high order
> > > > > pages.
> > > > 
> > > > I didn't say that it's free ticket. OOM kill would be the most 
> > > > expensive ticket
> > > > that we have. Why do you want to kill something?
> > > 
> > > Because all the attempts so far have failed and we should rather not
> > > retry endlessly. With the band-aid we know we will retry
> > > MAX_RECLAIM_RETRIES at most. So compaction had that many attempts to
> > > resolve the situation along with the same amount of reclaim rounds to
> > > help and get over watermarks.
> > > 
> > > > It also doesn't guarantee to make high order pages. It is just another
> > > > way of reclaiming memory. What is the difference between plain reclaim
> > > > and OOM kill? Why do we use OOM kill in this case?
> > > 
> > > What is our alternative other than keep looping endlessly?
> > 
> > Loop as long as free memory or estimated available memory (free +
> > reclaimable) increases. This means that we did some progress. And,
> > they will not grow forever because we have just limited reclaimable
> > memory and limited memory. You can reset no_progress_loops = 0 when
> > those metric increases than before.
> 
> Hmm, why is this any better than taking the feedback from the reclaim
> (did_some_progress)?

My suggestion could be only applied to high order case. In this case,
free page and reclaimable page is already sufficient and parallel
free page consumer would re-generate reclaimable page endlessly so
positive did_some_progress will be returned endlessy. We need to stop
retry at some point so we need some metric that ensures finite retry
in any case.

>  
> > With this bound, we can do our best to try to solve this unpleasant
> > situation before OOM.
> > 
> > Unconditional 16 looping and then OOM kill really doesn't make any
> > sense, because it doesn't mean that we already do our best.
> 
> 16 is not really that important. We can change that if that doesn't
> sounds sufficient. But please note that each reclaim round means
> that we have scanned all eligible LRUs to find and reclaim something
> and asked direct compaction to prepare a high order page.
> This sounds like "do our best" to me.

AFAIK, each reclaim round doesn't reclaim all reclaimable page. It has
a limit to reclaim. It looks not our best to me and N retry only
multipies that limit N times. It also doesn't look like our best to
me and will lead to premature OOM kill.

> Now it seems that we need more changes at least in the compaction area
> because the code doesn't seem to fit the nature of !costly allocation
> requests. I am also not satisfied with the fixed MAX_RECLAIM_RETRIES for
> high order pages, I would much rather see some feedback mechanism which
> would measurable and evaluated in some way but is this really necessary
> for the initial version?

I don't know. My analysis is just based on my guess and background knowledge,
not practical usecase, so I'm not sure it is necessary for the initial
version or not. It's up to you.

Thanks.


Re: user namespace and fully visible proc and sys mounts

2016-03-06 Thread Serge E. Hallyn
On Sun, Mar 06, 2016 at 07:49:14PM -0800, Andy Lutomirski wrote:
> On Sun, Mar 6, 2016 at 7:45 PM, Serge E. Hallyn  wrote:
> > On Sun, Mar 06, 2016 at 06:24:23PM -0800, Andy Lutomirski wrote:
> >> On Mar 6, 2016 2:03 PM, "Eric W. Biederman"  wrote:
> >> >
> >> > "Serge E. Hallyn"  writes:
> >> >
> >> > > Hi,
> >> > >
> >> > > So we've been over this many times...  but unfortunately there is more
> >> > > breakage to report.  Regular privileged and unprivileged containers
> >> > > work all right for us.  But running an unprivileged container inside a
> >> > > privileged container is blocked.
> >> > >
> >> > > When creating privileged containers, lxc by default does a few things:
> >> > > it mounts some fuse.lxcfs files over procfiles include /proc/meminfo 
> >> > > and
> >> > > /proc/uptime.  It mounts proc rw but /proc/sysrq-trigger ro as well as
> >> > > moves /proc/sys/net out of the way, bind-mounts /proc/sys readonly
> >> > > (because this container is not in a user namespace) then moves
> >> > > /proc/sys/net back.  Finally it mounts sys ro but bind-mounts
> >> > > /sys/devices/virtual/net as writeable.
> >> > >
> >> > > If any of these are left enabled, unprivileged containers can't be
> >> > > started.  If all are disabled, then they can be.
> >> > >
> >> > > Can we find a way to make these not block remounts in child user
> >> > > namespaces?  A boot flag, a procfs and sysfs mount option, a sysctl?
> >> >
> >> > Are any of these overmounts done for the purpose of security?  It
> >> > appears the /proc/sys and /sys mounts being made read-only is for that
> >> > purpose.
> >> >
> >> > If none of the mounts are for secuirty the easy solution that works
> >> > today is to also mount /proc and /sys somewhere else in your container
> >> > so that the permission check for mounting a new copy passes.
> >>
> >> Can we use the big hammer approach on /proc/sys?  Specifically, what
> >> if we made it so that /proc mounts created in a non-root namespace
> >> *only* see things that are scoped to the active namespaces, and only
> >> those over which the mounter has capabilities?  We could have mount
> >> options for this.
> >
> > Of course the problem is precisely non-user-namespaced containers which
> > do own and have capabilities over the /proc/sys/files.  For user-namespaced
> > containers /proc/sys/ isn't really an issue.
> 
> What I mean is:
> 
> mount -o nsonly=user,net -t proc none /proc
> 
> would show the list of processors and things scoped to the current
> userns and netns, would *not* show global sysctls, and would fail
> unless the caller has appropriate caps over the userns and netns.
> This would work even if the old procfs is not fully visbile.

Gah, so apparently I'd forgotten the workaround I'd implemented - I
thought things had regressed, but they haven't, I'd just missed a step.

Sorry for the noise.  I don't want to make things more complicated or
more brittle when we can make it work as is - thanks.

-serge


Re: user namespace and fully visible proc and sys mounts

2016-03-06 Thread Serge E. Hallyn
On Sun, Mar 06, 2016 at 07:49:14PM -0800, Andy Lutomirski wrote:
> On Sun, Mar 6, 2016 at 7:45 PM, Serge E. Hallyn  wrote:
> > On Sun, Mar 06, 2016 at 06:24:23PM -0800, Andy Lutomirski wrote:
> >> On Mar 6, 2016 2:03 PM, "Eric W. Biederman"  wrote:
> >> >
> >> > "Serge E. Hallyn"  writes:
> >> >
> >> > > Hi,
> >> > >
> >> > > So we've been over this many times...  but unfortunately there is more
> >> > > breakage to report.  Regular privileged and unprivileged containers
> >> > > work all right for us.  But running an unprivileged container inside a
> >> > > privileged container is blocked.
> >> > >
> >> > > When creating privileged containers, lxc by default does a few things:
> >> > > it mounts some fuse.lxcfs files over procfiles include /proc/meminfo 
> >> > > and
> >> > > /proc/uptime.  It mounts proc rw but /proc/sysrq-trigger ro as well as
> >> > > moves /proc/sys/net out of the way, bind-mounts /proc/sys readonly
> >> > > (because this container is not in a user namespace) then moves
> >> > > /proc/sys/net back.  Finally it mounts sys ro but bind-mounts
> >> > > /sys/devices/virtual/net as writeable.
> >> > >
> >> > > If any of these are left enabled, unprivileged containers can't be
> >> > > started.  If all are disabled, then they can be.
> >> > >
> >> > > Can we find a way to make these not block remounts in child user
> >> > > namespaces?  A boot flag, a procfs and sysfs mount option, a sysctl?
> >> >
> >> > Are any of these overmounts done for the purpose of security?  It
> >> > appears the /proc/sys and /sys mounts being made read-only is for that
> >> > purpose.
> >> >
> >> > If none of the mounts are for secuirty the easy solution that works
> >> > today is to also mount /proc and /sys somewhere else in your container
> >> > so that the permission check for mounting a new copy passes.
> >>
> >> Can we use the big hammer approach on /proc/sys?  Specifically, what
> >> if we made it so that /proc mounts created in a non-root namespace
> >> *only* see things that are scoped to the active namespaces, and only
> >> those over which the mounter has capabilities?  We could have mount
> >> options for this.
> >
> > Of course the problem is precisely non-user-namespaced containers which
> > do own and have capabilities over the /proc/sys/files.  For user-namespaced
> > containers /proc/sys/ isn't really an issue.
> 
> What I mean is:
> 
> mount -o nsonly=user,net -t proc none /proc
> 
> would show the list of processors and things scoped to the current
> userns and netns, would *not* show global sysctls, and would fail
> unless the caller has appropriate caps over the userns and netns.
> This would work even if the old procfs is not fully visbile.

Gah, so apparently I'd forgotten the workaround I'd implemented - I
thought things had regressed, but they haven't, I'd just missed a step.

Sorry for the noise.  I don't want to make things more complicated or
more brittle when we can make it work as is - thanks.

-serge


[PATCH] PCI/keystone: check return value of devm_phy_get with EPROBE_DEFER

2016-03-06 Thread Shawn Lin
If the return value of devm_phy_get is EPROBE_DEFER, we should
defer probing the driver.

Signed-off-by: Shawn Lin 
---

 drivers/pci/host/pci-keystone.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index 0aa81bd..42af6ac 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -363,6 +363,8 @@ static int __init ks_pcie_probe(struct platform_device 
*pdev)
ret = phy_init(phy);
if (ret < 0)
return ret;
+   } else if (PTR_ERR(phy) == -EPROBE_DEFER) {
+   return PTR_ERR(phy);
}
 
/* index 2 is to read PCI DEVICE_ID */
-- 
2.3.7




[PATCH] PCI/keystone: check return value of devm_phy_get with EPROBE_DEFER

2016-03-06 Thread Shawn Lin
If the return value of devm_phy_get is EPROBE_DEFER, we should
defer probing the driver.

Signed-off-by: Shawn Lin 
---

 drivers/pci/host/pci-keystone.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c
index 0aa81bd..42af6ac 100644
--- a/drivers/pci/host/pci-keystone.c
+++ b/drivers/pci/host/pci-keystone.c
@@ -363,6 +363,8 @@ static int __init ks_pcie_probe(struct platform_device 
*pdev)
ret = phy_init(phy);
if (ret < 0)
return ret;
+   } else if (PTR_ERR(phy) == -EPROBE_DEFER) {
+   return PTR_ERR(phy);
}
 
/* index 2 is to read PCI DEVICE_ID */
-- 
2.3.7




Re: Suspicious error for CMA stress test

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 02:59:39PM +0800, Hanjun Guo wrote:
> On 2016/3/4 10:02, Joonsoo Kim wrote:
> > On Thu, Mar 03, 2016 at 08:49:01PM +0800, Hanjun Guo wrote:
> >> On 2016/3/3 15:42, Joonsoo Kim wrote:
> >>> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
>  (cc -mm and Joonsoo Kim)
> 
> 
>  On 03/02/2016 05:52 AM, Hanjun Guo wrote:
> > Hi,
> >
> > I came across a suspicious error for CMA stress test:
> >
> > Before the test, I got:
> > -bash-4.3# cat /proc/meminfo | grep Cma
> > CmaTotal: 204800 kB
> > CmaFree:  195044 kB
> >
> >
> > After running the test:
> > -bash-4.3# cat /proc/meminfo | grep Cma
> > CmaTotal: 204800 kB
> > CmaFree: 6602584 kB
> >
> > So the freed CMA memory is more than total..
> >
> > Also the the MemFree is more than mem total:
> >
> > -bash-4.3# cat /proc/meminfo
> > MemTotal:   16342016 kB
> > MemFree:22367268 kB
> > MemAvailable:   22370528 kB
> >> [...]
>  I played with this a bit and can see the same problem. The sanity
>  check of CmaFree < CmaTotal generally triggers in
>  __move_zone_freepage_state in unset_migratetype_isolate.
>  This also seems to be present as far back as v4.0 which was the
>  first version to have the updated accounting from Joonsoo.
>  Were there known limitations with the new freepage accounting,
>  Joonsoo?
> >>> I don't know. I also played with this and looks like there is
> >>> accounting problem, however, for my case, number of free page is slightly 
> >>> less
> >>> than total. I will take a look.
> >>>
> >>> Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
> >>> look like your case.
> >> I tested with malloc_size with 2M, and it grows much bigger than 1M, also I
> >> did some other test:
> > Thanks! Now, I can re-generate erronous situation you mentioned.
> >
> >>  - run with single thread with 10 times, everything is fine.
> >>
> >>  - I hack the cam_alloc() and free as below [1] to see if it's lock issue, 
> >> with
> >>the same test with 100 multi-thread, then I got:
> > [1] would not be sufficient to close this race.
> >
> > Try following things [A]. And, for more accurate test, I changed code a bit 
> > more
> > to prevent kernel page allocation from cma area [B]. This will prevent 
> > kernel
> > page allocation from cma area completely so we can focus cma_alloc/release 
> > race.
> >
> > Although, this is not correct fix, it could help that we can guess
> > where the problem is.
> >
> > Thanks.
> >
> > [A]
> > diff --git a/mm/cma.c b/mm/cma.c
> > index c003274..43ed02d 100644
> > --- a/mm/cma.c
> > +++ b/mm/cma.c
> > @@ -496,7 +496,9 @@ bool cma_release(struct cma *cma, const struct page 
> > *pages, unsigned int count)
> >  
> > VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
> >  
> > +   mutex_lock(_mutex);
> > free_contig_range(pfn, count);
> > +   mutex_unlock(_mutex);
> > cma_clear_bitmap(cma, pfn, count);
> > trace_cma_release(pfn, pages, count);
> >  
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index c6c38ed..1ce8a59 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2192,7 +2192,8 @@ void free_hot_cold_page(struct page *page, bool cold)
> >  * excessively into the page allocator
> >  */
> > if (migratetype >= MIGRATE_PCPTYPES) {
> > -   if (unlikely(is_migrate_isolate(migratetype))) {
> > +   if (is_migrate_cma(migratetype) ||
> > +   unlikely(is_migrate_isolate(migratetype))) {
> > free_one_page(zone, page, pfn, 0, migratetype);
> > goto out;
> > }
> 
> As I replied in previous email, the solution will fix the problem, the Cma 
> freed memory and
> system freed memory is in sane state after apply above patch.
> 
> I also tested this situation which only apply the code below:
> 
> if (migratetype >= MIGRATE_PCPTYPES) {
> -   if (unlikely(is_migrate_isolate(migratetype))) {
> +   if (is_migrate_cma(migratetype) ||
> +   unlikely(is_migrate_isolate(migratetype))) {
> free_one_page(zone, page, pfn, 0, migratetype);
> goto out;
> }
> 
> 
> This will not fix the problem, but will reduce the errorous freed number of 
> memory,
> hope this helps.
> 
> >
> >
> > [B]
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index f2dccf9..c6c38ed 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1493,6 +1493,7 @@ static int prep_new_page(struct page *page, unsigned 
> > int order, gfp_t gfp_flags,
> > int 
> > alloc_flags)
> >  {
> > int i;
> > +   bool cma = false;
> >  
> > for (i = 0; i < (1 << 

Re: Suspicious error for CMA stress test

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 02:59:39PM +0800, Hanjun Guo wrote:
> On 2016/3/4 10:02, Joonsoo Kim wrote:
> > On Thu, Mar 03, 2016 at 08:49:01PM +0800, Hanjun Guo wrote:
> >> On 2016/3/3 15:42, Joonsoo Kim wrote:
> >>> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
>  (cc -mm and Joonsoo Kim)
> 
> 
>  On 03/02/2016 05:52 AM, Hanjun Guo wrote:
> > Hi,
> >
> > I came across a suspicious error for CMA stress test:
> >
> > Before the test, I got:
> > -bash-4.3# cat /proc/meminfo | grep Cma
> > CmaTotal: 204800 kB
> > CmaFree:  195044 kB
> >
> >
> > After running the test:
> > -bash-4.3# cat /proc/meminfo | grep Cma
> > CmaTotal: 204800 kB
> > CmaFree: 6602584 kB
> >
> > So the freed CMA memory is more than total..
> >
> > Also the the MemFree is more than mem total:
> >
> > -bash-4.3# cat /proc/meminfo
> > MemTotal:   16342016 kB
> > MemFree:22367268 kB
> > MemAvailable:   22370528 kB
> >> [...]
>  I played with this a bit and can see the same problem. The sanity
>  check of CmaFree < CmaTotal generally triggers in
>  __move_zone_freepage_state in unset_migratetype_isolate.
>  This also seems to be present as far back as v4.0 which was the
>  first version to have the updated accounting from Joonsoo.
>  Were there known limitations with the new freepage accounting,
>  Joonsoo?
> >>> I don't know. I also played with this and looks like there is
> >>> accounting problem, however, for my case, number of free page is slightly 
> >>> less
> >>> than total. I will take a look.
> >>>
> >>> Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
> >>> look like your case.
> >> I tested with malloc_size with 2M, and it grows much bigger than 1M, also I
> >> did some other test:
> > Thanks! Now, I can re-generate erronous situation you mentioned.
> >
> >>  - run with single thread with 10 times, everything is fine.
> >>
> >>  - I hack the cam_alloc() and free as below [1] to see if it's lock issue, 
> >> with
> >>the same test with 100 multi-thread, then I got:
> > [1] would not be sufficient to close this race.
> >
> > Try following things [A]. And, for more accurate test, I changed code a bit 
> > more
> > to prevent kernel page allocation from cma area [B]. This will prevent 
> > kernel
> > page allocation from cma area completely so we can focus cma_alloc/release 
> > race.
> >
> > Although, this is not correct fix, it could help that we can guess
> > where the problem is.
> >
> > Thanks.
> >
> > [A]
> > diff --git a/mm/cma.c b/mm/cma.c
> > index c003274..43ed02d 100644
> > --- a/mm/cma.c
> > +++ b/mm/cma.c
> > @@ -496,7 +496,9 @@ bool cma_release(struct cma *cma, const struct page 
> > *pages, unsigned int count)
> >  
> > VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
> >  
> > +   mutex_lock(_mutex);
> > free_contig_range(pfn, count);
> > +   mutex_unlock(_mutex);
> > cma_clear_bitmap(cma, pfn, count);
> > trace_cma_release(pfn, pages, count);
> >  
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index c6c38ed..1ce8a59 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2192,7 +2192,8 @@ void free_hot_cold_page(struct page *page, bool cold)
> >  * excessively into the page allocator
> >  */
> > if (migratetype >= MIGRATE_PCPTYPES) {
> > -   if (unlikely(is_migrate_isolate(migratetype))) {
> > +   if (is_migrate_cma(migratetype) ||
> > +   unlikely(is_migrate_isolate(migratetype))) {
> > free_one_page(zone, page, pfn, 0, migratetype);
> > goto out;
> > }
> 
> As I replied in previous email, the solution will fix the problem, the Cma 
> freed memory and
> system freed memory is in sane state after apply above patch.
> 
> I also tested this situation which only apply the code below:
> 
> if (migratetype >= MIGRATE_PCPTYPES) {
> -   if (unlikely(is_migrate_isolate(migratetype))) {
> +   if (is_migrate_cma(migratetype) ||
> +   unlikely(is_migrate_isolate(migratetype))) {
> free_one_page(zone, page, pfn, 0, migratetype);
> goto out;
> }
> 
> 
> This will not fix the problem, but will reduce the errorous freed number of 
> memory,
> hope this helps.
> 
> >
> >
> > [B]
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index f2dccf9..c6c38ed 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1493,6 +1493,7 @@ static int prep_new_page(struct page *page, unsigned 
> > int order, gfp_t gfp_flags,
> > int 
> > alloc_flags)
> >  {
> > int i;
> > +   bool cma = false;
> >  
> > for (i = 0; i < (1 << order); i++) {
> >

Re: Suspicious error for CMA stress test

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 03:35:26PM +0800, Hanjun Guo wrote:
> On 2016/3/4 14:38, Joonsoo Kim wrote:
> > On Fri, Mar 04, 2016 at 02:05:09PM +0800, Hanjun Guo wrote:
> >> On 2016/3/4 12:32, Joonsoo Kim wrote:
> >>> On Fri, Mar 04, 2016 at 11:02:33AM +0900, Joonsoo Kim wrote:
>  On Thu, Mar 03, 2016 at 08:49:01PM +0800, Hanjun Guo wrote:
> > On 2016/3/3 15:42, Joonsoo Kim wrote:
> >> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
> >>> (cc -mm and Joonsoo Kim)
> >>>
> >>>
> >>> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>  Hi,
> 
>  I came across a suspicious error for CMA stress test:
> 
>  Before the test, I got:
>  -bash-4.3# cat /proc/meminfo | grep Cma
>  CmaTotal: 204800 kB
>  CmaFree:  195044 kB
> 
> 
>  After running the test:
>  -bash-4.3# cat /proc/meminfo | grep Cma
>  CmaTotal: 204800 kB
>  CmaFree: 6602584 kB
> 
>  So the freed CMA memory is more than total..
> 
>  Also the the MemFree is more than mem total:
> 
>  -bash-4.3# cat /proc/meminfo
>  MemTotal:   16342016 kB
>  MemFree:22367268 kB
>  MemAvailable:   22370528 kB
> > [...]
> >>> I played with this a bit and can see the same problem. The sanity
> >>> check of CmaFree < CmaTotal generally triggers in
> >>> __move_zone_freepage_state in unset_migratetype_isolate.
> >>> This also seems to be present as far back as v4.0 which was the
> >>> first version to have the updated accounting from Joonsoo.
> >>> Were there known limitations with the new freepage accounting,
> >>> Joonsoo?
> >> I don't know. I also played with this and looks like there is
> >> accounting problem, however, for my case, number of free page is 
> >> slightly less
> >> than total. I will take a look.
> >>
> >> Hanjun, could you tell me your malloc_size? I tested with 1 and it 
> >> doesn't
> >> look like your case.
> > I tested with malloc_size with 2M, and it grows much bigger than 1M, 
> > also I
> > did some other test:
>  Thanks! Now, I can re-generate erronous situation you mentioned.
> 
> >  - run with single thread with 10 times, everything is fine.
> >
> >  - I hack the cam_alloc() and free as below [1] to see if it's lock 
> > issue, with
> >the same test with 100 multi-thread, then I got:
>  [1] would not be sufficient to close this race.
> 
>  Try following things [A]. And, for more accurate test, I changed code a 
>  bit more
>  to prevent kernel page allocation from cma area [B]. This will prevent 
>  kernel
>  page allocation from cma area completely so we can focus 
>  cma_alloc/release race.
> 
>  Although, this is not correct fix, it could help that we can guess
>  where the problem is.
> >>> More correct fix is something like below.
> >>> Please test it.
> >> Hmm, this is not working:
> > Sad to hear that.
> >
> > Could you tell me your system's MAX_ORDER and pageblock_order?
> >
> 
> MAX_ORDER is 11, pageblock_order is 9, thanks for your help!

Hmm... that's same with me.

Below is similar fix that prevents buddy merging when one of buddy's
migrate type, but, not both, is MIGRATE_ISOLATE. In fact, I have
no idea why previous fix (more correct fix) doesn't work for you.
(It works for me.) But, maybe there is a bug on the fix
so I make new one which is more general form. Please test it.

Thanks.

-->8-
>From dd41e348572948d70b935fc24f82c096ff0fb417 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim 
Date: Fri, 4 Mar 2016 13:28:17 +0900
Subject: [PATCH] mm/cma: fix race

Signed-off-by: Joonsoo Kim 
---
 mm/page_alloc.c | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c6c38ed..d80d071 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -620,8 +620,8 @@ static inline void rmv_page_order(struct page *page)
  *
  * For recording page's order, we use page_private(page).
  */
-static inline int page_is_buddy(struct page *page, struct page *buddy,
-   unsigned int order)
+static inline int page_is_buddy(struct zone *zone, struct page *page,
+   struct page *buddy, unsigned int order)
 {
if (!pfn_valid_within(page_to_pfn(buddy)))
return 0;
@@ -644,6 +644,20 @@ static inline int page_is_buddy(struct page *page, struct 
page *buddy,
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
 
+   if (IS_ENABLED(CONFIG_CMA) &&
+   unlikely(has_isolate_pageblock(zone)) &&
+   unlikely(order >= 

Re: Suspicious error for CMA stress test

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 03:35:26PM +0800, Hanjun Guo wrote:
> On 2016/3/4 14:38, Joonsoo Kim wrote:
> > On Fri, Mar 04, 2016 at 02:05:09PM +0800, Hanjun Guo wrote:
> >> On 2016/3/4 12:32, Joonsoo Kim wrote:
> >>> On Fri, Mar 04, 2016 at 11:02:33AM +0900, Joonsoo Kim wrote:
>  On Thu, Mar 03, 2016 at 08:49:01PM +0800, Hanjun Guo wrote:
> > On 2016/3/3 15:42, Joonsoo Kim wrote:
> >> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
> >>> (cc -mm and Joonsoo Kim)
> >>>
> >>>
> >>> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>  Hi,
> 
>  I came across a suspicious error for CMA stress test:
> 
>  Before the test, I got:
>  -bash-4.3# cat /proc/meminfo | grep Cma
>  CmaTotal: 204800 kB
>  CmaFree:  195044 kB
> 
> 
>  After running the test:
>  -bash-4.3# cat /proc/meminfo | grep Cma
>  CmaTotal: 204800 kB
>  CmaFree: 6602584 kB
> 
>  So the freed CMA memory is more than total..
> 
>  Also the the MemFree is more than mem total:
> 
>  -bash-4.3# cat /proc/meminfo
>  MemTotal:   16342016 kB
>  MemFree:22367268 kB
>  MemAvailable:   22370528 kB
> > [...]
> >>> I played with this a bit and can see the same problem. The sanity
> >>> check of CmaFree < CmaTotal generally triggers in
> >>> __move_zone_freepage_state in unset_migratetype_isolate.
> >>> This also seems to be present as far back as v4.0 which was the
> >>> first version to have the updated accounting from Joonsoo.
> >>> Were there known limitations with the new freepage accounting,
> >>> Joonsoo?
> >> I don't know. I also played with this and looks like there is
> >> accounting problem, however, for my case, number of free page is 
> >> slightly less
> >> than total. I will take a look.
> >>
> >> Hanjun, could you tell me your malloc_size? I tested with 1 and it 
> >> doesn't
> >> look like your case.
> > I tested with malloc_size with 2M, and it grows much bigger than 1M, 
> > also I
> > did some other test:
>  Thanks! Now, I can re-generate erronous situation you mentioned.
> 
> >  - run with single thread with 10 times, everything is fine.
> >
> >  - I hack the cam_alloc() and free as below [1] to see if it's lock 
> > issue, with
> >the same test with 100 multi-thread, then I got:
>  [1] would not be sufficient to close this race.
> 
>  Try following things [A]. And, for more accurate test, I changed code a 
>  bit more
>  to prevent kernel page allocation from cma area [B]. This will prevent 
>  kernel
>  page allocation from cma area completely so we can focus 
>  cma_alloc/release race.
> 
>  Although, this is not correct fix, it could help that we can guess
>  where the problem is.
> >>> More correct fix is something like below.
> >>> Please test it.
> >> Hmm, this is not working:
> > Sad to hear that.
> >
> > Could you tell me your system's MAX_ORDER and pageblock_order?
> >
> 
> MAX_ORDER is 11, pageblock_order is 9, thanks for your help!

Hmm... that's same with me.

Below is similar fix that prevents buddy merging when one of buddy's
migrate type, but, not both, is MIGRATE_ISOLATE. In fact, I have
no idea why previous fix (more correct fix) doesn't work for you.
(It works for me.) But, maybe there is a bug on the fix
so I make new one which is more general form. Please test it.

Thanks.

-->8-
>From dd41e348572948d70b935fc24f82c096ff0fb417 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim 
Date: Fri, 4 Mar 2016 13:28:17 +0900
Subject: [PATCH] mm/cma: fix race

Signed-off-by: Joonsoo Kim 
---
 mm/page_alloc.c | 33 +++--
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c6c38ed..d80d071 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -620,8 +620,8 @@ static inline void rmv_page_order(struct page *page)
  *
  * For recording page's order, we use page_private(page).
  */
-static inline int page_is_buddy(struct page *page, struct page *buddy,
-   unsigned int order)
+static inline int page_is_buddy(struct zone *zone, struct page *page,
+   struct page *buddy, unsigned int order)
 {
if (!pfn_valid_within(page_to_pfn(buddy)))
return 0;
@@ -644,6 +644,20 @@ static inline int page_is_buddy(struct page *page, struct 
page *buddy,
if (page_zone_id(page) != page_zone_id(buddy))
return 0;
 
+   if (IS_ENABLED(CONFIG_CMA) &&
+   unlikely(has_isolate_pageblock(zone)) &&
+   unlikely(order >= pageblock_order)) {
+   int page_mt, buddy_mt;
+
+  

Re: [PATCH 0/4] ARM64:SoC add a new platform, LG Electronics's lg1k

2016-03-06 Thread Arnd Bergmann
On Monday 07 March 2016, Chanho Min wrote:
> This is an initial series for supporting LG Electronics's lg1k SoCs,
> based on ARM Cortex-A53, mainly used for digital TVs.
> 
> Chanho Min (4):
>   arm64: add Kconfig entry for LG1K SoC family
>   arm64: defconfig: enable ARCH_LG1K
>   arm64: dts: Add dts files for LG Electronics's lg1312 SoC
>   MAINTAINERS: add myself as ARM/LG1K maintainer

The patches look ok in principle, but the timing is unfortunate: we are at -rc7 
now,
just before the merge window, and we need to give this a little extra time for 
review,
so I think we should merge this for 4.7, not 4.6.

Arnd



Re: [PATCH 0/4] ARM64:SoC add a new platform, LG Electronics's lg1k

2016-03-06 Thread Arnd Bergmann
On Monday 07 March 2016, Chanho Min wrote:
> This is an initial series for supporting LG Electronics's lg1k SoCs,
> based on ARM Cortex-A53, mainly used for digital TVs.
> 
> Chanho Min (4):
>   arm64: add Kconfig entry for LG1K SoC family
>   arm64: defconfig: enable ARCH_LG1K
>   arm64: dts: Add dts files for LG Electronics's lg1312 SoC
>   MAINTAINERS: add myself as ARM/LG1K maintainer

The patches look ok in principle, but the timing is unfortunate: we are at -rc7 
now,
just before the merge window, and we need to give this a little extra time for 
review,
so I think we should merge this for 4.7, not 4.6.

Arnd



[PATCH] Remove last traces of ->sync_page

2016-03-06 Thread Matthew Wilcox
Commit 7eaceaccab5f removed ->sync_page, but a few mentions of it still
existed in documentation and comments,

Signed-off-by: Matthew Wilcox 
---
 Documentation/block/biodoc.txt|  3 +--
 Documentation/filesystems/Locking | 11 ++-
 Documentation/filesystems/vfs.txt |  8 +++-
 fs/isofs/compress.c   |  1 -
 fs/ntfs/inode.c   |  2 +-
 5 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 5be8a7f..026d133 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1024,8 +1024,7 @@ could be on demand. For example wait_on_buffer sets the 
unplugging going
 through sync_buffer() running blk_run_address_space(mapping). Or the caller
 can do it explicity through blk_unplug(bdev). So in the read case,
 the queue gets explicitly unplugged as part of waiting for completion on that
-buffer. For page driven IO, the address space ->sync_page() takes care of
-doing the blk_run_address_space().
+buffer.
 
 Aside:
   This is kind of controversial territory, as it's not clear if plugging is
diff --git a/Documentation/filesystems/Locking 
b/Documentation/filesystems/Locking
index 619af9b..3462123 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -179,7 +179,6 @@ unlocks and drops the reference.
 prototypes:
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
-   int (*sync_page)(struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
@@ -208,7 +207,6 @@ locking rules:
PageLocked(page)i_mutex
 writepage: yes, unlocks (see below)
 readpage:  yes, unlocks
-sync_page: maybe
 writepages:
 set_page_dirty no
 readpages:
@@ -226,8 +224,8 @@ error_remove_page:  yes
 swap_activate: no
 swap_deactivate:   no
 
-   ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
-may be called from the request handler (/dev/loop).
+   ->write_begin(), ->write_end() and ->readpage() may be called from
+the request handler (/dev/loop).
 
->readpage() unlocks the page, either synchronously or via I/O
 completion.
@@ -283,11 +281,6 @@ will leave the page itself marked clean but it will be 
tagged as dirty in the
 radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
 in the filesystem like having dirty inodes at umount and losing written data.
 
-   ->sync_page() locking rules are not well-defined - usually it is called
-with lock on page, but that is not guaranteed. Considering the currently
-existing instances of this method ->sync_page() itself doesn't look
-well-defined...
-
->writepages() is used for periodic writeback and for syscall-initiated
 sync operations.  The address_space should start I/O against at least
 *nr_to_write pages.  *nr_to_write must be decremented for each page which is
diff --git a/Documentation/filesystems/vfs.txt 
b/Documentation/filesystems/vfs.txt
index b02a7d5..a6a93b4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -534,9 +534,7 @@ __sync_single_inode) to check if ->writepages has been 
successful in
 writing out the whole address_space.
 
 The Writeback tag is used by filemap*wait* and sync_page* functions,
-via filemap_fdatawait_range, to wait for all writeback to
-complete.  While waiting ->sync_page (if defined) will be called on
-each page that is found to require writeback.
+via filemap_fdatawait_range, to wait for all writeback to complete.
 
 An address_space handler may attach extra information to a page,
 typically using the 'private' field in the 'struct page'.  If such
@@ -554,8 +552,8 @@ address_space has finer control of write sizes.
 
 The read process essentially only requires 'readpage'.  The write
 process is more complicated and uses write_begin/write_end or
-set_page_dirty to write data into the address_space, and writepage,
-sync_page, and writepages to writeback data to storage.
+set_page_dirty to write data into the address_space, and writepage
+and writepages to writeback data to storage.
 
 Adding and removing pages to/from an address_space is protected by the
 inode's i_mutex.
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index f311bf0..5249d1a 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -361,7 +361,6 @@ static int zisofs_readpage(struct file *file, struct page 
*page)
 
 const struct address_space_operations zisofs_aops = {
.readpage = zisofs_readpage,
-   /* No sync_page operation supported? */
/* No bmap operation supported */
 };
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07..02d0473 

[PATCH] Remove last traces of ->sync_page

2016-03-06 Thread Matthew Wilcox
Commit 7eaceaccab5f removed ->sync_page, but a few mentions of it still
existed in documentation and comments,

Signed-off-by: Matthew Wilcox 
---
 Documentation/block/biodoc.txt|  3 +--
 Documentation/filesystems/Locking | 11 ++-
 Documentation/filesystems/vfs.txt |  8 +++-
 fs/isofs/compress.c   |  1 -
 fs/ntfs/inode.c   |  2 +-
 5 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 5be8a7f..026d133 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1024,8 +1024,7 @@ could be on demand. For example wait_on_buffer sets the 
unplugging going
 through sync_buffer() running blk_run_address_space(mapping). Or the caller
 can do it explicity through blk_unplug(bdev). So in the read case,
 the queue gets explicitly unplugged as part of waiting for completion on that
-buffer. For page driven IO, the address space ->sync_page() takes care of
-doing the blk_run_address_space().
+buffer.
 
 Aside:
   This is kind of controversial territory, as it's not clear if plugging is
diff --git a/Documentation/filesystems/Locking 
b/Documentation/filesystems/Locking
index 619af9b..3462123 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -179,7 +179,6 @@ unlocks and drops the reference.
 prototypes:
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
-   int (*sync_page)(struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
@@ -208,7 +207,6 @@ locking rules:
PageLocked(page)i_mutex
 writepage: yes, unlocks (see below)
 readpage:  yes, unlocks
-sync_page: maybe
 writepages:
 set_page_dirty no
 readpages:
@@ -226,8 +224,8 @@ error_remove_page:  yes
 swap_activate: no
 swap_deactivate:   no
 
-   ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
-may be called from the request handler (/dev/loop).
+   ->write_begin(), ->write_end() and ->readpage() may be called from
+the request handler (/dev/loop).
 
->readpage() unlocks the page, either synchronously or via I/O
 completion.
@@ -283,11 +281,6 @@ will leave the page itself marked clean but it will be 
tagged as dirty in the
 radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
 in the filesystem like having dirty inodes at umount and losing written data.
 
-   ->sync_page() locking rules are not well-defined - usually it is called
-with lock on page, but that is not guaranteed. Considering the currently
-existing instances of this method ->sync_page() itself doesn't look
-well-defined...
-
->writepages() is used for periodic writeback and for syscall-initiated
 sync operations.  The address_space should start I/O against at least
 *nr_to_write pages.  *nr_to_write must be decremented for each page which is
diff --git a/Documentation/filesystems/vfs.txt 
b/Documentation/filesystems/vfs.txt
index b02a7d5..a6a93b4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -534,9 +534,7 @@ __sync_single_inode) to check if ->writepages has been 
successful in
 writing out the whole address_space.
 
 The Writeback tag is used by filemap*wait* and sync_page* functions,
-via filemap_fdatawait_range, to wait for all writeback to
-complete.  While waiting ->sync_page (if defined) will be called on
-each page that is found to require writeback.
+via filemap_fdatawait_range, to wait for all writeback to complete.
 
 An address_space handler may attach extra information to a page,
 typically using the 'private' field in the 'struct page'.  If such
@@ -554,8 +552,8 @@ address_space has finer control of write sizes.
 
 The read process essentially only requires 'readpage'.  The write
 process is more complicated and uses write_begin/write_end or
-set_page_dirty to write data into the address_space, and writepage,
-sync_page, and writepages to writeback data to storage.
+set_page_dirty to write data into the address_space, and writepage
+and writepages to writeback data to storage.
 
 Adding and removing pages to/from an address_space is protected by the
 inode's i_mutex.
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index f311bf0..5249d1a 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -361,7 +361,6 @@ static int zisofs_readpage(struct file *file, struct page 
*page)
 
 const struct address_space_operations zisofs_aops = {
.readpage = zisofs_readpage,
-   /* No sync_page operation supported? */
/* No bmap operation supported */
 };
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07..02d0473 100644
--- 

Re: [PATCH 3/4] arm64: dts: Add dts files for LG Electronics's lg1312 SoC

2016-03-06 Thread Arnd Bergmann
On Monday 07 March 2016, Chanho Min wrote:
> +
> +   chosen {
> +   bootargs = "root=/dev/nfs ip=dhcp";
> +   };

Can you add a stdout-path property as well, to make the console work?

Please also include an aliases node in the .dts file.

> +
> +#include 
> +#include 
> +
> +#define ARMV8_TIMER_IRQ_TYPE (GIC_CPU_MASK_RAW(0x0f) | IRQ_TYPE_LEVEL_LOW)

I would leave out the macro here and instead just open-code this in the place it
is used.

Arnd


Re: [PATCH 3/4] arm64: dts: Add dts files for LG Electronics's lg1312 SoC

2016-03-06 Thread Arnd Bergmann
On Monday 07 March 2016, Chanho Min wrote:
> +
> +   chosen {
> +   bootargs = "root=/dev/nfs ip=dhcp";
> +   };

Can you add a stdout-path property as well, to make the console work?

Please also include an aliases node in the .dts file.

> +
> +#include 
> +#include 
> +
> +#define ARMV8_TIMER_IRQ_TYPE (GIC_CPU_MASK_RAW(0x0f) | IRQ_TYPE_LEVEL_LOW)

I would leave out the macro here and instead just open-code this in the place it
is used.

Arnd


Re: [PATCH v4 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-03-06 Thread Joonsoo Kim
On Fri, Mar 04, 2016 at 12:04:39PM -0800, Andrew Morton wrote:
> On Thu, 3 Mar 2016 16:43:49 +0900 Joonsoo Kim  wrote:
> 
> > > Acked-by: Vlastimil Babka 
> > >
> > >> +config DEBUG_PAGE_REF
> > >> +   bool "Enable tracepoint to track down page reference 
> > >> manipulation"
> > >> +   depends on DEBUG_KERNEL
> > >> +   depends on TRACEPOINTS
> > >> +   ---help---
> > >> + This is the feature to add tracepoint for tracking down page
> > >> reference
> > >> + manipulation. This tracking is useful to diagnosis functional
> > >> failure
> > >> + due to migration failure caused by page reference mismatch. Be
> > >
> > >
> > > OK.
> > >
> > >> + careful to turn on this feature because it could bloat some
> > >> kernel
> > >> + text. In my configuration, it bloats 30 KB. Although kernel 
> > >> text
> > >> will
> > >> + be bloated, there would be no runtime performance overhead if
> > >> + tracepoint isn't enabled thanks to jump label.
> > >
> > >
> > > I would just write something like:
> > >
> > > Enabling this feature adds about 30 KB to the kernel code, but runtime
> > > performance overhead is virtually none until the tracepoints are actually
> > > enabled.
> > 
> > Okay, better!
> > Andrew, do you want fixup patch from me or could you simply handle it?
> > 
> 
> This?

Yep!

Thanks!

> 
> --- 
> a/mm/Kconfig.debug~mm-page_ref-add-tracepoint-to-track-down-page-reference-manipulation-fix-3-fix
> +++ a/mm/Kconfig.debug
> @@ -82,10 +82,9 @@ config DEBUG_PAGE_REF
>   depends on DEBUG_KERNEL
>   depends on TRACEPOINTS
>   ---help---
> -   This is the feature to add tracepoint for tracking down page reference
> -   manipulation. This tracking is useful to diagnosis functional failure
> -   due to migration failure caused by page reference mismatch. Be
> -   careful to turn on this feature because it could bloat some kernel
> -   text. In my configuration, it bloats 30 KB. Although kernel text will
> -   be bloated, there would be no runtime performance overhead if
> -   tracepoint isn't enabled thanks to jump label.
> +   This is a feature to add tracepoint for tracking down page reference
> +   manipulation. This tracking is useful to diagnose functional failure
> +   due to migration failures caused by page reference mismatches.  Be
> +   careful when enabling this feature because it adds about 30 KB to the
> +   kernel code.  However the runtime performance overhead is virtually
> +   nil until the tracepoints are actually enabled.
> _
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org;> em...@kvack.org 


[PATCH] ASoC: cs35l32: avoid uninitialized variable access

2016-03-06 Thread Arnd Bergmann
gcc warns about the possibilty of accessing a property read from
devicetree in cs35l32_i2c_probe() when it has not been initialized
because CONFIG_OF is disabled:

sound/soc/codecs/cs35l32.c: In function 'cs35l32_i2c_probe':
sound/soc/codecs/cs35l32.c:278:2: warning: 'val' may be used uninitialized in 
this function [-Wmaybe-uninitialized]

The code is actually correct because it checks the dev->of_node
variable first and we know this is NULL here when CONFIG_OF
is disabled, but Russell King noticed that it's broken when
we probe the device using DT, and the properties are absent.

The code already has some checking for incorrect values, and
I keep that checking unchanged here, but add an additional
check for an error returned by the property accessor functions
that now gets handled the same way as incorrect data in the
properties.

Signed-off-by: Arnd Bergmann 
---
 sound/soc/codecs/cs35l32.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c
index 44c30fe3e315..c490dc74121b 100644
--- a/sound/soc/codecs/cs35l32.c
+++ b/sound/soc/codecs/cs35l32.c
@@ -274,21 +274,24 @@ static int cs35l32_handle_of_data(struct i2c_client 
*i2c_client,
if (of_property_read_u32(np, "cirrus,sdout-share", ) >= 0)
pdata->sdout_share = val;
 
-   of_property_read_u32(np, "cirrus,boost-manager", );
+   if (of_property_read_u32(np, "cirrus,boost-manager", ))
+   val = -1u;
+
switch (val) {
case CS35L32_BOOST_MGR_AUTO:
case CS35L32_BOOST_MGR_AUTO_AUDIO:
case CS35L32_BOOST_MGR_BYPASS:
case CS35L32_BOOST_MGR_FIXED:
-   pdata->boost_mng = val;
break;
+   case -1u:
default:
dev_err(_client->dev,
"Wrong cirrus,boost-manager DT value %d\n", val);
pdata->boost_mng = CS35L32_BOOST_MGR_BYPASS;
}
 
-   of_property_read_u32(np, "cirrus,sdout-datacfg", );
+   if (of_property_read_u32(np, "cirrus,sdout-datacfg", ))
+   val = -1u;
switch (val) {
case CS35L32_DATA_CFG_LR_VP:
case CS35L32_DATA_CFG_LR_STAT:
@@ -296,13 +299,15 @@ static int cs35l32_handle_of_data(struct i2c_client 
*i2c_client,
case CS35L32_DATA_CFG_LR_VPSTAT:
pdata->sdout_datacfg = val;
break;
+   case -1u:
default:
dev_err(_client->dev,
"Wrong cirrus,sdout-datacfg DT value %d\n", val);
pdata->sdout_datacfg = CS35L32_DATA_CFG_LR;
}
 
-   of_property_read_u32(np, "cirrus,battery-threshold", );
+   if (of_property_read_u32(np, "cirrus,battery-threshold", ))
+   val = -1u;
switch (val) {
case CS35L32_BATT_THRESH_3_1V:
case CS35L32_BATT_THRESH_3_2V:
@@ -310,13 +315,15 @@ static int cs35l32_handle_of_data(struct i2c_client 
*i2c_client,
case CS35L32_BATT_THRESH_3_4V:
pdata->batt_thresh = val;
break;
+   case -1u:
default:
dev_err(_client->dev,
"Wrong cirrus,battery-threshold DT value %d\n", val);
pdata->batt_thresh = CS35L32_BATT_THRESH_3_3V;
}
 
-   of_property_read_u32(np, "cirrus,battery-recovery", );
+   if (of_property_read_u32(np, "cirrus,battery-recovery", ))
+   val = -1u;
switch (val) {
case CS35L32_BATT_RECOV_3_1V:
case CS35L32_BATT_RECOV_3_2V:
-- 
2.7.0



  1   2   3   4   5   6   7   >