Re: [PATCH] pinctrl: rockchip: add support the get_direction

2016-03-13 Thread Caesar Wang



在 2016年03月14日 13:44, Caesar Wang 写道:

This patch adds the get_direction to support the gpio
interface.

The gpio direction is not reported on the rockchip platform when using the gpio
debugfs.

Tested on kylin board. (RK3036 SoCs)
The repro steps:
$/sys/class/gpio/
echo 53 > export
$/sys/class/gpio/gpio53# cat direction
in
In general, gpio53 should be reported as 'out', but the direction shows the
default value 'in', since get_direction is not supported by the rockchip
pinctrl driver.

So, we should add this patch to support it.

Change-Id: I237c2e85ac7680b3d0de15923985b4827def


Resending it because of the Change-Id; sorry for the noise.


Reported-by: Jeffy Chen 
Signed-off-by: Caesar Wang 
Cc: Linus Walleij 
Cc: Heiko Stuebner 
Cc: linux-g...@vger.kernel.org
Cc: linux-rockc...@lists.infradead.org
---
  drivers/pinctrl/pinctrl-rockchip.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index bf032b9..f22a186 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -1208,6 +1208,18 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, 
unsigned selector,
return 0;
  }
  
+static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)

+{
+   struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+   int pin;
+   u32 data;
+
+   pin = offset - chip->base;
+   data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
+
+   return !!(data & BIT(pin));
+}
+
  /*
   * The calls to gpio_direction_output() and gpio_direction_input()
   * leads to this function call (via the 
pinctrl_gpio_direction_{input|output}()
@@ -1741,6 +1753,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = {
.free = gpiochip_generic_free,
.set = rockchip_gpio_set,
.get = rockchip_gpio_get,
+   .get_direction  = rockchip_gpio_get_direction,
.direction_input = rockchip_gpio_direction_input,
.direction_output = rockchip_gpio_direction_output,
.to_irq = rockchip_gpio_to_irq,


--
caesar wang | software engineer | w...@rock-chip.com




Re: [PATCH] pinctrl: rockchip: add support the get_direction

2016-03-13 Thread Caesar Wang



在 2016年03月14日 13:44, Caesar Wang 写道:

This patch adds the get_direction to support the gpio
interface.

The gpio direction is not reported on the rockchip platform when using the gpio
debugfs.

Tested on kylin board. (RK3036 SoCs)
The repro steps:
$/sys/class/gpio/
echo 53 > export
$/sys/class/gpio/gpio53# cat direction
in
In general, gpio53 should be reported as 'out', but the direction shows the
default value 'in', since get_direction is not supported by the rockchip
pinctrl driver.

So, we should add this patch to support it.

Change-Id: I237c2e85ac7680b3d0de15923985b4827def


Resending it because of the Change-Id; sorry for the noise.


Reported-by: Jeffy Chen 
Signed-off-by: Caesar Wang 
Cc: Linus Walleij 
Cc: Heiko Stuebner 
Cc: linux-g...@vger.kernel.org
Cc: linux-rockc...@lists.infradead.org
---
  drivers/pinctrl/pinctrl-rockchip.c | 13 +
  1 file changed, 13 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index bf032b9..f22a186 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -1208,6 +1208,18 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, 
unsigned selector,
return 0;
  }
  
+static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)

+{
+   struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+   int pin;
+   u32 data;
+
+   pin = offset - chip->base;
+   data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
+
+   return !!(data & BIT(pin));
+}
+
  /*
   * The calls to gpio_direction_output() and gpio_direction_input()
   * leads to this function call (via the 
pinctrl_gpio_direction_{input|output}()
@@ -1741,6 +1753,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = {
.free = gpiochip_generic_free,
.set = rockchip_gpio_set,
.get = rockchip_gpio_get,
+   .get_direction  = rockchip_gpio_get_direction,
.direction_input = rockchip_gpio_direction_input,
.direction_output = rockchip_gpio_direction_output,
.to_irq = rockchip_gpio_to_irq,


--
caesar wang | software engineer | w...@rock-chip.com




[PATCH v2] ARM: mmci:Remove unnecessary #include header file in mmci.c

2016-03-13 Thread Wang Hongcheng
The header file asm/sizes.h is unnecessary.
And it can also be compiled under X86 arch after the removal.

Signed-off-by: Wang Hongcheng 
---
 drivers/mmc/host/mmci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fb26674..e5e5441 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -40,7 +40,6 @@
 
 #include <asm/div64.h>
 #include <asm/io.h>
-#include <asm/sizes.h>
 
 #include "mmci.h"
 #include "mmci_qcom_dml.h"
-- 
1.9.1



[PATCH v2] ARM: mmci:Remove unnecessary #include header file in mmci.c

2016-03-13 Thread Wang Hongcheng
The header file asm/sizes.h is unnecessary.
And it can also be compiled under X86 arch after the removal.

Signed-off-by: Wang Hongcheng 
---
 drivers/mmc/host/mmci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fb26674..e5e5441 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -40,7 +40,6 @@
 
 #include <asm/div64.h>
 #include <asm/io.h>
-#include <asm/sizes.h>
 
 #include "mmci.h"
 #include "mmci_qcom_dml.h"
-- 
1.9.1



[PATCH] pinctrl: rockchip: add support the get_direction

2016-03-13 Thread Caesar Wang
This patch adds the get_direction to support the gpio
interface.

The gpio direction is not reported on the rockchip platform when using the gpio
debugfs.

Tested on kylin board. (RK3036 SoCs)
The repro steps:
$/sys/class/gpio/
echo 53 > export
$/sys/class/gpio/gpio53# cat direction
in
In general, gpio53 should be reported as 'out', but the direction shows the
default value 'in', since get_direction is not supported by the rockchip
pinctrl driver.

So, we should add this patch to support it.

Change-Id: I237c2e85ac7680b3d0de15923985b4827def
Reported-by: Jeffy Chen 
Signed-off-by: Caesar Wang 
Cc: Linus Walleij 
Cc: Heiko Stuebner 
Cc: linux-g...@vger.kernel.org
Cc: linux-rockc...@lists.infradead.org
---
 drivers/pinctrl/pinctrl-rockchip.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index bf032b9..f22a186 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -1208,6 +1208,18 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, 
unsigned selector,
return 0;
 }
 
+static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+{
+   struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+   int pin;
+   u32 data;
+
+   pin = offset - chip->base;
+   data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
+
+   return !!(data & BIT(pin));
+}
+
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -1741,6 +1753,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = {
.free = gpiochip_generic_free,
.set = rockchip_gpio_set,
.get = rockchip_gpio_get,
+   .get_direction  = rockchip_gpio_get_direction,
.direction_input = rockchip_gpio_direction_input,
.direction_output = rockchip_gpio_direction_output,
.to_irq = rockchip_gpio_to_irq,
-- 
1.9.1



[PATCH] pinctrl: rockchip: add support the get_direction

2016-03-13 Thread Caesar Wang
This patch adds the get_direction to support the gpio
interface.

The gpio direction is not reported on the rockchip platform when using the gpio
debugfs.

Tested on kylin board. (RK3036 SoCs)
The repro steps:
$/sys/class/gpio/
echo 53 > export
$/sys/class/gpio/gpio53# cat direction
in
In general, gpio53 should be reported as 'out', but the direction shows the
default value 'in', since get_direction is not supported by the rockchip
pinctrl driver.

So, we should add this patch to support it.

Change-Id: I237c2e85ac7680b3d0de15923985b4827def
Reported-by: Jeffy Chen 
Signed-off-by: Caesar Wang 
Cc: Linus Walleij 
Cc: Heiko Stuebner 
Cc: linux-g...@vger.kernel.org
Cc: linux-rockc...@lists.infradead.org
---
 drivers/pinctrl/pinctrl-rockchip.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index bf032b9..f22a186 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -1208,6 +1208,18 @@ static int rockchip_pmx_set(struct pinctrl_dev *pctldev, 
unsigned selector,
return 0;
 }
 
+static int rockchip_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+{
+   struct rockchip_pin_bank *bank = gpiochip_get_data(chip);
+   int pin;
+   u32 data;
+
+   pin = offset - chip->base;
+   data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
+
+   return !!(data & BIT(pin));
+}
+
 /*
  * The calls to gpio_direction_output() and gpio_direction_input()
  * leads to this function call (via the pinctrl_gpio_direction_{input|output}()
@@ -1741,6 +1753,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = {
.free = gpiochip_generic_free,
.set = rockchip_gpio_set,
.get = rockchip_gpio_get,
+   .get_direction  = rockchip_gpio_get_direction,
.direction_input = rockchip_gpio_direction_input,
.direction_output = rockchip_gpio_direction_output,
.to_irq = rockchip_gpio_to_irq,
-- 
1.9.1



Re: [LKP] [lkp] [namei] fda89e6574: kernel BUG at fs/namei.c:679!

2016-03-13 Thread Huang, Ying
Al Viro  writes:

> On Mon, Mar 14, 2016 at 08:48:26AM +0800, kernel test robot wrote:
>> FYI, we noticed the below changes on
>> 
>> https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git work.lookups
>> commit fda89e65743179d09e55bc6c265d06fa5efa8803 ("namei: untanlge 
>> lookup_fast()")
>
> Unfortunately, while with my normal .config it reliably triggers an oops
> in x86_pmu_enable() (with or without those patches), yours triggers nothing
> but a pile of OOMs.  How much RAM do you give those suckers?  I'm _not_
> testing those on bare hardware, obviously - it's KVM image.

qemu-system-x86_64 -enable-kvm -cpu qemu64,+ssse3 -kernel 
/pkg/linux/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/vmlinuz-4.5.0-rc4-00017-gfda89e6
 -append 'root=/dev/ram0 user=lkp 
job=/lkp/scheduled/vm-kbuild-4G-3/bisect_trinity-300s-debian-x86_64-2015-02-07.cgz-x86_64-rhel-fda89e65743179d09e55bc6c265d06fa5efa8803-20160312-104797-ow7uw0-0.yaml
 ARCH=x86_64 kconfig=x86_64-rhel branch=linux-devel/devel-catchup-201603120257 
commit=fda89e65743179d09e55bc6c265d06fa5efa8803 
BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/vmlinuz-4.5.0-rc4-00017-gfda89e6
 max_uptime=1500 
RESULT_ROOT=/result/trinity/300s/vm-kbuild-4G/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/0
 LKP_SERVER=inn earlyprintk=ttyS0,115200 systemd.log_level=err debug apic=debug 
sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 panic=-1 
softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 
prompt_ramdisk=0 console=ttyS0,115200 console=tty0 vga=normal rw 
ip=vm-kbuild-4G-3::dhcp'  -initrd /fs/sdg1/initrd-vm-kbuild-4G-3 -m 4096 
-smp 4 -device e1000,netdev=net0 -netdev user,id=net0,hostfwd=tcp::23034-:22 
-boot order=nc -no-reboot -watchdog i6300esb -rtc base=localtime -drive 
file=/fs/sdg1/disk0-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk1-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk2-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk3-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk4-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk5-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk6-vm-kbuild-4G-3,media=disk,if=virtio -pidfile 
/dev/shm/kboot/pid-vm-kbuild-4G-3 -serial 
file:/dev/shm/kboot/serial-vm-kbuild-4G-3 -daemonize -display none -monitor 
null 

This is the qemu command line we used for testing.

Best Regards,
Huang, Ying

> FWIW, see below for hopefully cleaner fix (will fold once I manage to trigger
> the damn thing and verify that fix indeed fixes).  It's on top of offending
> commit.  Folks, could you please check if it fixes that crap on your setup?
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 7a5f79f..d721821 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1519,6 +1519,7 @@ static int lookup_fast(struct nameidata *nd,
>   struct vfsmount *mnt = nd->path.mnt;
>   struct dentry *dentry, *parent = nd->path.dentry;
>   int err;
> + int status = 1;
>  
>   /*
>* Rename seqlock is not required here because in the off chance
> @@ -1555,54 +1556,45 @@ static int lookup_fast(struct nameidata *nd,
>   return -ECHILD;
>  
>   *seqp = seq;
> - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
> - int status = d_revalidate(dentry, nd->flags);
> - if (unlikely(status <= 0)) {
> - if (unlazy_walk(nd, dentry, seq))
> - return -ECHILD;
> - if (status == -ECHILD)
> - status = d_revalidate(dentry, 
> nd->flags);
> - if (status <= 0) {
> - if (!status) {
> - d_invalidate(dentry);
> - status = 1;
> - }
> - dput(dentry);
> - return status;
> - }
> - }
> + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
> + status = d_revalidate(dentry, nd->flags);
> + if (unlikely(status <= 0)) {
> + if (unlazy_walk(nd, dentry, seq))
> + return -ECHILD;
> + if (status == -ECHILD)
> + status = d_revalidate(dentry, nd->flags);
> + } else {
> + /*
> +  * Note: do negative dentry check after revalidation in
> +  * case that drops it.
> +  */
> + if (unlikely(negative))
> + return -ENOENT;
> + path->mnt = mnt;
> + 

Re: [LKP] [lkp] [namei] fda89e6574: kernel BUG at fs/namei.c:679!

2016-03-13 Thread Huang, Ying
Al Viro  writes:

> On Mon, Mar 14, 2016 at 08:48:26AM +0800, kernel test robot wrote:
>> FYI, we noticed the below changes on
>> 
>> https://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git work.lookups
>> commit fda89e65743179d09e55bc6c265d06fa5efa8803 ("namei: untanlge 
>> lookup_fast()")
>
> Unfortunately, while with my normal .config it reliably triggers an oops
> in x86_pmu_enable() (with or without those patches), yours triggers nothing
> but a pile of OOMs.  How much RAM do you give those suckers?  I'm _not_
> testing those on bare hardware, obviously - it's KVM image.

qemu-system-x86_64 -enable-kvm -cpu qemu64,+ssse3 -kernel 
/pkg/linux/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/vmlinuz-4.5.0-rc4-00017-gfda89e6
 -append 'root=/dev/ram0 user=lkp 
job=/lkp/scheduled/vm-kbuild-4G-3/bisect_trinity-300s-debian-x86_64-2015-02-07.cgz-x86_64-rhel-fda89e65743179d09e55bc6c265d06fa5efa8803-20160312-104797-ow7uw0-0.yaml
 ARCH=x86_64 kconfig=x86_64-rhel branch=linux-devel/devel-catchup-201603120257 
commit=fda89e65743179d09e55bc6c265d06fa5efa8803 
BOOT_IMAGE=/pkg/linux/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/vmlinuz-4.5.0-rc4-00017-gfda89e6
 max_uptime=1500 
RESULT_ROOT=/result/trinity/300s/vm-kbuild-4G/debian-x86_64-2015-02-07.cgz/x86_64-rhel/gcc-4.9/fda89e65743179d09e55bc6c265d06fa5efa8803/0
 LKP_SERVER=inn earlyprintk=ttyS0,115200 systemd.log_level=err debug apic=debug 
sysrq_always_enabled rcupdate.rcu_cpu_stall_timeout=100 panic=-1 
softlockup_panic=1 nmi_watchdog=panic oops=panic load_ramdisk=2 
prompt_ramdisk=0 console=ttyS0,115200 console=tty0 vga=normal rw 
ip=vm-kbuild-4G-3::dhcp'  -initrd /fs/sdg1/initrd-vm-kbuild-4G-3 -m 4096 
-smp 4 -device e1000,netdev=net0 -netdev user,id=net0,hostfwd=tcp::23034-:22 
-boot order=nc -no-reboot -watchdog i6300esb -rtc base=localtime -drive 
file=/fs/sdg1/disk0-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk1-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk2-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk3-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk4-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk5-vm-kbuild-4G-3,media=disk,if=virtio -drive 
file=/fs/sdg1/disk6-vm-kbuild-4G-3,media=disk,if=virtio -pidfile 
/dev/shm/kboot/pid-vm-kbuild-4G-3 -serial 
file:/dev/shm/kboot/serial-vm-kbuild-4G-3 -daemonize -display none -monitor 
null 

This is the qemu command line we used for testing.

Best Regards,
Huang, Ying

> FWIW, see below for hopefully cleaner fix (will fold once I manage to trigger
> the damn thing and verify that fix indeed fixes).  It's on top of offending
> commit.  Folks, could you please check if it fixes that crap on your setup?
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 7a5f79f..d721821 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1519,6 +1519,7 @@ static int lookup_fast(struct nameidata *nd,
>   struct vfsmount *mnt = nd->path.mnt;
>   struct dentry *dentry, *parent = nd->path.dentry;
>   int err;
> + int status = 1;
>  
>   /*
>* Rename seqlock is not required here because in the off chance
> @@ -1555,54 +1556,45 @@ static int lookup_fast(struct nameidata *nd,
>   return -ECHILD;
>  
>   *seqp = seq;
> - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
> - int status = d_revalidate(dentry, nd->flags);
> - if (unlikely(status <= 0)) {
> - if (unlazy_walk(nd, dentry, seq))
> - return -ECHILD;
> - if (status == -ECHILD)
> - status = d_revalidate(dentry, 
> nd->flags);
> - if (status <= 0) {
> - if (!status) {
> - d_invalidate(dentry);
> - status = 1;
> - }
> - dput(dentry);
> - return status;
> - }
> - }
> + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
> + status = d_revalidate(dentry, nd->flags);
> + if (unlikely(status <= 0)) {
> + if (unlazy_walk(nd, dentry, seq))
> + return -ECHILD;
> + if (status == -ECHILD)
> + status = d_revalidate(dentry, nd->flags);
> + } else {
> + /*
> +  * Note: do negative dentry check after revalidation in
> +  * case that drops it.
> +  */
> + if (unlikely(negative))
> + return -ENOENT;
> + path->mnt = mnt;
> + 

[PATCH 2/8] sched/fair: add margin to utilization update

2016-03-13 Thread Michael Turquette
Utilization contributions to cfs_rq->avg.util_avg are scaled for both
microarchitecture-invariance as well as frequency-invariance. This means
that any given utilization contribution will be scaled against the
current cpu capacity (cpu frequency). Contributions from long running
tasks, whose utilization grows larger over time, will asymptotically
approach the current capacity.

This causes a problem when using this utilization signal to select a
target cpu capacity (cpu frequency), as our signal will never exceed the
current capacity, which would otherwise be our signal to increase
frequency.

Solve this by introducing a default capacity margin that is added to the
utilization signal when requesting a change to capacity (cpu frequency).
The margin is 1280, or 1.25 x SCHED_CAPACITY_SCALE (1024). This is
equivalent to similar margins such as the default 125 value assigned to
struct sched_domain.imbalance_pct for load balancing, and to the 80%
up_threshold used by the legacy cpufreq ondemand governor.

Signed-off-by: Michael Turquette 
---
 kernel/sched/fair.c  | 18 --
 kernel/sched/sched.h |  3 +++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a32f281..29e8bae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -100,6 +100,19 @@ const_debug unsigned int sysctl_sched_migration_cost = 
50UL;
  */
 unsigned int __read_mostly sysctl_sched_shares_window = 1000UL;
 
+/*
+ * Add a 25% margin globally to all capacity requests from cfs. This is
+ * equivalent to an 80% up_threshold in legacy governors like ondemand.
+ *
+ * This is required as task utilization increases. The frequency-invariant
+ * utilization will asymptotically approach the current capacity of the cpu and
+ * the additional margin will cross the threshold into the next capacity state.
+ *
+ * XXX someday expand to separate, per-call site margins? e.g. enqueue, fork,
+ * task_tick, load_balance, etc
+ */
+unsigned long cfs_capacity_margin = CAPACITY_MARGIN_DEFAULT;
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -2840,6 +2853,8 @@ static inline void update_load_avg(struct sched_entity 
*se, int update_tg)
 
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
unsigned long max = rq->cpu_capacity_orig;
+   unsigned long cap = cfs_rq->avg.util_avg *
+   cfs_capacity_margin / max;
 
/*
 * There are a few boundary cases this might miss but it should
@@ -2852,8 +2867,7 @@ static inline void update_load_avg(struct sched_entity 
*se, int update_tg)
 * thread is a different class (!fair), nor will the utilization
 * number include things like RT tasks.
 */
-   cpufreq_update_util(rq_clock(rq),
-   min(cfs_rq->avg.util_avg, max), max);
+   cpufreq_update_util(rq_clock(rq), min(cap, max), max);
}
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f06dfca..8c93ed2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -27,6 +27,9 @@ extern __read_mostly int scheduler_running;
 extern unsigned long calc_load_update;
 extern atomic_long_t calc_load_tasks;
 
+#define CAPACITY_MARGIN_DEFAULT 1280;
+extern unsigned long cfs_capacity_margin;
+
 extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq);
 
-- 
2.1.4



[PATCH 0/8] schedutil enhancements

2016-03-13 Thread Michael Turquette
I'm happy that scheduler-driven cpu frequency selection is getting some
attention. Rafael's recent schedutil governor is a step in the right direction.
This series builds on top of Rafael's schedutil governor, bringing it to parity
with some of the features in the schedfreq series posted by Steve[0], as well
as adding a couple of new things.

Patch 1 removes cpufreq_trigger_update()

Patches 2-4 move the cfs capacity margin out of the governor and into
cfs. This value is made tunable by a sysfs control in schedutil.

Patches 5-6 make cpufreq_update_util() aware of multiple scheduler
classes (cfs, rt & dl), and add storage & summation of these per-class
utilization values into schedutil.

Patches 7-8 introduce Dietmar's generic cpufreq implementation[1] of the
frequency invariance hook and changes the preprocessor magic in sched.h to
favor the cpufreq implementation over arch- or platform-specific ones.

If accepted, this series makes it trivial to port Steve and Juri's fine-grained
frequency selection in cfs and Vincent's rt utilization patch to the schedutil
governor.[2-6]

[0] lkml.kernel.org/r/1456190570-4475-1-git-send-email-smuc...@linaro.org
[1] 
https://git.linaro.org/people/steve.muckle/kernel.git/commit/1b7e57f89f14f7600e75e6fde42bf22d72927b3d
[2] lkml.kernel.org/r/1456190570-4475-5-git-send-email-smuc...@linaro.org
[3] lkml.kernel.org/r/1456190570-4475-6-git-send-email-smuc...@linaro.org
[4] lkml.kernel.org/r/1456190570-4475-7-git-send-email-smuc...@linaro.org
[5] lkml.kernel.org/r/1456190570-4475-8-git-send-email-smuc...@linaro.org
[6] lkml.kernel.org/r/1456190570-4475-11-git-send-email-smuc...@linaro.org

Dietmar Eggemann (1):
  cpufreq: Frequency invariant scheduler load-tracking support

Michael Turquette (7):
  sched/cpufreq: remove cpufreq_trigger_update()
  sched/fair: add margin to utilization update
  sched/cpufreq: new cfs capacity margin helpers
  cpufreq/schedutil: sysfs capacity margin tunable
  sched/cpufreq: pass sched class into cpufreq_update_util
  cpufreq/schedutil: sum per-sched class utilization
  sched: prefer cpufreq_scale_freq_capacity

 drivers/cpufreq/cpufreq.c   | 29 
 drivers/cpufreq/cpufreq_governor.c  |  5 +-
 drivers/cpufreq/cpufreq_schedutil.c | 70 
 drivers/cpufreq/intel_pstate.c  |  5 +-
 include/linux/cpufreq.h |  3 ++
 include/linux/sched.h   | 19 ++--
 kernel/sched/cpufreq.c  | 92 +
 kernel/sched/deadline.c |  2 +-
 kernel/sched/fair.c | 18 +++-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h| 29 +---
 11 files changed, 219 insertions(+), 55 deletions(-)

-- 
2.1.4



[PATCH 4/8] cpufreq/schedutil: sysfs capacity margin tunable

2016-03-13 Thread Michael Turquette
With the addition of the global cfs capacity margin helpers in patch,
"sched/cpufreq: new cfs capacity margin helpers", we can now export
sysfs tunables from the schedutil governor. This allows privileged users
to tune the value more easily.

The margin value is global to cfs, not per-policy. As such schedutil
does not store any state about the margin. Schedutil restores the margin
value to its default value when exiting.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_schedutil.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 5aa26bf..12e49b9 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -246,8 +246,32 @@ static ssize_t rate_limit_us_store(struct gov_attr_set 
*attr_set, const char *bu
 
 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
 
+static ssize_t capacity_margin_show(struct gov_attr_set *not_used,
+  char *buf)
+{
+   return sprintf(buf, "%lu\n", cpufreq_get_cfs_capacity_margin());
+}
+
+static ssize_t capacity_margin_store(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
+{
+   unsigned long margin;
+   int ret;
+
+   ret = sscanf(buf, "%lu", &margin);
+   if (ret != 1)
+   return -EINVAL;
+
+   cpufreq_set_cfs_capacity_margin(margin);
+
+   return count;
+}
+
+static struct governor_attr capacity_margin = __ATTR_RW(capacity_margin);
+
 static struct attribute *sugov_attributes[] = {
&rate_limit_us.attr,
+   &capacity_margin.attr,
NULL
 };
 
@@ -381,6 +405,7 @@ static int sugov_exit(struct cpufreq_policy *policy)
 
mutex_lock(&global_tunables_lock);
 
+   cpufreq_reset_cfs_capacity_margin();
count = gov_attr_set_put(&tunables->attr_set, 
&sg_policy->tunables_hook);
policy->governor_data = NULL;
if (!count)
-- 
2.1.4



[PATCH 2/8] sched/fair: add margin to utilization update

2016-03-13 Thread Michael Turquette
Utilization contributions to cfs_rq->avg.util_avg are scaled for both
microarchitecture-invariance as well as frequency-invariance. This means
that any given utilization contribution will be scaled against the
current cpu capacity (cpu frequency). Contributions from long running
tasks, whose utilization grows larger over time, will asymptotically
approach the current capacity.

This causes a problem when using this utilization signal to select a
target cpu capacity (cpu frequency), as our signal will never exceed the
current capacity, which would otherwise be our signal to increase
frequency.

Solve this by introducing a default capacity margin that is added to the
utilization signal when requesting a change to capacity (cpu frequency).
The margin is 1280, or 1.25 x SCHED_CAPACITY_SCALE (1024). This is
equivalent to similar margins such as the default 125 value assigned to
struct sched_domain.imbalance_pct for load balancing, and to the 80%
up_threshold used by the legacy cpufreq ondemand governor.

Signed-off-by: Michael Turquette 
---
 kernel/sched/fair.c  | 18 --
 kernel/sched/sched.h |  3 +++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a32f281..29e8bae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -100,6 +100,19 @@ const_debug unsigned int sysctl_sched_migration_cost = 
50UL;
  */
 unsigned int __read_mostly sysctl_sched_shares_window = 1000UL;
 
+/*
+ * Add a 25% margin globally to all capacity requests from cfs. This is
+ * equivalent to an 80% up_threshold in legacy governors like ondemand.
+ *
+ * This is required as task utilization increases. The frequency-invariant
+ * utilization will asymptotically approach the current capacity of the cpu and
+ * the additional margin will cross the threshold into the next capacity state.
+ *
+ * XXX someday expand to separate, per-call site margins? e.g. enqueue, fork,
+ * task_tick, load_balance, etc
+ */
+unsigned long cfs_capacity_margin = CAPACITY_MARGIN_DEFAULT;
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -2840,6 +2853,8 @@ static inline void update_load_avg(struct sched_entity 
*se, int update_tg)
 
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
unsigned long max = rq->cpu_capacity_orig;
+   unsigned long cap = cfs_rq->avg.util_avg *
+   cfs_capacity_margin / max;
 
/*
 * There are a few boundary cases this might miss but it should
@@ -2852,8 +2867,7 @@ static inline void update_load_avg(struct sched_entity 
*se, int update_tg)
 * thread is a different class (!fair), nor will the utilization
 * number include things like RT tasks.
 */
-   cpufreq_update_util(rq_clock(rq),
-   min(cfs_rq->avg.util_avg, max), max);
+   cpufreq_update_util(rq_clock(rq), min(cap, max), max);
}
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f06dfca..8c93ed2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -27,6 +27,9 @@ extern __read_mostly int scheduler_running;
 extern unsigned long calc_load_update;
 extern atomic_long_t calc_load_tasks;
 
+#define CAPACITY_MARGIN_DEFAULT 1280;
+extern unsigned long cfs_capacity_margin;
+
 extern void calc_global_load_tick(struct rq *this_rq);
 extern long calc_load_fold_active(struct rq *this_rq);
 
-- 
2.1.4



[PATCH 0/8] schedutil enhancements

2016-03-13 Thread Michael Turquette
I'm happy that scheduler-driven cpu frequency selection is getting some
attention. Rafael's recent schedutil governor is a step in the right direction.
This series builds on top of Rafael's schedutil governor, bringing it to parity
with some of the features in the schedfreq series posted by Steve[0], as well
as adding a couple of new things.

Patch 1 removes cpufreq_trigger_update()

Patches 2-4 move the cfs capacity margin out of the governor and into
cfs. This value is made tunable by a sysfs control in schedutil.

Patches 5-6 make cpufreq_update_util() aware of multiple scheduler
classes (cfs, rt & dl), and add storage & summation of these per-class
utilization values into schedutil.

Patches 7-8 introduce Dietmar's generic cpufreq implementation[1] of the
frequency invariance hook and changes the preprocessor magic in sched.h to
favor the cpufreq implementation over arch- or platform-specific ones.

If accepted, this series makes it trivial to port Steve and Juri's fine-grained
frequency selection in cfs and Vincent's rt utilization patch to the schedutil
governor.[2-6]

[0] lkml.kernel.org/r/1456190570-4475-1-git-send-email-smuc...@linaro.org
[1] 
https://git.linaro.org/people/steve.muckle/kernel.git/commit/1b7e57f89f14f7600e75e6fde42bf22d72927b3d
[2] lkml.kernel.org/r/1456190570-4475-5-git-send-email-smuc...@linaro.org
[3] lkml.kernel.org/r/1456190570-4475-6-git-send-email-smuc...@linaro.org
[4] lkml.kernel.org/r/1456190570-4475-7-git-send-email-smuc...@linaro.org
[5] lkml.kernel.org/r/1456190570-4475-8-git-send-email-smuc...@linaro.org
[6] lkml.kernel.org/r/1456190570-4475-11-git-send-email-smuc...@linaro.org

Dietmar Eggemann (1):
  cpufreq: Frequency invariant scheduler load-tracking support

Michael Turquette (7):
  sched/cpufreq: remove cpufreq_trigger_update()
  sched/fair: add margin to utilization update
  sched/cpufreq: new cfs capacity margin helpers
  cpufreq/schedutil: sysfs capacity margin tunable
  sched/cpufreq: pass sched class into cpufreq_update_util
  cpufreq/schedutil: sum per-sched class utilization
  sched: prefer cpufreq_scale_freq_capacity

 drivers/cpufreq/cpufreq.c   | 29 
 drivers/cpufreq/cpufreq_governor.c  |  5 +-
 drivers/cpufreq/cpufreq_schedutil.c | 70 
 drivers/cpufreq/intel_pstate.c  |  5 +-
 include/linux/cpufreq.h |  3 ++
 include/linux/sched.h   | 19 ++--
 kernel/sched/cpufreq.c  | 92 +
 kernel/sched/deadline.c |  2 +-
 kernel/sched/fair.c | 18 +++-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h| 29 +---
 11 files changed, 219 insertions(+), 55 deletions(-)

-- 
2.1.4



[PATCH 4/8] cpufreq/schedutil: sysfs capacity margin tunable

2016-03-13 Thread Michael Turquette
With the addition of the global cfs capacity margin helpers in patch,
"sched/cpufreq: new cfs capacity margin helpers", we can now export
sysfs tunables from the schedutil governor. This allows privileged users
to tune the value more easily.

The margin value is global to cfs, not per-policy. As such schedutil
does not store any state about the margin. Schedutil restores the margin
value to its default value when exiting.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_schedutil.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 5aa26bf..12e49b9 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -246,8 +246,32 @@ static ssize_t rate_limit_us_store(struct gov_attr_set 
*attr_set, const char *bu
 
 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
 
+static ssize_t capacity_margin_show(struct gov_attr_set *not_used,
+  char *buf)
+{
+   return sprintf(buf, "%lu\n", cpufreq_get_cfs_capacity_margin());
+}
+
+static ssize_t capacity_margin_store(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
+{
+   unsigned long margin;
+   int ret;
+
+   ret = sscanf(buf, "%lu", &margin);
+   if (ret != 1)
+   return -EINVAL;
+
+   cpufreq_set_cfs_capacity_margin(margin);
+
+   return count;
+}
+
+static struct governor_attr capacity_margin = __ATTR_RW(capacity_margin);
+
 static struct attribute *sugov_attributes[] = {
&rate_limit_us.attr,
+   &capacity_margin.attr,
NULL
 };
 
@@ -381,6 +405,7 @@ static int sugov_exit(struct cpufreq_policy *policy)
 
mutex_lock(&global_tunables_lock);
 
+   cpufreq_reset_cfs_capacity_margin();
count = gov_attr_set_put(&tunables->attr_set, 
&sg_policy->tunables_hook);
policy->governor_data = NULL;
if (!count)
-- 
2.1.4



Re: [PART1 RFC v2 06/10] svm: Add interrupt injection via AVIC

2016-03-13 Thread Suravee Suthikulpanit



On 03/07/2016 10:36 PM, Paolo Bonzini wrote:



On 04/03/2016 21:46, Suravee Suthikulpanit wrote:

+static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   kvm_lapic_set_vector(vec, avic_get_bk_page_entry(svm, APIC_IRR));
+
+   if (vcpu->mode == IN_GUEST_MODE) {
+   wrmsrl(SVM_AVIC_DOORBELL,
+  __default_cpu_present_to_apicid(vcpu->cpu));
+   } else {
+   kvm_vcpu_kick(vcpu);
+   }


You also need to add

kvm_make_request(KVM_REQ_EVENT, vcpu);

before the "if", similar to vmx_deliver_posted_interrupt.

Paolo



Actually, I should only need that just before the kvm_vcpu_kick(vcpu),
shouldn't I? I don't think we need it in the case when sending the doorbell.


Thanks,
Suravee


Re: [PART1 RFC v2 06/10] svm: Add interrupt injection via AVIC

2016-03-13 Thread Suravee Suthikulpanit



On 03/07/2016 10:36 PM, Paolo Bonzini wrote:



On 04/03/2016 21:46, Suravee Suthikulpanit wrote:

+static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   kvm_lapic_set_vector(vec, avic_get_bk_page_entry(svm, APIC_IRR));
+
+   if (vcpu->mode == IN_GUEST_MODE) {
+   wrmsrl(SVM_AVIC_DOORBELL,
+  __default_cpu_present_to_apicid(vcpu->cpu));
+   } else {
+   kvm_vcpu_kick(vcpu);
+   }


You also need to add

kvm_make_request(KVM_REQ_EVENT, vcpu);

before the "if", similar to vmx_deliver_posted_interrupt.

Paolo



Actually, I should only need that just before the kvm_vcpu_kick(vcpu),
shouldn't I? I don't think we need it in the case when sending the doorbell.


Thanks,
Suravee


[PATCH 7/8] cpufreq: Frequency invariant scheduler load-tracking support

2016-03-13 Thread Michael Turquette
From: Dietmar Eggemann 

Implements cpufreq_scale_freq_capacity() to provide the scheduler with a
frequency scaling correction factor for more accurate load-tracking.

The factor is:

(current_freq(cpu) << SCHED_CAPACITY_SHIFT) / max_freq(cpu)

In fact, freq_scale should be a struct cpufreq_policy data member. But
this would require that the scheduler hot path (__update_load_avg()) would
have to grab the cpufreq lock. This can be avoided by using per-cpu data
initialized to SCHED_CAPACITY_SCALE for freq_scale.

Signed-off-by: Dietmar Eggemann 
Signed-off-by: Michael Turquette 
---
I'm not as sure about patches 7 & 8, but I included them since I needed
frequency invariance while testing.

As mentioned by myself in 2014 and Rafael last month, the
arch_scale_freq_capacity hook is awkward, because this behavior may vary
within an architecture.

I re-introduce Dietmar's generic cpufreq implementation of the frequency
invariance hook in this patch,  and change the preprocessor magic in
sched.h to favor the cpufreq implementation over arch- or
platform-specific ones in the next patch.

If run-time selection of ops is needed then someone will need to write
that code.

I think that this negates the need for the arm arch hooks[0-2], and
hopefully Morten and Dietmar can weigh in on this.

[0] lkml.kernel.org/r/1436293469-25707-2-git-send-email-morten.rasmus...@arm.com
[1] lkml.kernel.org/r/1436293469-25707-6-git-send-email-morten.rasmus...@arm.com
[2] lkml.kernel.org/r/1436293469-25707-8-git-send-email-morten.rasmus...@arm.com

 drivers/cpufreq/cpufreq.c | 29 +
 include/linux/cpufreq.h   |  3 +++
 2 files changed, 32 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b1ca9c4..e67584f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -306,6 +306,31 @@ static void adjust_jiffies(unsigned long val, struct 
cpufreq_freqs *ci)
 #endif
 }
 
+/*
+ *   FREQUENCY INVARIANT CPU CAPACITY*
+ */
+
+static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void
+scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+{
+   unsigned long cur = freqs ? freqs->new : policy->cur;
+   unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
+   int cpu;
+
+   pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
+cpumask_pr_args(policy->cpus), cur, policy->max, scale);
+
+   for_each_cpu(cpu, policy->cpus)
+   per_cpu(freq_scale, cpu) = scale;
+}
+
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+   return per_cpu(freq_scale, cpu);
+}
+
 static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
 {
@@ -409,6 +434,8 @@ wait:
 
spin_unlock(&policy->transition_lock);
 
+   scale_freq_capacity(policy, freqs);
+
cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
 }
 EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
@@ -2125,6 +2152,8 @@ static int cpufreq_set_policy(struct cpufreq_policy 
*policy,
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
 
+   scale_freq_capacity(new_policy, NULL);
+
policy->min = new_policy->min;
policy->max = new_policy->max;
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 0e39499..72833be 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -583,4 +583,7 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+struct sched_domain;
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
 #endif /* _LINUX_CPUFREQ_H */
-- 
2.1.4



[PATCH 3/8] sched/cpufreq: new cfs capacity margin helpers

2016-03-13 Thread Michael Turquette
Introduce helper functions that allow cpufreq governors to change the
value of the capacity margin applied to the cfs_rq->avg.util_avg signal.
This allows for run-time tuning of the margin.

A follow-up patch will update the schedutil governor to use these
helpers.

Signed-off-by: Michael Turquette 
---
 include/linux/sched.h  |  3 +++
 kernel/sched/cpufreq.c | 53 ++
 2 files changed, 56 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1fa9b52..f18a99b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2372,6 +2372,9 @@ void cpufreq_set_freq_update_hook(int cpu, struct 
freq_update_hook *hook,
void (*func)(struct freq_update_hook *hook, u64 time,
 unsigned long util, unsigned long max));
 void cpufreq_clear_freq_update_hook(int cpu);
+unsigned long cpufreq_get_cfs_capacity_margin(void);
+void cpufreq_set_cfs_capacity_margin(unsigned long margin);
+void cpufreq_reset_cfs_capacity_margin(void);
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index bd012c2..a126b58 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -61,6 +61,59 @@ void cpufreq_clear_freq_update_hook(int cpu)
 EXPORT_SYMBOL_GPL(cpufreq_clear_freq_update_hook);
 
 /**
+ * cpufreq_get_cfs_capacity_margin - Get global cfs enqueue capacity margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * This function returns the current global cfs enqueue capacity margin
+ */
+unsigned long cpufreq_get_cfs_capacity_margin(void)
+{
+   return cfs_capacity_margin;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_cfs_capacity_margin);
+
+/**
+ * cpufreq_set_cfs_capacity_margin - Set global cfs enqueue capacity margin
+ * @margin: new capacity margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * For instance, to add a 25% margin to a utilization, margin should be 1280,
+ * which is 1.25x 1024, the default for SCHED_CAPACITY_SCALE.
+ */
+void cpufreq_set_cfs_capacity_margin(unsigned long margin)
+{
+   cfs_capacity_margin = margin;
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_cfs_capacity_margin);
+
+/**
+ * cpufreq_reset_cfs_capacity_margin - Reset global cfs enqueue cap margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * This function resets the global margin to its default value.
+ */
+void cpufreq_reset_cfs_capacity_margin(void)
+{
+   cfs_capacity_margin = CAPACITY_MARGIN_DEFAULT;
+}
+EXPORT_SYMBOL_GPL(cpufreq_reset_cfs_capacity_margin);
+
+/**
  * cpufreq_update_util - Take a note about CPU utilization changes.
  * @time: Current time.
  * @util: CPU utilization.
-- 
2.1.4



[PATCH 7/8] cpufreq: Frequency invariant scheduler load-tracking support

2016-03-13 Thread Michael Turquette
From: Dietmar Eggemann 

Implements cpufreq_scale_freq_capacity() to provide the scheduler with a
frequency scaling correction factor for more accurate load-tracking.

The factor is:

(current_freq(cpu) << SCHED_CAPACITY_SHIFT) / max_freq(cpu)

In fact, freq_scale should be a struct cpufreq_policy data member. But
this would require that the scheduler hot path (__update_load_avg()) would
have to grab the cpufreq lock. This can be avoided by using per-cpu data
initialized to SCHED_CAPACITY_SCALE for freq_scale.

Signed-off-by: Dietmar Eggemann 
Signed-off-by: Michael Turquette 
---
I'm not as sure about patches 7 & 8, but I included them since I needed
frequency invariance while testing.

As mentioned by myself in 2014 and Rafael last month, the
arch_scale_freq_capacity hook is awkward, because this behavior may vary
within an architecture.

I re-introduce Dietmar's generic cpufreq implementation of the frequency
invariance hook in this patch,  and change the preprocessor magic in
sched.h to favor the cpufreq implementation over arch- or
platform-specific ones in the next patch.

If run-time selection of ops is needed then someone will need to write
that code.

I think that this negates the need for the arm arch hooks[0-2], and
hopefully Morten and Dietmar can weigh in on this.

[0] lkml.kernel.org/r/1436293469-25707-2-git-send-email-morten.rasmus...@arm.com
[1] lkml.kernel.org/r/1436293469-25707-6-git-send-email-morten.rasmus...@arm.com
[2] lkml.kernel.org/r/1436293469-25707-8-git-send-email-morten.rasmus...@arm.com

 drivers/cpufreq/cpufreq.c | 29 +
 include/linux/cpufreq.h   |  3 +++
 2 files changed, 32 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b1ca9c4..e67584f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -306,6 +306,31 @@ static void adjust_jiffies(unsigned long val, struct 
cpufreq_freqs *ci)
 #endif
 }
 
+/*
+ *   FREQUENCY INVARIANT CPU CAPACITY*
+ */
+
+static DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+
+static void
+scale_freq_capacity(struct cpufreq_policy *policy, struct cpufreq_freqs *freqs)
+{
+   unsigned long cur = freqs ? freqs->new : policy->cur;
+   unsigned long scale = (cur << SCHED_CAPACITY_SHIFT) / policy->max;
+   int cpu;
+
+   pr_debug("cpus %*pbl cur/cur max freq %lu/%u kHz freq scale %lu\n",
+cpumask_pr_args(policy->cpus), cur, policy->max, scale);
+
+   for_each_cpu(cpu, policy->cpus)
+   per_cpu(freq_scale, cpu) = scale;
+}
+
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+   return per_cpu(freq_scale, cpu);
+}
+
 static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
 {
@@ -409,6 +434,8 @@ wait:
 
spin_unlock(&policy->transition_lock);
 
+   scale_freq_capacity(policy, freqs);
+
cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
 }
 EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
@@ -2125,6 +2152,8 @@ static int cpufreq_set_policy(struct cpufreq_policy 
*policy,
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_NOTIFY, new_policy);
 
+   scale_freq_capacity(new_policy, NULL);
+
policy->min = new_policy->min;
policy->max = new_policy->max;
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 0e39499..72833be 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -583,4 +583,7 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
 int cpufreq_generic_init(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table,
unsigned int transition_latency);
+
+struct sched_domain;
+unsigned long cpufreq_scale_freq_capacity(struct sched_domain *sd, int cpu);
 #endif /* _LINUX_CPUFREQ_H */
-- 
2.1.4



[PATCH 3/8] sched/cpufreq: new cfs capacity margin helpers

2016-03-13 Thread Michael Turquette
Introduce helper functions that allow cpufreq governors to change the
value of the capacity margin applied to the cfs_rq->avg.util_avg signal.
This allows for run-time tuning of the margin.

A follow-up patch will update the schedutil governor to use these
helpers.

Signed-off-by: Michael Turquette 
---
 include/linux/sched.h  |  3 +++
 kernel/sched/cpufreq.c | 53 ++
 2 files changed, 56 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1fa9b52..f18a99b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2372,6 +2372,9 @@ void cpufreq_set_freq_update_hook(int cpu, struct 
freq_update_hook *hook,
void (*func)(struct freq_update_hook *hook, u64 time,
 unsigned long util, unsigned long max));
 void cpufreq_clear_freq_update_hook(int cpu);
+unsigned long cpufreq_get_cfs_capacity_margin(void);
+void cpufreq_set_cfs_capacity_margin(unsigned long margin);
+void cpufreq_reset_cfs_capacity_margin(void);
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index bd012c2..a126b58 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -61,6 +61,59 @@ void cpufreq_clear_freq_update_hook(int cpu)
 EXPORT_SYMBOL_GPL(cpufreq_clear_freq_update_hook);
 
 /**
+ * cpufreq_get_cfs_capacity_margin - Get global cfs enqueue capacity margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * This function returns the current global cfs enqueue capacity margin
+ */
+unsigned long cpufreq_get_cfs_capacity_margin(void)
+{
+   return cfs_capacity_margin;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_cfs_capacity_margin);
+
+/**
+ * cpufreq_set_cfs_capacity_margin - Set global cfs enqueue capacity margin
+ * @margin: new capacity margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * For instance, to add a 25% margin to a utilization, margin should be 1280,
+ * which is 1.25x 1024, the default for SCHED_CAPACITY_SCALE.
+ */
+void cpufreq_set_cfs_capacity_margin(unsigned long margin)
+{
+   cfs_capacity_margin = margin;
+}
+EXPORT_SYMBOL_GPL(cpufreq_set_cfs_capacity_margin);
+
+/**
+ * cpufreq_reset_cfs_capacity_margin - Reset global cfs enqueue cap margin
+ *
+ * margin is a percentage of capacity that is applied to the current
+ * utilization when selecting a new capacity state or cpu frequency. The value
+ * should be normalized to the range of [0..SCHED_CAPACITY_SCALE], where
+ * SCHED_CAPACITY_SCALE is 100% of the normalized capacity, or equivalent to
+ * multiplying the utilization by one.
+ *
+ * This function resets the global margin to its default value.
+ */
+void cpufreq_reset_cfs_capacity_margin(void)
+{
+   cfs_capacity_margin = CAPACITY_MARGIN_DEFAULT;
+}
+EXPORT_SYMBOL_GPL(cpufreq_reset_cfs_capacity_margin);
+
+/**
  * cpufreq_update_util - Take a note about CPU utilization changes.
  * @time: Current time.
  * @util: CPU utilization.
-- 
2.1.4



[PATCH 5/8] sched/cpufreq: pass sched class into cpufreq_update_util

2016-03-13 Thread Michael Turquette
cpufreq_update_util() accepts a single utilization value which  does not
account for multiple utilization contributions from the cfs, rt & dl
scheduler classes. Begin fixing this by adding a sched_class argument to
cpufreq_update_util(), all of its call sites and the governor-specific
hooks in intel_pstate.c, cpufreq_schedutil.c and cpufreq_governor.c.

A follow-on patch will add summation of the sched_class contributions to
the schedutil governor.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_governor.c  |  5 +++--
 drivers/cpufreq/cpufreq_schedutil.c |  6 --
 drivers/cpufreq/intel_pstate.c  |  5 +++--
 include/linux/sched.h   | 16 +---
 kernel/sched/cpufreq.c  | 11 +++
 kernel/sched/deadline.c |  2 +-
 kernel/sched/fair.c |  2 +-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h|  8 +---
 9 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c 
b/drivers/cpufreq/cpufreq_governor.c
index 148576c..4694751 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -248,8 +248,9 @@ static void dbs_irq_work(struct irq_work *irq_work)
schedule_work(&policy_dbs->work);
 }
 
-static void dbs_freq_update_handler(struct freq_update_hook *hook, u64 time,
-   unsigned long util_not_used,
+static void dbs_freq_update_handler(struct freq_update_hook *hook,
+   enum sched_class_util sc_not_used,
+   u64 time, unsigned long util_not_used,
unsigned long max_not_used)
 {
struct cpu_dbs_info *cdbs = container_of(hook, struct cpu_dbs_info, 
update_hook);
diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 12e49b9..18d9ca3 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -106,7 +106,8 @@ static void sugov_update_commit(struct sugov_policy 
*sg_policy, u64 time,
trace_cpu_frequency(freq, smp_processor_id());
 }
 
-static void sugov_update_single(struct freq_update_hook *hook, u64 time,
+static void sugov_update_single(struct freq_update_hook *hook,
+   enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
 {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
@@ -166,7 +167,8 @@ static unsigned int sugov_next_freq(struct sugov_policy 
*sg_policy,
return  util * max_f / max;
 }
 
-static void sugov_update_shared(struct freq_update_hook *hook, u64 time,
+static void sugov_update_shared(struct freq_update_hook *hook,
+   enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
 {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 20e2bb2..86aa368 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1020,8 +1020,9 @@ static inline void intel_pstate_adjust_busy_pstate(struct 
cpudata *cpu)
sample->freq);
 }
 
-static void intel_pstate_freq_update(struct freq_update_hook *hook, u64 time,
-unsigned long util_not_used,
+static void intel_pstate_freq_update(struct freq_update_hook *hook,
+enum sched_class_util sc_not_used
+u64 time, unsigned long util_not_used,
 unsigned long max_not_used)
 {
struct cpudata *cpu = container_of(hook, struct cpudata, update_hook);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f18a99b..1c7d7bd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2362,15 +2362,25 @@ extern u64 scheduler_tick_max_deferment(void);
 static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
+enum sched_class_util {
+   cfs_util,
+   rt_util,
+   dl_util,
+   nr_util_types,
+};
+
 #ifdef CONFIG_CPU_FREQ
 struct freq_update_hook {
-   void (*func)(struct freq_update_hook *hook, u64 time,
+   void (*func)(struct freq_update_hook *hook,
+enum sched_class_util sched_class, u64 time,
 unsigned long util, unsigned long max);
 };
 
 void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook,
-   void (*func)(struct freq_update_hook *hook, u64 time,
-unsigned long util, unsigned long max));
+   void (*func)(struct freq_update_hook *hook,
+enum sched_class_util sched_class,
+u64 

Re: [PATCH V5 02/10] perf/amd/iommu: Consolidate and move perf_event_amd_iommu header

2016-03-13 Thread Suravee Suthikulpanit

Hi,

On 03/12/2016 08:22 PM, Peter Zijlstra wrote:

On Tue, Feb 23, 2016 at 08:12:36AM -0600, Suravee Suthikulpanit wrote:

From: Suravee Suthikulpanit 

First, this patch moves arch/x86/events/amd/iommu.h to
arch/x86/include/asm/perf/amd/iommu.h so that we can easily include
it in both perf-amd-iommu and amd-iommu drivers.

Then, we consolidate declaration of AMD IOMMU performance counter
APIs into one file.


These seem two independent thingies; should this therefore not be 2
patches?


Reviewed-by: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
  arch/x86/events/amd/iommu.c   |  2 +-
  arch/x86/events/amd/iommu.h   | 40 -
  arch/x86/include/asm/perf/amd/iommu.h | 42 +++


That seems somewhat excessive. Not only do you create
arch/x86/include/asm/perf/ you then put another directory on top of
that.



The original header files (arch/x86/events/amd/iommu.h and
drivers/iommu/amd_iommu_proto.h) have duplicate function declarations.
So, with the new header file being in
arch/x86/include/asm/perf/amd/iommu.h, we can just have one function
declaration.


So, you just want to separate the file-moving part from the part that
removes the duplication?


Thanks,
Suravee


[PATCH 5/8] sched/cpufreq: pass sched class into cpufreq_update_util

2016-03-13 Thread Michael Turquette
cpufreq_update_util() accepts a single utilization value which  does not
account for multiple utilization contributions from the cfs, rt & dl
scheduler classes. Begin fixing this by adding a sched_class argument to
cpufreq_update_util(), all of its call sites and the governor-specific
hooks in intel_pstate.c, cpufreq_schedutil.c and cpufreq_governor.c.

A follow-on patch will add summation of the sched_class contributions to
the schedutil governor.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_governor.c  |  5 +++--
 drivers/cpufreq/cpufreq_schedutil.c |  6 --
 drivers/cpufreq/intel_pstate.c  |  5 +++--
 include/linux/sched.h   | 16 +---
 kernel/sched/cpufreq.c  | 11 +++
 kernel/sched/deadline.c |  2 +-
 kernel/sched/fair.c |  2 +-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h|  8 +---
 9 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c 
b/drivers/cpufreq/cpufreq_governor.c
index 148576c..4694751 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -248,8 +248,9 @@ static void dbs_irq_work(struct irq_work *irq_work)
schedule_work(&policy_dbs->work);
 }
 
-static void dbs_freq_update_handler(struct freq_update_hook *hook, u64 time,
-   unsigned long util_not_used,
+static void dbs_freq_update_handler(struct freq_update_hook *hook,
+   enum sched_class_util sc_not_used,
+   u64 time, unsigned long util_not_used,
unsigned long max_not_used)
 {
struct cpu_dbs_info *cdbs = container_of(hook, struct cpu_dbs_info, 
update_hook);
diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 12e49b9..18d9ca3 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -106,7 +106,8 @@ static void sugov_update_commit(struct sugov_policy 
*sg_policy, u64 time,
trace_cpu_frequency(freq, smp_processor_id());
 }
 
-static void sugov_update_single(struct freq_update_hook *hook, u64 time,
+static void sugov_update_single(struct freq_update_hook *hook,
+   enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
 {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
@@ -166,7 +167,8 @@ static unsigned int sugov_next_freq(struct sugov_policy 
*sg_policy,
return  util * max_f / max;
 }
 
-static void sugov_update_shared(struct freq_update_hook *hook, u64 time,
+static void sugov_update_shared(struct freq_update_hook *hook,
+   enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
 {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 20e2bb2..86aa368 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1020,8 +1020,9 @@ static inline void intel_pstate_adjust_busy_pstate(struct 
cpudata *cpu)
sample->freq);
 }
 
-static void intel_pstate_freq_update(struct freq_update_hook *hook, u64 time,
-unsigned long util_not_used,
+static void intel_pstate_freq_update(struct freq_update_hook *hook,
+enum sched_class_util sc_not_used
+u64 time, unsigned long util_not_used,
 unsigned long max_not_used)
 {
struct cpudata *cpu = container_of(hook, struct cpudata, update_hook);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f18a99b..1c7d7bd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2362,15 +2362,25 @@ extern u64 scheduler_tick_max_deferment(void);
 static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
+enum sched_class_util {
+   cfs_util,
+   rt_util,
+   dl_util,
+   nr_util_types,
+};
+
 #ifdef CONFIG_CPU_FREQ
 struct freq_update_hook {
-   void (*func)(struct freq_update_hook *hook, u64 time,
+   void (*func)(struct freq_update_hook *hook,
+enum sched_class_util sched_class, u64 time,
 unsigned long util, unsigned long max);
 };
 
 void cpufreq_set_freq_update_hook(int cpu, struct freq_update_hook *hook,
-   void (*func)(struct freq_update_hook *hook, u64 time,
-unsigned long util, unsigned long max));
+   void (*func)(struct freq_update_hook *hook,
+enum sched_class_util sched_class,
+u64 time, unsigned long util,
+   

Re: [PATCH V5 02/10] perf/amd/iommu: Consolidate and move perf_event_amd_iommu header

2016-03-13 Thread Suravee Suthikulpanit

Hi,

On 03/12/2016 08:22 PM, Peter Zijlstra wrote:

On Tue, Feb 23, 2016 at 08:12:36AM -0600, Suravee Suthikulpanit wrote:

From: Suravee Suthikulpanit 

First, this patch moves arch/x86/events/amd/iommu.h to
arch/x86/include/asm/perf/amd/iommu.h so that we can easily include
it in both perf-amd-iommu and amd-iommu drivers.

Then, we consolidate declaration of AMD IOMMU performance counter
APIs into one file.


These seem two independent thingies; should this therefore not be 2
patches?


Reviewed-by: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
  arch/x86/events/amd/iommu.c   |  2 +-
  arch/x86/events/amd/iommu.h   | 40 -
  arch/x86/include/asm/perf/amd/iommu.h | 42 +++


That seems somewhat excessive. Not only do you create
arch/x86/include/asm/perf/ you then put another directory on top of
that.



The original header files (arch/x86/events/amd/iommu.h and
drivers/iommu/amd_iommu_proto.h) have duplicate function declarations.
So, with the new header file being in
arch/x86/include/asm/perf/amd/iommu.h, we can just have one function
declaration.


So, you just want to separate the file-moving part from the part that
removes the duplication?


Thanks,
Suravee


[PATCH 8/8] sched: prefer cpufreq_scale_freq_capacity

2016-03-13 Thread Michael Turquette
arch_scale_freq_capacity is weird. It specifies an arch hook for an
implementation that could easily vary within an architecture or even a
chip family.

This patch helps to mitigate this weirdness by defaulting to the
cpufreq-provided implementation, which should work for all cases where
CONFIG_CPU_FREQ is set.

If CONFIG_CPU_FREQ is not set, then try to use an implementation
provided by the architecture. Failing that, fall back to
SCHED_CAPACITY_SCALE.

It may be desirable for cpufreq drivers to specify their own
implementation of arch_scale_freq_capacity in the future. The same is
true for platform code within an architecture. In both cases an
efficient implementation selector will need to be created and this patch
adds a comment to that effect.

Signed-off-by: Michael Turquette 
---
 kernel/sched/sched.h | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 469d11d..37502ea 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1368,7 +1368,21 @@ static inline int hrtick_enabled(struct rq *rq)
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
 
-#ifndef arch_scale_freq_capacity
+/*
+ * arch_scale_freq_capacity can be implemented by cpufreq, platform code or
+ * arch code. We select the cpufreq-provided implementation first. If it
+ * doesn't exist then we default to any other implementation provided from
+ * platform/arch code. If those do not exist then we use the default
+ * SCHED_CAPACITY_SCALE value below.
+ *
+ * Note that if cpufreq drivers or platform/arch code have competing
+ * implementations it is up to those subsystems to select one at runtime with
+ * an efficient solution, as we cannot tolerate the overhead of indirect
+ * functions (e.g. function pointers) in the scheduler fast path
+ */
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#elif !defined(arch_scale_freq_capacity)
 static __always_inline
 unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
-- 
2.1.4



[PATCH 1/8] sched/cpufreq: remove cpufreq_trigger_update()

2016-03-13 Thread Michael Turquette
cpufreq_trigger_update() was introduced in "cpufreq: Rework the
scheduler hooks for triggering updates"[0]. Consensus is that this
helper is not needed and removing it will aid in experimenting with
deadline and rt capacity requests.

Instead of reverting the above patch, which includes useful renaming of
data structures and related functions, simply remove the function,
update affected kerneldoc and change rt.c and deadline.c to use
cpufreq_update_util().

[0] lkml.kernel.org/r/7541372.ciuw4go...@vostro.rjw.lan

Signed-off-by: Michael Turquette 
---
 kernel/sched/cpufreq.c  | 28 ++--
 kernel/sched/deadline.c |  2 +-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h|  2 --
 4 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index eecaba4..bd012c2 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -20,8 +20,8 @@ static DEFINE_PER_CPU(struct freq_update_hook *, 
cpufreq_freq_update_hook);
  *
  * Set and publish the freq_update_hook pointer for the given CPU.  That 
pointer
  * points to a struct freq_update_hook object containing a callback function
- * to call from cpufreq_trigger_update().  That function will be called from
- * an RCU read-side critical section, so it must not sleep.
+ * to call from cpufreq_update_util().  That function will be called from an
+ * RCU read-side critical section, so it must not sleep.
  *
  * Callers must use RCU-sched callbacks to free any memory that might be
  * accessed via the old update_util_data pointer or invoke synchronize_sched()
@@ -87,27 +87,3 @@ void cpufreq_update_util(u64 time, unsigned long util, 
unsigned long max)
if (hook)
hook->func(hook, time, util, max);
 }
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
- *
- * The way cpufreq is currently arranged requires it to evaluate the CPU
- * performance state (frequency/voltage) on a regular basis.  To facilitate
- * that, cpufreq_update_util() is called by update_load_avg() in CFS when
- * executed for the current CPU's runqueue.
- *
- * However, this isn't sufficient to prevent the CPU from being stuck in a
- * completely inadequate performance level for too long, because the calls
- * from CFS will not be made if RT or deadline tasks are active all the time
- * (or there are RT and DL tasks only).
- *
- * As a workaround for that issue, this function is called by the RT and DL
- * sched classes to trigger extra cpufreq updates to prevent it from stalling,
- * but that really is a band-aid.  Going forward it should be replaced with
- * solutions targeted more specifically at RT and DL tasks.
- */
-void cpufreq_trigger_update(u64 time)
-{
-   cpufreq_update_util(time, ULONG_MAX, 0);
-}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1a035fa..3fd5bc4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -728,7 +728,7 @@ static void update_curr_dl(struct rq *rq)
 
/* Kick cpufreq (see the comment in drivers/cpufreq/cpufreq.c). */
if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
+   cpufreq_update_util(rq_clock(rq), ULONG_MAX, 0);
 
/*
 * Consumed budget is computed considering the time as
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9dd1c09..53ad077 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -947,7 +947,7 @@ static void update_curr_rt(struct rq *rq)
 
/* Kick cpufreq (see the comment in drivers/cpufreq/cpufreq.c). */
if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
+   cpufreq_update_util(rq_clock(rq), ULONG_MAX, 0);
 
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7ae012e..f06dfca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1742,9 +1742,7 @@ static inline u64 irq_time_read(int cpu)
 
 #ifdef CONFIG_CPU_FREQ
 void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
-void cpufreq_trigger_update(u64 time);
 #else
 static inline void cpufreq_update_util(u64 time, unsigned long util,
   unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
-- 
2.1.4



[PATCH 6/8] cpufreq/schedutil: sum per-sched class utilization

2016-03-13 Thread Michael Turquette
Patch, "sched/cpufreq: pass sched class into cpufreq_update_util" made
it possible for calls of cpufreq_update_util() to specify scheduler
class, particularly cfs, rt & dl.

Update the schedutil governor to store these individual utilizations per
cpu and sum them to create a total utilization contribution.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_schedutil.c | 39 +++--
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 18d9ca3..b9234e1 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -46,8 +46,10 @@ struct sugov_cpu {
struct freq_update_hook update_hook;
struct sugov_policy *sg_policy;
 
+   unsigned long util[nr_util_types];
+   unsigned long total_util;
+
/* The fields below are only needed when sharing a policy. */
-   unsigned long util;
unsigned long max;
u64 last_update;
 };
@@ -106,6 +108,18 @@ static void sugov_update_commit(struct sugov_policy 
*sg_policy, u64 time,
trace_cpu_frequency(freq, smp_processor_id());
 }
 
+static unsigned long sugov_sum_total_util(struct sugov_cpu *sg_cpu)
+{
+   enum sched_class_util sc;
+
+   /* sum the utilization of all sched classes */
+   sg_cpu->total_util = 0;
+   for (sc = 0; sc < nr_util_types; sc++)
+   sg_cpu->total_util += sg_cpu->util[sc];
+
+   return sg_cpu->total_util;
+}
+
 static void sugov_update_single(struct freq_update_hook *hook,
enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
@@ -113,12 +127,17 @@ static void sugov_update_single(struct freq_update_hook 
*hook,
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned int max_f, next_f;
+   unsigned long total_util;
 
if (!sugov_should_update_freq(sg_policy, time))
return;
 
+   /* update per-sched_class utilization for this cpu */
+   sg_cpu->util[sc] = util;
+   total_util = sugov_sum_total_util(sg_cpu);
+
max_f = sg_policy->max_freq;
-   next_f = util > max ? max_f : util * max_f / max;
+   next_f = total_util > max ? max_f : total_util * max_f / max;
sugov_update_commit(sg_policy, time, next_f);
 }
 
@@ -153,7 +172,7 @@ static unsigned int sugov_next_freq(struct sugov_policy 
*sg_policy,
if ((s64)delta_ns > NSEC_PER_SEC / HZ)
continue;
 
-   j_util = j_sg_cpu->util;
+   j_util = j_sg_cpu->total_util;
j_max = j_sg_cpu->max;
if (j_util > j_max)
return max_f;
@@ -174,15 +193,19 @@ static void sugov_update_shared(struct freq_update_hook 
*hook,
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned int next_f;
+   unsigned long total_util;
 
raw_spin_lock(&sg_policy->update_lock);
 
-   sg_cpu->util = util;
+   sg_cpu->util[sc] = util;
sg_cpu->max = max;
sg_cpu->last_update = time;
 
+   /* update per-sched_class utilization for this cpu */
+   total_util = sugov_sum_total_util(sg_cpu);
+
if (sugov_should_update_freq(sg_policy, time)) {
-   next_f = sugov_next_freq(sg_policy, util, max);
+   next_f = sugov_next_freq(sg_policy, total_util, max);
sugov_update_commit(sg_policy, time, next_f);
}
 
@@ -423,6 +446,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 {
struct sugov_policy *sg_policy = policy->governor_data;
unsigned int cpu;
+   enum sched_class_util sc;
 
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * 
NSEC_PER_USEC;
sg_policy->last_freq_update_time = 0;
@@ -434,8 +458,11 @@ static int sugov_start(struct cpufreq_policy *policy)
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
sg_cpu->sg_policy = sg_policy;
+   for (sc = 0; sc < nr_util_types; sc++) {
+   sg_cpu->util[sc] = ULONG_MAX;
+   sg_cpu->total_util = ULONG_MAX;
+   }
if (policy_is_shared(policy)) {
-   sg_cpu->util = ULONG_MAX;
sg_cpu->max = 0;
sg_cpu->last_update = 0;
cpufreq_set_freq_update_hook(cpu, &sg_cpu->update_hook,
-- 
2.1.4



[PATCH 8/8] sched: prefer cpufreq_scale_freq_capacity

2016-03-13 Thread Michael Turquette
arch_scale_freq_capacity is weird. It specifies an arch hook for an
implementation that could easily vary within an architecture or even a
chip family.

This patch helps to mitigate this weirdness by defaulting to the
cpufreq-provided implementation, which should work for all cases where
CONFIG_CPU_FREQ is set.

If CONFIG_CPU_FREQ is not set, then try to use an implementation
provided by the architecture. Failing that, fall back to
SCHED_CAPACITY_SCALE.

It may be desirable for cpufreq drivers to specify their own
implementation of arch_scale_freq_capacity in the future. The same is
true for platform code within an architecture. In both cases an
efficient implementation selector will need to be created and this patch
adds a comment to that effect.

Signed-off-by: Michael Turquette 
---
 kernel/sched/sched.h | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 469d11d..37502ea 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1368,7 +1368,21 @@ static inline int hrtick_enabled(struct rq *rq)
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
 
-#ifndef arch_scale_freq_capacity
+/*
+ * arch_scale_freq_capacity can be implemented by cpufreq, platform code or
+ * arch code. We select the cpufreq-provided implementation first. If it
+ * doesn't exist then we default to any other implementation provided from
+ * platform/arch code. If those do not exist then we use the default
+ * SCHED_CAPACITY_SCALE value below.
+ *
+ * Note that if cpufreq drivers or platform/arch code have competing
+ * implementations it is up to those subsystems to select one at runtime with
+ * an efficient solution, as we cannot tolerate the overhead of indirect
+ * functions (e.g. function pointers) in the scheduler fast path
+ */
+#ifdef CONFIG_CPU_FREQ
+#define arch_scale_freq_capacity cpufreq_scale_freq_capacity
+#elif !defined(arch_scale_freq_capacity)
 static __always_inline
 unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
-- 
2.1.4



[PATCH 1/8] sched/cpufreq: remove cpufreq_trigger_update()

2016-03-13 Thread Michael Turquette
cpufreq_trigger_update() was introduced in "cpufreq: Rework the
scheduler hooks for triggering updates"[0]. Consensus is that this
helper is not needed and removing it will aid in experimenting with
deadline and rt capacity requests.

Instead of reverting the above patch, which includes useful renaming of
data structures and related functions, simply remove the function,
update affected kerneldoc and change rt.c and deadline.c to use
cpufreq_update_util().

[0] lkml.kernel.org/r/7541372.ciuw4go...@vostro.rjw.lan

Signed-off-by: Michael Turquette 
---
 kernel/sched/cpufreq.c  | 28 ++--
 kernel/sched/deadline.c |  2 +-
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h|  2 --
 4 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index eecaba4..bd012c2 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -20,8 +20,8 @@ static DEFINE_PER_CPU(struct freq_update_hook *, 
cpufreq_freq_update_hook);
  *
  * Set and publish the freq_update_hook pointer for the given CPU.  That 
pointer
  * points to a struct freq_update_hook object containing a callback function
- * to call from cpufreq_trigger_update().  That function will be called from
- * an RCU read-side critical section, so it must not sleep.
+ * to call from cpufreq_update_util().  That function will be called from an
+ * RCU read-side critical section, so it must not sleep.
  *
  * Callers must use RCU-sched callbacks to free any memory that might be
  * accessed via the old update_util_data pointer or invoke synchronize_sched()
@@ -87,27 +87,3 @@ void cpufreq_update_util(u64 time, unsigned long util, 
unsigned long max)
if (hook)
hook->func(hook, time, util, max);
 }
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
- *
- * The way cpufreq is currently arranged requires it to evaluate the CPU
- * performance state (frequency/voltage) on a regular basis.  To facilitate
- * that, cpufreq_update_util() is called by update_load_avg() in CFS when
- * executed for the current CPU's runqueue.
- *
- * However, this isn't sufficient to prevent the CPU from being stuck in a
- * completely inadequate performance level for too long, because the calls
- * from CFS will not be made if RT or deadline tasks are active all the time
- * (or there are RT and DL tasks only).
- *
- * As a workaround for that issue, this function is called by the RT and DL
- * sched classes to trigger extra cpufreq updates to prevent it from stalling,
- * but that really is a band-aid.  Going forward it should be replaced with
- * solutions targeted more specifically at RT and DL tasks.
- */
-void cpufreq_trigger_update(u64 time)
-{
-   cpufreq_update_util(time, ULONG_MAX, 0);
-}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1a035fa..3fd5bc4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -728,7 +728,7 @@ static void update_curr_dl(struct rq *rq)
 
/* Kick cpufreq (see the comment in drivers/cpufreq/cpufreq.c). */
if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
+   cpufreq_update_util(rq_clock(rq), ULONG_MAX, 0);
 
/*
 * Consumed budget is computed considering the time as
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 9dd1c09..53ad077 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -947,7 +947,7 @@ static void update_curr_rt(struct rq *rq)
 
/* Kick cpufreq (see the comment in drivers/cpufreq/cpufreq.c). */
if (cpu_of(rq) == smp_processor_id())
-   cpufreq_trigger_update(rq_clock(rq));
+   cpufreq_update_util(rq_clock(rq), ULONG_MAX, 0);
 
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7ae012e..f06dfca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1742,9 +1742,7 @@ static inline u64 irq_time_read(int cpu)
 
 #ifdef CONFIG_CPU_FREQ
 void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
-void cpufreq_trigger_update(u64 time);
 #else
 static inline void cpufreq_update_util(u64 time, unsigned long util,
   unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
-- 
2.1.4



[PATCH 6/8] cpufreq/schedutil: sum per-sched class utilization

2016-03-13 Thread Michael Turquette
Patch, "sched/cpufreq: pass sched class into cpufreq_update_util" made
it possible for calls of cpufreq_update_util() to specify scheduler
class, particularly cfs, rt & dl.

Update the schedutil governor to store these individual utilizations per
cpu and sum them to create a total utilization contribution.

Signed-off-by: Michael Turquette 
---
 drivers/cpufreq/cpufreq_schedutil.c | 39 +++--
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_schedutil.c 
b/drivers/cpufreq/cpufreq_schedutil.c
index 18d9ca3..b9234e1 100644
--- a/drivers/cpufreq/cpufreq_schedutil.c
+++ b/drivers/cpufreq/cpufreq_schedutil.c
@@ -46,8 +46,10 @@ struct sugov_cpu {
struct freq_update_hook update_hook;
struct sugov_policy *sg_policy;
 
+   unsigned long util[nr_util_types];
+   unsigned long total_util;
+
/* The fields below are only needed when sharing a policy. */
-   unsigned long util;
unsigned long max;
u64 last_update;
 };
@@ -106,6 +108,18 @@ static void sugov_update_commit(struct sugov_policy 
*sg_policy, u64 time,
trace_cpu_frequency(freq, smp_processor_id());
 }
 
+static unsigned long sugov_sum_total_util(struct sugov_cpu *sg_cpu)
+{
+   enum sched_class_util sc;
+
+   /* sum the utilization of all sched classes */
+   sg_cpu->total_util = 0;
+   for (sc = 0; sc < nr_util_types; sc++)
+   sg_cpu->total_util += sg_cpu->util[sc];
+
+   return sg_cpu->total_util;
+}
+
 static void sugov_update_single(struct freq_update_hook *hook,
enum sched_class_util sc, u64 time,
unsigned long util, unsigned long max)
@@ -113,12 +127,17 @@ static void sugov_update_single(struct freq_update_hook 
*hook,
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned int max_f, next_f;
+   unsigned long total_util;
 
if (!sugov_should_update_freq(sg_policy, time))
return;
 
+   /* update per-sched_class utilization for this cpu */
+   sg_cpu->util[sc] = util;
+   total_util = sugov_sum_total_util(sg_cpu);
+
max_f = sg_policy->max_freq;
-   next_f = util > max ? max_f : util * max_f / max;
+   next_f = total_util > max ? max_f : total_util * max_f / max;
sugov_update_commit(sg_policy, time, next_f);
 }
 
@@ -153,7 +172,7 @@ static unsigned int sugov_next_freq(struct sugov_policy 
*sg_policy,
if ((s64)delta_ns > NSEC_PER_SEC / HZ)
continue;
 
-   j_util = j_sg_cpu->util;
+   j_util = j_sg_cpu->total_util;
j_max = j_sg_cpu->max;
if (j_util > j_max)
return max_f;
@@ -174,15 +193,19 @@ static void sugov_update_shared(struct freq_update_hook 
*hook,
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_hook);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned int next_f;
+   unsigned long total_util;
 
raw_spin_lock(&sg_policy->update_lock);
 
-   sg_cpu->util = util;
+   sg_cpu->util[sc] = util;
sg_cpu->max = max;
sg_cpu->last_update = time;
 
+   /* update per-sched_class utilization for this cpu */
+   total_util = sugov_sum_total_util(sg_cpu);
+
if (sugov_should_update_freq(sg_policy, time)) {
-   next_f = sugov_next_freq(sg_policy, util, max);
+   next_f = sugov_next_freq(sg_policy, total_util, max);
sugov_update_commit(sg_policy, time, next_f);
}
 
@@ -423,6 +446,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 {
struct sugov_policy *sg_policy = policy->governor_data;
unsigned int cpu;
+   enum sched_class_util sc;
 
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * 
NSEC_PER_USEC;
sg_policy->last_freq_update_time = 0;
@@ -434,8 +458,11 @@ static int sugov_start(struct cpufreq_policy *policy)
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
sg_cpu->sg_policy = sg_policy;
+   for (sc = 0; sc < nr_util_types; sc++) {
+   sg_cpu->util[sc] = ULONG_MAX;
+   sg_cpu->total_util = ULONG_MAX;
+   }
if (policy_is_shared(policy)) {
-   sg_cpu->util = ULONG_MAX;
sg_cpu->max = 0;
sg_cpu->last_update = 0;
cpufreq_set_freq_update_hook(cpu, &sg_cpu->update_hook,
-- 
2.1.4



Re: C1E auto-promotion suspend/resume

2016-03-13 Thread Andy Lutomirski
On Sun, Mar 13, 2016 at 9:31 PM, Brown, Len  wrote:
>> My BIOS (1.2.3 on a Dell XPS 13 9350) seems to want to enable C1E
>> auto-promotion (ugh!), which results in this difference across
>> suspend/resume according to turbostat:
>>
>> -cpu3: MSR_IA32_POWER_CTL: 0x0024005d (C1E auto-promotion: DISabled)
>> +cpu3: MSR_IA32_POWER_CTL: 0x0024005f (C1E auto-promotion: ENabled)
>>
>> Should intel_idle learn to re-disable idle promotion on resume?
>
> Yes, it seems that way.
>
> Go ahead and send a patch, or file a bug at bugzilla.kernel.org
> and we'll get to it.

Sent.  The only other differences I see across suspend/resume in
turbostat --debug (with taskset -c 0 to suppress spurious junk) are:

--- pre-susp.txt2016-03-13 22:21:39.889337697 -0700
+++ post-susp.txt2016-03-13 21:38:20.782503438 -0700
@@ -24,8 +24,8 @@
 cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008006 (UNdemote-C3,
UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=6: pc8)
 cpu0: MSR_PM_ENABLE: 0x0001 (HWP)
 cpu0: MSR_HWP_CAPABILITIES: 0x0108171c (high 0x1c guar 0x17 eff 0x8 low 0x1)
-cpu0: MSR_HWP_REQUEST: 0x80001c04 (min 0x4 max 0x1c des 0x0 epp 0x80
window 0x0 pkg 0x0)
-cpu0: MSR_HWP_INTERRUPT: 0x0001 (EN_Guaranteed_Perf_Change,
Dis_Excursion_Min)
+cpu0: MSR_HWP_REQUEST: 0x8000ff01 (min 0x1 max 0xff des 0x0 epp 0x80
window 0x0 pkg 0x0)
+cpu0: MSR_HWP_INTERRUPT: 0x (Dis_Guaranteed_Perf_Change,
Dis_Excursion_Min)
 cpu0: MSR_HWP_STATUS: 0x (No-Guaranteed_Perf_Change, No-Excursion_Min)
 cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x0006 (balanced)
 cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.61
Joules, 0.000977 sec.)
@@ -36,6 +36,6 @@
 cpu0: MSR_DRAM_POWER_LIMIT: 0x5400de (UNlocked)
 cpu0: DRAM Limit: DISabled (0.00 Watts, 0.000977 sec, clamp DISabled)
 cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x0064 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x8836 (46 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x8836 (46 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x8836 (46 C +/- 1)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x883c (40 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x883c (40 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x8840 (36 C +/- 1)


Are either of those potentially interesting?


Re: C1E auto-promotion suspend/resume

2016-03-13 Thread Andy Lutomirski
On Sun, Mar 13, 2016 at 9:31 PM, Brown, Len  wrote:
>> My BIOS (1.2.3 on a Dell XPS 13 9350) seems to want to enable C1E
>> auto-promotion (ugh!), which results in this difference across
>> suspend/resume according to turbostat:
>>
>> -cpu3: MSR_IA32_POWER_CTL: 0x0024005d (C1E auto-promotion: DISabled)
>> +cpu3: MSR_IA32_POWER_CTL: 0x0024005f (C1E auto-promotion: ENabled)
>>
>> Should intel_idle learn to re-disable idle promotion on resume?
>
> Yes, it seems that way.
>
> Go ahead and send a patch, or file a bug at bugzilla.kernel.org
> and we'll get to it.

Sent.  The only other differences I see across suspend/resume in
turbostat --debug (with taskset -c 0 to suppress spurious junk) are:

--- pre-susp.txt2016-03-13 22:21:39.889337697 -0700
+++ post-susp.txt2016-03-13 21:38:20.782503438 -0700
@@ -24,8 +24,8 @@
 cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008006 (UNdemote-C3,
UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=6: pc8)
 cpu0: MSR_PM_ENABLE: 0x0001 (HWP)
 cpu0: MSR_HWP_CAPABILITIES: 0x0108171c (high 0x1c guar 0x17 eff 0x8 low 0x1)
-cpu0: MSR_HWP_REQUEST: 0x80001c04 (min 0x4 max 0x1c des 0x0 epp 0x80
window 0x0 pkg 0x0)
-cpu0: MSR_HWP_INTERRUPT: 0x0001 (EN_Guaranteed_Perf_Change,
Dis_Excursion_Min)
+cpu0: MSR_HWP_REQUEST: 0x8000ff01 (min 0x1 max 0xff des 0x0 epp 0x80
window 0x0 pkg 0x0)
+cpu0: MSR_HWP_INTERRUPT: 0x (Dis_Guaranteed_Perf_Change,
Dis_Excursion_Min)
 cpu0: MSR_HWP_STATUS: 0x (No-Guaranteed_Perf_Change, No-Excursion_Min)
 cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x0006 (balanced)
 cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.61
Joules, 0.000977 sec.)
@@ -36,6 +36,6 @@
 cpu0: MSR_DRAM_POWER_LIMIT: 0x5400de (UNlocked)
 cpu0: DRAM Limit: DISabled (0.00 Watts, 0.000977 sec, clamp DISabled)
 cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x0064 (100 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x8836 (46 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x8836 (46 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x8836 (46 C +/- 1)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x883c (40 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x883c (40 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x8840 (36 C +/- 1)


Are either of those potentially interesting?


Re: [RFC PATCH V2] checkpatch: Check output format style of __func__ uses

2016-03-13 Thread Julia Lawall
On Sun, 13 Mar 2016, Joe Perches wrote:

> Logging messages that emit function names have many different forms.
> Perhaps it'd be better for logging consistency and grep ease to
> exclusively use "%s:"
> 
> As well, function tracing logging uses are generally unnecessary given
> the kernel's function tracing (ftrace) capability.
> 
> Right now, grep shows these mixtures of forms:
> 
> 13704 "%s:"
> 3839  "%s "
> 2787  "%s()"
> 
> Some of these are macro definitions of various styles.
> 
> Unfortunately, given the complexity of these macro definition styles,
> checkpatch isn't an ideal tool to find these macros.
> 
> Maybe a coccinelle script might be better suited to find and fix all
> the various types of uses.
> 
> Add a --fix option for these logging messages with __func__.

I'm not good enough at perl to really understand this.  Could you give an 
example of what it does, and of what it does not do?

thanks,
julia

> 
> Signed-off-by: Joe Perches 
> ---
> 
> v2: Warn on function tracing logging
> Add --fix option
> 
>  scripts/checkpatch.pl | 55 
> +++
>  1 file changed, 55 insertions(+)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 75ce6d0..b695f75 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -1415,6 +1415,22 @@ sub raw_line {
>   return $line;
>  }
>  
> +sub cooked_line {
> + my ($linenr, $cnt) = @_;
> +
> + my $offset = $linenr - 1;
> + $cnt++;
> +
> + my $line;
> + while ($cnt) {
> + $line = $lines[$offset++];
> + next if (defined($line) && $line =~ /^-/);
> + $cnt--;
> + }
> +
> + return $line;
> +}
> +
>  sub cat_vet {
>   my ($vet) = @_;
>   my ($res, $coded);
> @@ -5681,6 +5697,45 @@ sub process {
>   }
>   }
>  
> +# check how __func__ is formatted, prefer "%s:...',  __func__
> + if ($^V && $^V ge 5.10.0 &&
> + defined $stat &&
> + $stat =~ /\b__func__\b/ &&
> + $stat =~ 
> /^\+\s*$logFunctions\s*\(\s*[^"]*$String\s*,\s*__func__\b/m &&
> + (() = $stat =~ /^\+|\n\+/g) == 1 &&
> + (() = $stat =~ /;/g) <= 1) {
> + my $herectx = $here . "\n";
> + my $cooked_linenr = -1;
> + my $cooked_line = "";
> + my $raw_line = "";
> + my $cnt = statement_rawlines($stat);
> + for (my $n = 0; $n < $cnt; $n++) {
> + $herectx .= raw_line($linenr, $n) . "\n";
> + if ($cooked_linenr == -1 && 
> cooked_line($linenr, $n) =~ /$String/) {
> + $cooked_linenr = $linenr + $n;
> + $cooked_line = cooked_line($linenr, $n);
> + $raw_line = raw_line($linenr, $n);
> + }
> + }
> + my $qs = get_quoted_string($cooked_line, $raw_line);
> + if ($qs =~ 
> /^"\s*%s(?:[\s:\-]*|[\s:\-]*\(\s*\)\s*[\s:\-]*)?(?:enter|entering|entered|exit|exiting)?\s*\.*\s*\\n"$/i)
>  {
> + if (WARN("FUNC_STYLE",
> +  "Prefer using ftrace to logging 
> function entry/exit\n" . $herectx) &&
> +  $cnt == 1 &&
> +  $fix) {
> + fix_delete_line($fixlinenr, $rawline);
> + }
> + } elsif ($qs !~ /^"%s:/) {
> + if (WARN("FUNC_STYLE",
> +  "Prefer using formatting style '%s:' 
> for __func__\n" . $herectx) &&
> + $fix) {
> + $fixed[$cooked_linenr - 1] =~ 
> s/[:\s]*%s(?:[:\s,\-]*|[\s:\-]*\(\s*\)\s*[\s:\-]*)?//;
> + $fixed[$cooked_linenr - 1] =~ s/"/"%s: 
> /;
> + $fixed[$cooked_linenr - 1] =~ s/"%s: 
> \\n/"%s\\n/;
> + }
> + }
> + }
> +
>  # check for uses of __DATE__, __TIME__, __TIMESTAMP__
>   while ($line =~ /\b(__(?:DATE|TIME|TIMESTAMP)__)\b/g) {
>   ERROR("DATE_TIME",
> -- 
> 2.6.3.368.gf34be46
> 
> 


Re: [RFC PATCH V2] checkpatch: Check output format style of __func__ uses

2016-03-13 Thread Julia Lawall
On Sun, 13 Mar 2016, Joe Perches wrote:

> Logging messages that emit function names have many different forms.
> Perhaps it'd be better for logging consistency and grep ease to
> exclusively use "%s:"
> 
> As well, function tracing logging uses are generally unnecessary given
> the kernel's function tracing (ftrace) capability.
> 
> Right now, grep shows these mixtures of forms:
> 
> 13704 "%s:"
> 3839  "%s "
> 2787  "%s()"
> 
> Some of these are macro definitions of various styles.
> 
> Unfortunately, given the complexity of these macro definition styles,
> checkpatch isn't an ideal tool to find these macros.
> 
> Maybe a coccinelle script might be better suited to find and fix all
> the various types of uses.
> 
> Add a --fix option for these logging messages with __func__.

I'm not good enough at perl to really understand this.  Could you give an 
example of what it does, and of what it does not do?

thanks,
julia

> 
> Signed-off-by: Joe Perches 
> ---
> 
> v2: Warn on function tracing logging
> Add --fix option
> 
>  scripts/checkpatch.pl | 55 
> +++
>  1 file changed, 55 insertions(+)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index 75ce6d0..b695f75 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -1415,6 +1415,22 @@ sub raw_line {
>   return $line;
>  }
>  
> +sub cooked_line {
> + my ($linenr, $cnt) = @_;
> +
> + my $offset = $linenr - 1;
> + $cnt++;
> +
> + my $line;
> + while ($cnt) {
> + $line = $lines[$offset++];
> + next if (defined($line) && $line =~ /^-/);
> + $cnt--;
> + }
> +
> + return $line;
> +}
> +
>  sub cat_vet {
>   my ($vet) = @_;
>   my ($res, $coded);
> @@ -5681,6 +5697,45 @@ sub process {
>   }
>   }
>  
> +# check how __func__ is formatted, prefer "%s:...',  __func__
> + if ($^V && $^V ge 5.10.0 &&
> + defined $stat &&
> + $stat =~ /\b__func__\b/ &&
> + $stat =~ 
> /^\+\s*$logFunctions\s*\(\s*[^"]*$String\s*,\s*__func__\b/m &&
> + (() = $stat =~ /^\+|\n\+/g) == 1 &&
> + (() = $stat =~ /;/g) <= 1) {
> + my $herectx = $here . "\n";
> + my $cooked_linenr = -1;
> + my $cooked_line = "";
> + my $raw_line = "";
> + my $cnt = statement_rawlines($stat);
> + for (my $n = 0; $n < $cnt; $n++) {
> + $herectx .= raw_line($linenr, $n) . "\n";
> + if ($cooked_linenr == -1 && 
> cooked_line($linenr, $n) =~ /$String/) {
> + $cooked_linenr = $linenr + $n;
> + $cooked_line = cooked_line($linenr, $n);
> + $raw_line = raw_line($linenr, $n);
> + }
> + }
> + my $qs = get_quoted_string($cooked_line, $raw_line);
> + if ($qs =~ 
> /^"\s*%s(?:[\s:\-]*|[\s:\-]*\(\s*\)\s*[\s:\-]*)?(?:enter|entering|entered|exit|exiting)?\s*\.*\s*\\n"$/i)
>  {
> + if (WARN("FUNC_STYLE",
> +  "Prefer using ftrace to logging 
> function entry/exit\n" . $herectx) &&
> +  $cnt == 1 &&
> +  $fix) {
> + fix_delete_line($fixlinenr, $rawline);
> + }
> + } elsif ($qs !~ /^"%s:/) {
> + if (WARN("FUNC_STYLE",
> +  "Prefer using formatting style '%s:' 
> for __func__\n" . $herectx) &&
> + $fix) {
> + $fixed[$cooked_linenr - 1] =~ 
> s/[:\s]*%s(?:[:\s,\-]*|[\s:\-]*\(\s*\)\s*[\s:\-]*)?//;
> + $fixed[$cooked_linenr - 1] =~ s/"/"%s: 
> /;
> + $fixed[$cooked_linenr - 1] =~ s/"%s: 
> \\n/"%s\\n/;
> + }
> + }
> + }
> +
>  # check for uses of __DATE__, __TIME__, __TIMESTAMP__
>   while ($line =~ /\b(__(?:DATE|TIME|TIMESTAMP)__)\b/g) {
>   ERROR("DATE_TIME",
> -- 
> 2.6.3.368.gf34be46
> 
> 


Re: [net-next PATCH 0/3] Fix differences between IPv4 and IPv6 TCP/UDP checksum calculation

2016-03-13 Thread David Miller
From: Alexander Duyck 
Date: Fri, 11 Mar 2016 14:05:28 -0800

> This patch series is meant to address the differences that exist between
> IPv4 and IPv6 in terms of checksum calculation.  Specifically the IPv6
> function csum_ipv6_magic treated length as a value that could be greater
> than 64K, while csum_tcpudp_magic was truncating the length at 16 bits.
> After looking over the code and giving it some thought I decided it would
> be best to update the IPv4 function so that it worked the same way the IPv6
> one did.  This allows us to get the same results given the same inputs for
> both functions.  As a result we can use the same processes to reverse the
> calculation in the event we need to do something like remove the length of
> the pseudo-header checksum.
> 
> I also took the opportunity to standardize things so that the parameters
> for these functions all use the correct types.  IPv4 addresses are __be32,
> length should always be __u32, and protocol is a __u8.
> 
> With this change in place it corrects an issue with UDP tunnels in which we
> were getting a checksum that was off by 1 when performing fragmentation on
> inner UDP packets.

Series applied, thanks Alexander.


Re: [net-next PATCH 0/3] Fix differences between IPv4 and IPv6 TCP/UDP checksum calculation

2016-03-13 Thread David Miller
From: Alexander Duyck 
Date: Fri, 11 Mar 2016 14:05:28 -0800

> This patch series is meant to address the differences that exist between
> IPv4 and IPv6 in terms of checksum calculation.  Specifically the IPv6
> function csum_ipv6_magic treated length as a value that could be greater
> than 64K, while csum_tcpudp_magic was truncating the length at 16 bits.
> After looking over the code and giving it some thought I decided it would
> be best to update the IPv4 function so that it worked the same way the IPv6
> one did.  This allows us to get the same results given the same inputs for
> both functions.  As a result we can use the same processes to reverse the
> calculation in the event we need to do something like remove the length of
> the pseudo-header checksum.
> 
> I also took the opportunity to standardize things so that the parameters
> for these functions all use the correct types.  IPv4 addresses are __be32,
> length should always be __u32, and protocol is a __u8.
> 
> With this change in place it corrects an issue with UDP tunnels in which we
> were getting a checksum that was off by 1 when performing fragmentation on
> inner UDP packets.

Series applied, thanks Alexander.


[PATCH] devpts: Make ptmx be owned by the userns owner instead of userns-local 0

2016-03-13 Thread Andy Lutomirski
We used to have ptmx be owned by the inner uid and gid 0.  Change
this: if the owner and group are both mapped but are not both 0,
then use the owner instead.

For container-style namespaces (LXC, etc), this should have no
effect -- UID 0 will either be the owner or will be unmapped.

The important behavior change is for sandboxes: many sandboxes
intentionally do not create an inner uid 0.  Without this patch,
mounting devpts in such a sandbox is awkward.  With this patch, it
will just work and ptmx will be owned by the namespace owner.

Cc: Alexander Larsson 
Cc: mcla...@redhat.com
Cc: "Eric W. Biederman" 
Cc: Linux Containers 
Signed-off-by: Andy Lutomirski 
---
 fs/devpts/inode.c | 34 ++
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 655f21f99160..d6fa2d1beee3 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DEVPTS_DEFAULT_MODE 0600
 /*
@@ -250,10 +251,35 @@ static int mknod_ptmx(struct super_block *sb)
kuid_t root_uid;
kgid_t root_gid;
 
-   root_uid = make_kuid(current_user_ns(), 0);
-   root_gid = make_kgid(current_user_ns(), 0);
-   if (!uid_valid(root_uid) || !gid_valid(root_gid))
-   return -EINVAL;
+   /*
+* For a new devpts instance, ptmx is owned by the creating user
+* namespace's owner.  Usually, that will be 0 as seen by the
+* user namespace, but for unprivileged sandbox namespaces,
+* there may not be a uid 0 or gid 0 at all.
+*/
+   root_uid = current_user_ns()->owner;
+   root_gid = current_user_ns()->group;
+
+   if (!uid_valid(root_uid) || !gid_valid(root_gid)) {
+   /*
+* It's very unlikely for us to get here if the userns
+* owner is not mapped, but it's possible -- we'd have
+* to be running in the userns with capabilities granted
+* by unshare or setns, since there is no inner
+* privileged user.  Nonetheless, this could happen, and
+* we don't want ptmx to be owned by an unmapped user or
+* group.
+*
+* If this happens fall back to historical behavior:
+* try to have ptmx be owned by 0:0.
+*/
+   root_uid = make_kuid(current_user_ns(), 0);
+   root_gid = make_kgid(current_user_ns(), 0);
+
+   /* If this still doesn't work, give up. */
+   if (!uid_valid(root_uid) || !gid_valid(root_gid))
+   return -EINVAL;
+   }
 
inode_lock(d_inode(root));
 
-- 
2.5.0



[PATCH] devpts: Make ptmx be owned by the userns owner instead of userns-local 0

2016-03-13 Thread Andy Lutomirski
We used to have ptmx be owned by the inner uid and gid 0.  Change
this: if the owner and group are both mapped but are not both 0,
then use the owner instead.

For container-style namespaces (LXC, etc), this should have no
effect -- UID 0 will either be the owner or will be unmapped.

The important behavior change is for sandboxes: many sandboxes
intentionally do not create an inner uid 0.  Without this patch,
mounting devpts in such a sandbox is awkward.  With this patch, it
will just work and ptmx will be owned by the namespace owner.

Cc: Alexander Larsson 
Cc: mcla...@redhat.com
Cc: "Eric W. Biederman" 
Cc: Linux Containers 
Signed-off-by: Andy Lutomirski 
---
 fs/devpts/inode.c | 34 ++
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 655f21f99160..d6fa2d1beee3 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DEVPTS_DEFAULT_MODE 0600
 /*
@@ -250,10 +251,35 @@ static int mknod_ptmx(struct super_block *sb)
kuid_t root_uid;
kgid_t root_gid;
 
-   root_uid = make_kuid(current_user_ns(), 0);
-   root_gid = make_kgid(current_user_ns(), 0);
-   if (!uid_valid(root_uid) || !gid_valid(root_gid))
-   return -EINVAL;
+   /*
+* For a new devpts instance, ptmx is owned by the creating user
+* namespace's owner.  Usually, that will be 0 as seen by the
+* user namespace, but for unprivileged sandbox namespaces,
+* there may not be a uid 0 or gid 0 at all.
+*/
+   root_uid = current_user_ns()->owner;
+   root_gid = current_user_ns()->group;
+
+   if (!uid_valid(root_uid) || !gid_valid(root_gid)) {
+   /*
+* It's very unlikely for us to get here if the userns
+* owner is not mapped, but it's possible -- we'd have
+* to be running in the userns with capabilities granted
+* by unshare or setns, since there is no inner
+* privileged user.  Nonetheless, this could happen, and
+* we don't want ptmx to be owned by an unmapped user or
+* group.
+*
+* If this happens fall back to historical behavior:
+* try to have ptmx be owned by 0:0.
+*/
+   root_uid = make_kuid(current_user_ns(), 0);
+   root_gid = make_kgid(current_user_ns(), 0);
+
+   /* If this still doesn't work, give up. */
+   if (!uid_valid(root_uid) || !gid_valid(root_gid))
+   return -EINVAL;
+   }
 
inode_lock(d_inode(root));
 
-- 
2.5.0



Re: [PATCH v1 13/19] zsmalloc: factor page chain functionality out

2016-03-13 Thread Minchan Kim
On Sat, Mar 12, 2016 at 11:09:36AM +0800, xuyiping wrote:
> 
> 
> On 2016/3/11 15:30, Minchan Kim wrote:
> >For migration, we need to create sub-page chain of zspage
> >dynamically so this patch factors it out from alloc_zspage.
> >
> >As a minor refactoring, it makes OBJ_ALLOCATED_TAG assign
> >more clear in obj_malloc(it could be another patch but it's
> >trivial so I want to put together in this patch).
> >
> >Signed-off-by: Minchan Kim 
> >---
> >  mm/zsmalloc.c | 78 
> > ++-
> >  1 file changed, 45 insertions(+), 33 deletions(-)
> >
> >diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> >index bfc6a048afac..f86f8aaeb902 100644
> >--- a/mm/zsmalloc.c
> >+++ b/mm/zsmalloc.c
> >@@ -977,7 +977,9 @@ static void init_zspage(struct size_class *class, struct 
> >page *first_page)
> > unsigned long off = 0;
> > struct page *page = first_page;
> >
> >-VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >+first_page->freelist = NULL;
> >+INIT_LIST_HEAD(&first_page->lru);
> >+set_zspage_inuse(first_page, 0);
> >
> > while (page) {
> > struct page *next_page;
> >@@ -1022,13 +1024,44 @@ static void init_zspage(struct size_class *class, 
> >struct page *first_page)
> > set_freeobj(first_page, 0);
> >  }
> >
> >+static void create_page_chain(struct page *pages[], int nr_pages)
> >+{
> >+int i;
> >+struct page *page;
> >+struct page *prev_page = NULL;
> >+struct page *first_page = NULL;
> >+
> >+for (i = 0; i < nr_pages; i++) {
> >+page = pages[i];
> >+
> >+INIT_LIST_HEAD(&page->lru);
> >+if (i == 0) {
> >+SetPagePrivate(page);
> >+set_page_private(page, 0);
> >+first_page = page;
> >+}
> >+
> >+if (i == 1)
> >+set_page_private(first_page, (unsigned long)page);
> >+if (i >= 1)
> >+set_page_private(page, (unsigned long)first_page);
> >+if (i >= 2)
> >+list_add(&page->lru, &prev_page->lru);
> >+if (i == nr_pages - 1)
> >+SetPagePrivate2(page);
> >+
> >+prev_page = page;
> >+}
> >+}
> >+
> >  /*
> >   * Allocate a zspage for the given size class
> >   */
> >  static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
> >  {
> >-int i, error;
> >+int i;
> > struct page *first_page = NULL, *uninitialized_var(prev_page);
> >+struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
> >
> > /*
> >  * Allocate individual pages and link them together as:
> >@@ -1041,43 +1074,23 @@ static struct page *alloc_zspage(struct size_class 
> >*class, gfp_t flags)
> 
>   *uninitialized_var(prev_page) in alloc_zspage is not in use more.

True.
It says why we should avoid uninitialized_var if possible.
If we didn't use uninitialized_var, compiler could warn about it
when I did build test.

Thanks.



Re: [PATCH v1 13/19] zsmalloc: factor page chain functionality out

2016-03-13 Thread Minchan Kim
On Sat, Mar 12, 2016 at 11:09:36AM +0800, xuyiping wrote:
> 
> 
> On 2016/3/11 15:30, Minchan Kim wrote:
> >For migration, we need to create sub-page chain of zspage
> >dynamically so this patch factors it out from alloc_zspage.
> >
> >As a minor refactoring, it makes OBJ_ALLOCATED_TAG assign
> >more clear in obj_malloc(it could be another patch but it's
> >trivial so I want to put together in this patch).
> >
> >Signed-off-by: Minchan Kim 
> >---
> >  mm/zsmalloc.c | 78 
> > ++-
> >  1 file changed, 45 insertions(+), 33 deletions(-)
> >
> >diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> >index bfc6a048afac..f86f8aaeb902 100644
> >--- a/mm/zsmalloc.c
> >+++ b/mm/zsmalloc.c
> >@@ -977,7 +977,9 @@ static void init_zspage(struct size_class *class, struct 
> >page *first_page)
> > unsigned long off = 0;
> > struct page *page = first_page;
> >
> >-VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >+first_page->freelist = NULL;
> >+INIT_LIST_HEAD(&first_page->lru);
> >+set_zspage_inuse(first_page, 0);
> >
> > while (page) {
> > struct page *next_page;
> >@@ -1022,13 +1024,44 @@ static void init_zspage(struct size_class *class, 
> >struct page *first_page)
> > set_freeobj(first_page, 0);
> >  }
> >
> >+static void create_page_chain(struct page *pages[], int nr_pages)
> >+{
> >+int i;
> >+struct page *page;
> >+struct page *prev_page = NULL;
> >+struct page *first_page = NULL;
> >+
> >+for (i = 0; i < nr_pages; i++) {
> >+page = pages[i];
> >+
> >+INIT_LIST_HEAD(&page->lru);
> >+if (i == 0) {
> >+SetPagePrivate(page);
> >+set_page_private(page, 0);
> >+first_page = page;
> >+}
> >+
> >+if (i == 1)
> >+set_page_private(first_page, (unsigned long)page);
> >+if (i >= 1)
> >+set_page_private(page, (unsigned long)first_page);
> >+if (i >= 2)
> >+list_add(&page->lru, &prev_page->lru);
> >+if (i == nr_pages - 1)
> >+SetPagePrivate2(page);
> >+
> >+prev_page = page;
> >+}
> >+}
> >+
> >  /*
> >   * Allocate a zspage for the given size class
> >   */
> >  static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
> >  {
> >-int i, error;
> >+int i;
> > struct page *first_page = NULL, *uninitialized_var(prev_page);
> >+struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
> >
> > /*
> >  * Allocate individual pages and link them together as:
> >@@ -1041,43 +1074,23 @@ static struct page *alloc_zspage(struct size_class 
> >*class, gfp_t flags)
> 
>   *uninitialized_var(prev_page) in alloc_zspage is not in use more.

True.
It says why we should avoid uninitialized_var if possible.
If we didn't use uninitialized_var, compiler could warn about it
when I did build test.

Thanks.



[GIT PULL] Thermal SoC management updates for v4.6-rc1

2016-03-13 Thread Eduardo Valentin
Hello Rui,

Please pull from

  git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal linus

to receive Thermal-SoC Management updates for v4.6-rc1 with top-most

61f846f37354fd294f3172845d9fec2c03f60a45:

  thermal: doc: Add details of 
devm_thermal_zone_of_sensor_{register,unregister} (2016-03-09 10:51:41 -0800)

on top of commit 2850713576e81e3b887cd92a9965fba0dd1717c0:

  Merge branch 'for-linus' of git://git.kernel.dk/linux-block (2016-02-17 
11:59:23 -0800)

Specifics in this pull request:
- New driver:  Mediatek thermal driver, thanks Sascha
- New API: devm_ versions for OF thermal sensor register API, thanks Laxman
- These changes have been CI tested using KernelCI bot [1,2]. \o/

[1] - 
https://kernelci.org/boot/all/job/evalenti/kernel/v4.5-rc6-28-g873b196fb797/
[2] - https://kernelci.org/build/evalenti/kernel/v4.5-rc6-28-g873b196fb797/

BR,

Eduardo Valentin


Eduardo Valentin (2):
  thermal: small style cleanup in mtk_thermal
  thermal: mtk: allow compile testing on UM

Laxman Dewangan (3):
  thermal: doc: Add details of thermal_zone_of_sensor_{register,unregister}
  thermal: of-thermal: Add devm version of thermal_zone_of_sensor_register
  thermal: doc: Add details of 
devm_thermal_zone_of_sensor_{register,unregister}

Sascha Hauer (2):
  dt-bindings: thermal: Add binding document for Mediatek thermal controller
  thermal: Add Mediatek thermal controller support

 .../bindings/thermal/mediatek-thermal.txt  |  43 ++
 Documentation/thermal/sysfs-api.txt|  68 +++
 drivers/thermal/Kconfig|   9 +
 drivers/thermal/Makefile   |   1 +
 drivers/thermal/mtk_thermal.c  | 625 +
 drivers/thermal/of-thermal.c   |  81 +++
 include/linux/thermal.h|  18 +
 7 files changed, 845 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
 create mode 100644 drivers/thermal/mtk_thermal.c


[GIT PULL] Thermal SoC management updates for v4.6-rc1

2016-03-13 Thread Eduardo Valentin
Hello Rui,

Please pull from

  git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal linus

to receive Thermal-SoC Management updates for v4.6-rc1 with top-most

61f846f37354fd294f3172845d9fec2c03f60a45:

  thermal: doc: Add details of 
devm_thermal_zone_of_sensor_{register,unregister} (2016-03-09 10:51:41 -0800)

on top of commit 2850713576e81e3b887cd92a9965fba0dd1717c0:

  Merge branch 'for-linus' of git://git.kernel.dk/linux-block (2016-02-17 
11:59:23 -0800)

Specifics in this pull request:
- New driver:  Mediatek thermal driver, thanks Sascha
- New API: devm_ versions for OF thermal sensor register API, thanks Laxman
- These changes have been CI tested using KernelCI bot [1,2]. \o/

[1] - 
https://kernelci.org/boot/all/job/evalenti/kernel/v4.5-rc6-28-g873b196fb797/
[2] - https://kernelci.org/build/evalenti/kernel/v4.5-rc6-28-g873b196fb797/

BR,

Eduardo Valentin


Eduardo Valentin (2):
  thermal: small style cleanup in mtk_thermal
  thermal: mtk: allow compile testing on UM

Laxman Dewangan (3):
  thermal: doc: Add details of thermal_zone_of_sensor_{register,unregister}
  thermal: of-thermal: Add devm version of thermal_zone_of_sensor_register
  thermal: doc: Add details of 
devm_thermal_zone_of_sensor_{register,unregister}

Sascha Hauer (2):
  dt-bindings: thermal: Add binding document for Mediatek thermal controller
  thermal: Add Mediatek thermal controller support

 .../bindings/thermal/mediatek-thermal.txt  |  43 ++
 Documentation/thermal/sysfs-api.txt|  68 +++
 drivers/thermal/Kconfig|   9 +
 drivers/thermal/Makefile   |   1 +
 drivers/thermal/mtk_thermal.c  | 625 +
 drivers/thermal/of-thermal.c   |  81 +++
 include/linux/thermal.h|  18 +
 7 files changed, 845 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
 create mode 100644 drivers/thermal/mtk_thermal.c


[PATCH 0/2] xtensa: add core variants used in sample configurations

2016-03-13 Thread Max Filippov
Hello,

this series adds two core variant header sets for test_mmuhifi_c3 and
test_kc705_hifi used in sample configurations for SMP and audio respectively.

Max Filippov (1):
  xtensa: add test_kc705_hifi variant

Piet Delaney (1):
  xtensa: add Three Core HiFi-2 MX Variant.

 .../test_kc705_hifi/include/variant/core.h | 532 +
 .../test_kc705_hifi/include/variant/tie-asm.h  | 329 +
 .../variants/test_kc705_hifi/include/variant/tie.h | 190 
 .../test_mmuhifi_c3/include/variant/core.h | 384 +++
 .../test_mmuhifi_c3/include/variant/tie-asm.h  | 183 +++
 .../variants/test_mmuhifi_c3/include/variant/tie.h | 141 ++
 6 files changed, 1759 insertions(+)
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_kc705_hifi/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/tie.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie.h

-- 
2.1.4



[PATCH 0/2] xtensa: add core variants used in sample configurations

2016-03-13 Thread Max Filippov
Hello,

this series adds two core variant header sets for test_mmuhifi_c3 and
test_kc705_hifi used in sample configurations for SMP and audio respectively.

Max Filippov (1):
  xtensa: add test_kc705_hifi variant

Piet Delaney (1):
  xtensa: add Three Core HiFi-2 MX Variant.

 .../test_kc705_hifi/include/variant/core.h | 532 +
 .../test_kc705_hifi/include/variant/tie-asm.h  | 329 +
 .../variants/test_kc705_hifi/include/variant/tie.h | 190 
 .../test_mmuhifi_c3/include/variant/core.h | 384 +++
 .../test_mmuhifi_c3/include/variant/tie-asm.h  | 183 +++
 .../variants/test_mmuhifi_c3/include/variant/tie.h | 141 ++
 6 files changed, 1759 insertions(+)
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_kc705_hifi/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/tie.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie.h

-- 
2.1.4



[PATCH 1/2] xtensa: add Three Core HiFi-2 MX Variant.

2016-03-13 Thread Max Filippov
From: Piet Delaney 

This variant has coherent cache, is equipped with interrupt distributor
and is capable of running SMP linux.

Signed-off-by: Piet Delaney 
Signed-off-by: Max Filippov 
---
 .../test_mmuhifi_c3/include/variant/core.h | 384 +
 .../test_mmuhifi_c3/include/variant/tie-asm.h  | 183 ++
 .../variants/test_mmuhifi_c3/include/variant/tie.h | 141 
 3 files changed, 708 insertions(+)
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie.h

diff --git a/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h 
b/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
new file mode 100644
index 000..309caa1
--- /dev/null
+++ b/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
@@ -0,0 +1,384 @@
+/*
+ * Xtensa processor core configuration information.
+ *
+ * This file is subject to the terms and conditions of version 2.1 of the GNU
+ * Lesser General Public License as published by the Free Software Foundation.
+ *
+ * Copyright (c) 1999-2009 Tensilica Inc.
+ */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
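+/****************************************************************************
+   Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/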
+
+/*
+ *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ *  configured, and a value of 0 otherwise.  These macros are always defined.
+ */
+
+
+/*--
+   ISA
+  --*/
+
+#define XCHAL_HAVE_BE  0   /* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED1   /* windowed registers option */
+#define XCHAL_NUM_AREGS32  /* num of physical addr 
regs */
+#define XCHAL_NUM_AREGS_LOG2   5   /* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE 8   /* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG   1   /* debug option */
+#define XCHAL_HAVE_DENSITY 1   /* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS   1   /* zero-overhead loops */
+#define XCHAL_HAVE_NSA 1   /* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX  1   /* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT1   /* SEXT instruction */
+#define XCHAL_HAVE_CLAMPS  1   /* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16   1   /* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32   1   /* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH  0   /* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32   0   /* QUOS/QUOU/REMS/REMU 
instructions */
+#define XCHAL_HAVE_L32R1   /* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS   1   /* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16 0   /* CONST16 instruction */
+#define XCHAL_HAVE_ADDX1   /* ADDX#/SUBX# 
instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES   0   /* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES  0   /* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12  1   /* (obsolete option) */
+#define XCHAL_HAVE_ABS 1   /* ABS instruction */
+/*#define XCHAL_HAVE_POPC  0*/ /* POPC instruction */
+/*#define XCHAL_HAVE_CRC   0*/ /* CRC instruction */
+#define XCHAL_HAVE_RELEASE_SYNC1   /* L32AI/S32RI 
instructions */
+#define XCHAL_HAVE_S32C1I  1   /* S32C1I instruction */
+#define XCHAL_HAVE_SPECULATION 0   /* speculation */
+#define XCHAL_HAVE_FULL_RESET  1   /* all regs/state reset */
+#define XCHAL_NUM_CONTEXTS 1   /* */
+#define XCHAL_NUM_MISC_REGS2   /* num of scratch regs (0..4) */
+#define XCHAL_HAVE_TAP_MASTER  0   /* JTAG TAP control instr's */
+#define XCHAL_HAVE_PRID1   /* processor ID 
register */
+#define XCHAL_HAVE_EXTERN_REGS 1   /* WER/RER instructions */
+#define XCHAL_HAVE_MP_INTERRUPTS   1   /* interrupt distributor port */
+#define XCHAL_HAVE_MP_RUNSTALL 1   /* core RunStall control port */
+#define XCHAL_HAVE_THREADPTR   1   /* THREADPTR register */
+#define XCHAL_HAVE_BOOLEANS1   /* boolean registers */
+#define XCHAL_HAVE_CP  

[PATCH 2/2] xtensa: add test_kc705_hifi variant

2016-03-13 Thread Max Filippov
This variant has HiFi3 coprocessor and is used in sample audio-enabled
configuration.

Signed-off-by: Max Filippov 
---
 .../test_kc705_hifi/include/variant/core.h | 532 +
 .../test_kc705_hifi/include/variant/tie-asm.h  | 329 +
 .../variants/test_kc705_hifi/include/variant/tie.h | 190 
 3 files changed, 1051 insertions(+)
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_kc705_hifi/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/tie.h

diff --git a/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h 
b/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
new file mode 100644
index 000..1ed2cb8
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
@@ -0,0 +1,532 @@
+/* 
+ * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa
+ * processor CORE configuration
+ *
+ *  See <xtensa/config/core.h>, which includes this file, for more details.
+ */
+
+/* Xtensa processor core configuration information.
+
+   Copyright (c) 1999-2014 Tensilica Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
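+/****************************************************************************
+   Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/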
+
+/*
+ *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ *  configured, and a value of 0 otherwise.  These macros are always defined.
+ */
+
+
+/*--
+   ISA
+  --*/
+
+#define XCHAL_HAVE_BE  0   /* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED1   /* windowed registers option */
+#define XCHAL_NUM_AREGS32  /* num of physical addr 
regs */
+#define XCHAL_NUM_AREGS_LOG2   5   /* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE 8   /* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG   1   /* debug option */
+#define XCHAL_HAVE_DENSITY 1   /* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS   1   /* zero-overhead loops */
+#define XCHAL_LOOP_BUFFER_SIZE 0   /* zero-ov. loop instr buffer 
size */
+#define XCHAL_HAVE_NSA 1   /* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX  1   /* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT1   /* SEXT instruction */
+#define XCHAL_HAVE_CLAMPS  1   /* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16   1   /* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32   1   /* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH  1   /* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32   1   /* QUOS/QUOU/REMS/REMU 
instructions */
+#define XCHAL_HAVE_L32R1   /* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS   0   /* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16 0   /* CONST16 instruction */
+#define XCHAL_HAVE_ADDX1   /* ADDX#/SUBX# 
instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES   0   /* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES  0   /* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12  1   /* (obsolete option) */
+#define XCHAL_HAVE_ABS 1   /* ABS instruction */
+/*#define XCHAL_HAVE_POPC  0*/ /* POPC 

[PATCH 1/2] xtensa: add Three Core HiFi-2 MX Variant.

2016-03-13 Thread Max Filippov
From: Piet Delaney 

This variant has coherent cache, is equipped with interrupt distributor
and is capable of running SMP linux.

Signed-off-by: Piet Delaney 
Signed-off-by: Max Filippov 
---
 .../test_mmuhifi_c3/include/variant/core.h | 384 +
 .../test_mmuhifi_c3/include/variant/tie-asm.h  | 183 ++
 .../variants/test_mmuhifi_c3/include/variant/tie.h | 141 
 3 files changed, 708 insertions(+)
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_mmuhifi_c3/include/variant/tie.h

diff --git a/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h 
b/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
new file mode 100644
index 000..309caa1
--- /dev/null
+++ b/arch/xtensa/variants/test_mmuhifi_c3/include/variant/core.h
@@ -0,0 +1,384 @@
+/*
+ * Xtensa processor core configuration information.
+ *
+ * This file is subject to the terms and conditions of version 2.1 of the GNU
+ * Lesser General Public License as published by the Free Software Foundation.
+ *
+ * Copyright (c) 1999-2009 Tensilica Inc.
+ */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
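+/****************************************************************************
+   Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/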
+
+/*
+ *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ *  configured, and a value of 0 otherwise.  These macros are always defined.
+ */
+
+
+/*--
+   ISA
+  --*/
+
+#define XCHAL_HAVE_BE  0   /* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED1   /* windowed registers option */
+#define XCHAL_NUM_AREGS32  /* num of physical addr 
regs */
+#define XCHAL_NUM_AREGS_LOG2   5   /* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE 8   /* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG   1   /* debug option */
+#define XCHAL_HAVE_DENSITY 1   /* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS   1   /* zero-overhead loops */
+#define XCHAL_HAVE_NSA 1   /* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX  1   /* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT1   /* SEXT instruction */
+#define XCHAL_HAVE_CLAMPS  1   /* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16   1   /* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32   1   /* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH  0   /* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32   0   /* QUOS/QUOU/REMS/REMU 
instructions */
+#define XCHAL_HAVE_L32R1   /* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS   1   /* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16 0   /* CONST16 instruction */
+#define XCHAL_HAVE_ADDX1   /* ADDX#/SUBX# 
instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES   0   /* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES  0   /* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12  1   /* (obsolete option) */
+#define XCHAL_HAVE_ABS 1   /* ABS instruction */
+/*#define XCHAL_HAVE_POPC  0*/ /* POPC instruction */
+/*#define XCHAL_HAVE_CRC   0*/ /* CRC instruction */
+#define XCHAL_HAVE_RELEASE_SYNC1   /* L32AI/S32RI 
instructions */
+#define XCHAL_HAVE_S32C1I  1   /* S32C1I instruction */
+#define XCHAL_HAVE_SPECULATION 0   /* speculation */
+#define XCHAL_HAVE_FULL_RESET  1   /* all regs/state reset */
+#define XCHAL_NUM_CONTEXTS 1   /* */
+#define XCHAL_NUM_MISC_REGS2   /* num of scratch regs (0..4) */
+#define XCHAL_HAVE_TAP_MASTER  0   /* JTAG TAP control instr's */
+#define XCHAL_HAVE_PRID1   /* processor ID 
register */
+#define XCHAL_HAVE_EXTERN_REGS 1   /* WER/RER instructions */
+#define XCHAL_HAVE_MP_INTERRUPTS   1   /* interrupt distributor port */
+#define XCHAL_HAVE_MP_RUNSTALL 1   /* core RunStall control port */
+#define XCHAL_HAVE_THREADPTR   1   /* THREADPTR register */
+#define XCHAL_HAVE_BOOLEANS1   /* boolean registers */
+#define XCHAL_HAVE_CP  1   /* CPENABLE reg (coprocessor) */
+#define XCHAL_CP_MAXCFG   

[PATCH 2/2] xtensa: add test_kc705_hifi variant

2016-03-13 Thread Max Filippov
This variant has HiFi3 coprocessor and is used in sample audio-enabled
configuration.

Signed-off-by: Max Filippov 
---
 .../test_kc705_hifi/include/variant/core.h | 532 +
 .../test_kc705_hifi/include/variant/tie-asm.h  | 329 +
 .../variants/test_kc705_hifi/include/variant/tie.h | 190 
 3 files changed, 1051 insertions(+)
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
 create mode 100644 
arch/xtensa/variants/test_kc705_hifi/include/variant/tie-asm.h
 create mode 100644 arch/xtensa/variants/test_kc705_hifi/include/variant/tie.h

diff --git a/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h 
b/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
new file mode 100644
index 000..1ed2cb8
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_hifi/include/variant/core.h
@@ -0,0 +1,532 @@
+/* 
+ * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa
+ * processor CORE configuration
+ *
+ *  See <xtensa/config/core.h>, which includes this file, for more details.
+ */
+
+/* Xtensa processor core configuration information.
+
+   Copyright (c) 1999-2014 Tensilica Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
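+/****************************************************************************
+   Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/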
+
+/*
+ *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ *  configured, and a value of 0 otherwise.  These macros are always defined.
+ */
+
+
+/*--
+   ISA
+  --*/
+
+#define XCHAL_HAVE_BE  0   /* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED 1   /* windowed registers option */
+#define XCHAL_NUM_AREGS 32  /* num of physical addr regs */
+#define XCHAL_NUM_AREGS_LOG2   5   /* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE 8   /* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG   1   /* debug option */
+#define XCHAL_HAVE_DENSITY 1   /* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS   1   /* zero-overhead loops */
+#define XCHAL_LOOP_BUFFER_SIZE 0   /* zero-ov. loop instr buffer 
size */
+#define XCHAL_HAVE_NSA 1   /* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX  1   /* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT 1   /* SEXT instruction */
+#define XCHAL_HAVE_CLAMPS  1   /* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16   1   /* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32   1   /* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH  1   /* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32   1   /* QUOS/QUOU/REMS/REMU 
instructions */
+#define XCHAL_HAVE_L32R 1   /* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS   0   /* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16 0   /* CONST16 instruction */
+#define XCHAL_HAVE_ADDX 1   /* ADDX#/SUBX# instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES   0   /* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES  0   /* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12  1   /* (obsolete option) */
+#define XCHAL_HAVE_ABS 1   /* ABS instruction */
+/*#define XCHAL_HAVE_POPC  0*/ /* POPC instruction */
+/*#define 

Re: [PATCH v1 09/19] zsmalloc: keep max_object in size_class

2016-03-13 Thread Minchan Kim
On Sat, Mar 12, 2016 at 09:44:48AM +0800, xuyiping wrote:
> 
> 
> On 2016/3/11 15:30, Minchan Kim wrote:
> >Every zspage in a size_class has same number of max objects so
> >we could move it to a size_class.
> >
> >Signed-off-by: Minchan Kim 
> >---
> >  mm/zsmalloc.c | 29 ++---
> >  1 file changed, 14 insertions(+), 15 deletions(-)
> >
> >diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> >index b4fb11831acb..ca663c82c1fc 100644
> >--- a/mm/zsmalloc.c
> >+++ b/mm/zsmalloc.c
> >@@ -32,8 +32,6 @@
> >   * page->freelist: points to the first free object in zspage.
> >   * Free objects are linked together using in-place
> >   * metadata.
> >- *  page->objects: maximum number of objects we can store in this
> >- *  zspage (class->zspage_order * PAGE_SIZE / class->size)
> >   * page->lru: links together first pages of various zspages.
> >   * Basically forming list of zspages in a fullness group.
> >   * page->mapping: class index and fullness group of the zspage
> >@@ -211,6 +209,7 @@ struct size_class {
> >  * of ZS_ALIGN.
> >  */
> > int size;
> >+int objs_per_zspage;
> > unsigned int index;
> >
> > struct zs_size_stat stats;
> >@@ -622,21 +621,22 @@ static inline void zs_pool_stat_destroy(struct zs_pool 
> >*pool)
> >   * the pool (not yet implemented). This function returns fullness
> >   * status of the given page.
> >   */
> >-static enum fullness_group get_fullness_group(struct page *first_page)
> >+static enum fullness_group get_fullness_group(struct size_class *class,
> >+struct page *first_page)
> >  {
> >-int inuse, max_objects;
> >+int inuse, objs_per_zspage;
> > enum fullness_group fg;
> >
> > VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >
> > inuse = first_page->inuse;
> >-max_objects = first_page->objects;
> >+objs_per_zspage = class->objs_per_zspage;
> >
> > if (inuse == 0)
> > fg = ZS_EMPTY;
> >-else if (inuse == max_objects)
> >+else if (inuse == objs_per_zspage)
> > fg = ZS_FULL;
> >-else if (inuse <= 3 * max_objects / fullness_threshold_frac)
> >+else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac)
> > fg = ZS_ALMOST_EMPTY;
> > else
> > fg = ZS_ALMOST_FULL;
> >@@ -723,7 +723,7 @@ static enum fullness_group fix_fullness_group(struct 
> >size_class *class,
> > enum fullness_group currfg, newfg;
> >
> > get_zspage_mapping(first_page, &class_idx, &currfg);
> >-newfg = get_fullness_group(first_page);
> >+newfg = get_fullness_group(class, first_page);
> > if (newfg == currfg)
> > goto out;
> >
> >@@ -1003,9 +1003,6 @@ static struct page *alloc_zspage(struct size_class 
> >*class, gfp_t flags)
> > init_zspage(class, first_page);
> >
> > first_page->freelist = location_to_obj(first_page, 0);
> >-/* Maximum number of objects we can store in this zspage */
> >-first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
> >-
> > error = 0; /* Success */
> >
> >  cleanup:
> >@@ -1235,11 +1232,11 @@ static bool can_merge(struct size_class *prev, int 
> >size, int pages_per_zspage)
> > return true;
> >  }
> >
> >-static bool zspage_full(struct page *first_page)
> >+static bool zspage_full(struct size_class *class, struct page *first_page)
> >  {
> > VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >
> >-return first_page->inuse == first_page->objects;
> >+return first_page->inuse == class->objs_per_zspage;
> >  }
> >
> >  unsigned long zs_get_total_pages(struct zs_pool *pool)
> >@@ -1625,7 +1622,7 @@ static int migrate_zspage(struct zs_pool *pool, struct 
> >size_class *class,
> > }
> >
> > /* Stop if there is no more space */
> >-if (zspage_full(d_page)) {
> >+if (zspage_full(class, d_page)) {
> > unpin_tag(handle);
> > ret = -ENOMEM;
> > break;
> >@@ -1684,7 +1681,7 @@ static enum fullness_group putback_zspage(struct 
> >zs_pool *pool,
> >  {
> > enum fullness_group fullness;
> >
> >-fullness = get_fullness_group(first_page);
> >+fullness = get_fullness_group(class, first_page);
> > insert_zspage(class, fullness, first_page);
> > set_zspage_mapping(first_page, class->index, fullness);
> >
> >@@ -1933,6 +1930,8 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t 
> >flags)
> > class->size = size;
> > class->index = i;
> > class->pages_per_zspage = pages_per_zspage;
> >+class->objs_per_zspage = class->pages_per_zspage *
> >+PAGE_SIZE / class->size;
> > if (pages_per_zspage == 1 &&
> > get_maxobj_per_zspage(size, pages_per_zspage) == 1)
> > class->huge = true;
> 
>   computes the 

Re: [PATCH v1 09/19] zsmalloc: keep max_object in size_class

2016-03-13 Thread Minchan Kim
On Sat, Mar 12, 2016 at 09:44:48AM +0800, xuyiping wrote:
> 
> 
> On 2016/3/11 15:30, Minchan Kim wrote:
> >Every zspage in a size_class has same number of max objects so
> >we could move it to a size_class.
> >
> >Signed-off-by: Minchan Kim 
> >---
> >  mm/zsmalloc.c | 29 ++---
> >  1 file changed, 14 insertions(+), 15 deletions(-)
> >
> >diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> >index b4fb11831acb..ca663c82c1fc 100644
> >--- a/mm/zsmalloc.c
> >+++ b/mm/zsmalloc.c
> >@@ -32,8 +32,6 @@
> >   * page->freelist: points to the first free object in zspage.
> >   * Free objects are linked together using in-place
> >   * metadata.
> >- *  page->objects: maximum number of objects we can store in this
> >- *  zspage (class->zspage_order * PAGE_SIZE / class->size)
> >   * page->lru: links together first pages of various zspages.
> >   * Basically forming list of zspages in a fullness group.
> >   * page->mapping: class index and fullness group of the zspage
> >@@ -211,6 +209,7 @@ struct size_class {
> >  * of ZS_ALIGN.
> >  */
> > int size;
> >+int objs_per_zspage;
> > unsigned int index;
> >
> > struct zs_size_stat stats;
> >@@ -622,21 +621,22 @@ static inline void zs_pool_stat_destroy(struct zs_pool 
> >*pool)
> >   * the pool (not yet implemented). This function returns fullness
> >   * status of the given page.
> >   */
> >-static enum fullness_group get_fullness_group(struct page *first_page)
> >+static enum fullness_group get_fullness_group(struct size_class *class,
> >+struct page *first_page)
> >  {
> >-int inuse, max_objects;
> >+int inuse, objs_per_zspage;
> > enum fullness_group fg;
> >
> > VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >
> > inuse = first_page->inuse;
> >-max_objects = first_page->objects;
> >+objs_per_zspage = class->objs_per_zspage;
> >
> > if (inuse == 0)
> > fg = ZS_EMPTY;
> >-else if (inuse == max_objects)
> >+else if (inuse == objs_per_zspage)
> > fg = ZS_FULL;
> >-else if (inuse <= 3 * max_objects / fullness_threshold_frac)
> >+else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac)
> > fg = ZS_ALMOST_EMPTY;
> > else
> > fg = ZS_ALMOST_FULL;
> >@@ -723,7 +723,7 @@ static enum fullness_group fix_fullness_group(struct 
> >size_class *class,
> > enum fullness_group currfg, newfg;
> >
> > get_zspage_mapping(first_page, &class_idx, &currfg);
> >-newfg = get_fullness_group(first_page);
> >+newfg = get_fullness_group(class, first_page);
> > if (newfg == currfg)
> > goto out;
> >
> >@@ -1003,9 +1003,6 @@ static struct page *alloc_zspage(struct size_class 
> >*class, gfp_t flags)
> > init_zspage(class, first_page);
> >
> > first_page->freelist = location_to_obj(first_page, 0);
> >-/* Maximum number of objects we can store in this zspage */
> >-first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
> >-
> > error = 0; /* Success */
> >
> >  cleanup:
> >@@ -1235,11 +1232,11 @@ static bool can_merge(struct size_class *prev, int 
> >size, int pages_per_zspage)
> > return true;
> >  }
> >
> >-static bool zspage_full(struct page *first_page)
> >+static bool zspage_full(struct size_class *class, struct page *first_page)
> >  {
> > VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
> >
> >-return first_page->inuse == first_page->objects;
> >+return first_page->inuse == class->objs_per_zspage;
> >  }
> >
> >  unsigned long zs_get_total_pages(struct zs_pool *pool)
> >@@ -1625,7 +1622,7 @@ static int migrate_zspage(struct zs_pool *pool, struct 
> >size_class *class,
> > }
> >
> > /* Stop if there is no more space */
> >-if (zspage_full(d_page)) {
> >+if (zspage_full(class, d_page)) {
> > unpin_tag(handle);
> > ret = -ENOMEM;
> > break;
> >@@ -1684,7 +1681,7 @@ static enum fullness_group putback_zspage(struct 
> >zs_pool *pool,
> >  {
> > enum fullness_group fullness;
> >
> >-fullness = get_fullness_group(first_page);
> >+fullness = get_fullness_group(class, first_page);
> > insert_zspage(class, fullness, first_page);
> > set_zspage_mapping(first_page, class->index, fullness);
> >
> >@@ -1933,6 +1930,8 @@ struct zs_pool *zs_create_pool(const char *name, gfp_t 
> >flags)
> > class->size = size;
> > class->index = i;
> > class->pages_per_zspage = pages_per_zspage;
> >+class->objs_per_zspage = class->pages_per_zspage *
> >+PAGE_SIZE / class->size;
> > if (pages_per_zspage == 1 &&
> > get_maxobj_per_zspage(size, pages_per_zspage) == 1)
> > class->huge = true;
> 
>   computes the "objs_per_zspage" twice 

Linux 4.5

2016-03-13 Thread Linus Torvalds
So this is later on a Sunday than my usual schedule, because I just
couldn't make up my mind whether I should do another rc8 or not, and
kept just waffling about it. In the end, I obviously decided not to,
but it could have gone either way.

We did have one nasty regression that got fixed yesterday, and the
networking pull early in the week was larger than I would have wished
for. But the block layer should be all good now, and David went
through all his networking commits an extra time just to make me feel
comfy about it, so in the end I didn't see any point to making the
release cycle any longer than usual.

And on the whole, everything here is pretty small. The diffstat looks
a bit larger for an xfs fix, because that fix has three cleanup
refactoring patches that precede it. And there's an access type
pattern fix in the sound layer that generated lots of noise, but is
all very simple in the end.

In addition to the above, there's random small fixes all over -
shortlog appended for people who want to skim the details as usual.

Go test, and obviously with 4.5 released, I'll start the merge window for 4.6.

   Linus

---
Aaro Koskinen (1):
  MIPS: Fix build with DEBUG_ZBOOT and MACH_JZ4780

Al Viro (2):
  ncpfs: fix a braino in OOM handling in ncp_fill_cache()
  jffs2: reduce the breakage on recovery from halfway failed rename()

Alan Cox (1):
  ASoC: Intel: Skylake: fix pointer scaling

Alex Deucher (3):
  drm/radeon/dp: add back special handling for NUTMEG
  drm/amdgpu/dp: add back special handling for NUTMEG
  Revert "drm/radeon/pm: adjust display configuration after powerstate"

Alexandre Belloni (2):
  phy: micrel: Ensure interrupts are reenabled on resume
  phy: micrel: Disable auto negotiation on startup

Andreas Irestål (1):
  ASoC: adau17x1: Fix incorrect BCLK ratio definitions

Andy Lutomirski (1):
  x86/fpu: Fix 'no387' regression

Anton Bondarenko (1):
  spi: imx: allow only WML aligned transfers to use DMA

Ard Biesheuvel (2):
  arm64: account for sparsemem section alignment when choosing
vmemmap offset
  memremap: check pfn validity before passing to pfn_to_page()

Arend van Spriel (1):
  cfg80211: stop critical protocol session upon disconnect event

Arnd Bergmann (2):
  ASoC: trace: fix printing jack name
  ssb: host_soc depends on sprom

Atsushi Nemoto (1):
  gianfar: Enable eTSEC-106 erratum w/a for MPC8548E Rev2

Benjamin Poirier (1):
  mld, igmp: Fix reserved tailroom calculation

Bernie Harris (1):
  tunnel: Clear IPCB(skb)->opt before dst_link_failure called

Bill Sommerfeld (1):
  udp6: fix UDP/IPv6 encap resubmit path

Bjørn Mork (3):
  qmi_wwan: add Sierra Wireless EM74xx device ID
  cdc_ncm: toggle altsetting to force reset before setup
  cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind

Bob Moore (1):
  ACPICA: Revert "Parser: Fix for SuperName method invocation"

Boris BREZILLON (1):
  MAINTAINERS: add a maintainer for the NAND subsystem

Borislav Petkov (2):
  x86/delay: Avoid preemptible context checks in delay_mwaitx()
  x86/fpu: Fix eager-FPU handling on legacy FPU machines

Brian Foster (4):
  xfs: separate log head record discovery from verification
  xfs: refactor unmount record detection into helper
  xfs: refactor in-core log state update to helper
  xfs: only run torn log write detection on dirty logs

Carlo Caione (1):
  ASoC: cht_bsw_rt5645: Fix writing to string literal

Charles Keepax (1):
  ASoC: samsung: Use IRQ safe spin lock calls

Chris Bainbridge (1):
  mac80211: fix use of uninitialised values in RX aggregation

Christian Borntraeger (1):
  s390/cpumf: Fix lpp detection

Chun-Hao Lin (1):
  r8169:fix "rtl_counters_cond == 1 (loop: 1000, delay: 10)" log spam.

Chunhao Lin (1):
  r8169: Enable RX_MULTI_EN for RTL_GIGA_MAC_VER_41~48

Colin Ian King (3):
  asix: do not free array priv->mdio->irq
  net/ethoc: do not free array priv->mdio->irq
  net: eth: altera: do not free array priv->mdio->irq

Dan Carpenter (1):
  net: moxa: fix an error code

Dan Williams (2):
  list: kill list_force_poison()
  mm: fix mixed zone detection in devm_memremap_pages

Daniel Borkmann (2):
  bpf: fix csum setting for bpf_set_tunnel_key
  vxlan: fix missing options_len update on RX with collect metadata

David Ahern (1):
  net: vrf: Remove direct access to skb->data

David Hildenbrand (1):
  KVM: s390: correct fprs on SIGP (STOP AND) STORE STATUS

David Jander (1):
  gpu: ipu-v3: Reset IPU before activating IRQ

David Matlack (1):
  kvm: cap halt polling at exactly halt_poll_ns

Diego Viola (1):
  net: jme: fix suspend/resume on JMC260

Douglas Miller (1):
  be2net: Don't leak iomapped memory on removal.

Ed Spiridonov (1):
  can: mcp251x: avoid write to error flag register if it's unnecessary

Enrico Jorns (1):
  drm/imx: Add missing 

Linux 4.5

2016-03-13 Thread Linus Torvalds
So this is later on a Sunday than my usual schedule, because I just
couldn't make up my mind whether I should do another rc8 or not, and
kept just waffling about it. In the end, I obviously decided not to,
but it could have gone either way.

We did have one nasty regression that got fixed yesterday, and the
networking pull early in the week was larger than I would have wished
for. But the block layer should be all good now, and David went
through all his networking commits an extra time just to make me feel
comfy about it, so in the end I didn't see any point to making the
release cycle any longer than usual.

And on the whole, everything here is pretty small. The diffstat looks
a bit larger for an xfs fix, because that fix has three cleanup
refactoring patches that precede it. And there's an access type
pattern fix in the sound layer that generated lots of noise, but is
all very simple in the end.

In addition to the above, there's random small fixes all over -
shortlog appended for people who want to skim the details as usual.

Go test, and obviously with 4.5 released, I'll start the merge window for 4.6.

   Linus

---
Aaro Koskinen (1):
  MIPS: Fix build with DEBUG_ZBOOT and MACH_JZ4780

Al Viro (2):
  ncpfs: fix a braino in OOM handling in ncp_fill_cache()
  jffs2: reduce the breakage on recovery from halfway failed rename()

Alan Cox (1):
  ASoC: Intel: Skylake: fix pointer scaling

Alex Deucher (3):
  drm/radeon/dp: add back special handling for NUTMEG
  drm/amdgpu/dp: add back special handling for NUTMEG
  Revert "drm/radeon/pm: adjust display configuration after powerstate"

Alexandre Belloni (2):
  phy: micrel: Ensure interrupts are reenabled on resume
  phy: micrel: Disable auto negotiation on startup

Andreas Irestål (1):
  ASoC: adau17x1: Fix incorrect BCLK ratio definitions

Andy Lutomirski (1):
  x86/fpu: Fix 'no387' regression

Anton Bondarenko (1):
  spi: imx: allow only WML aligned transfers to use DMA

Ard Biesheuvel (2):
  arm64: account for sparsemem section alignment when choosing
vmemmap offset
  memremap: check pfn validity before passing to pfn_to_page()

Arend van Spriel (1):
  cfg80211: stop critical protocol session upon disconnect event

Arnd Bergmann (2):
  ASoC: trace: fix printing jack name
  ssb: host_soc depends on sprom

Atsushi Nemoto (1):
  gianfar: Enable eTSEC-106 erratum w/a for MPC8548E Rev2

Benjamin Poirier (1):
  mld, igmp: Fix reserved tailroom calculation

Bernie Harris (1):
  tunnel: Clear IPCB(skb)->opt before dst_link_failure called

Bill Sommerfeld (1):
  udp6: fix UDP/IPv6 encap resubmit path

Bjørn Mork (3):
  qmi_wwan: add Sierra Wireless EM74xx device ID
  cdc_ncm: toggle altsetting to force reset before setup
  cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind

Bob Moore (1):
  ACPICA: Revert "Parser: Fix for SuperName method invocation"

Boris BREZILLON (1):
  MAINTAINERS: add a maintainer for the NAND subsystem

Borislav Petkov (2):
  x86/delay: Avoid preemptible context checks in delay_mwaitx()
  x86/fpu: Fix eager-FPU handling on legacy FPU machines

Brian Foster (4):
  xfs: separate log head record discovery from verification
  xfs: refactor unmount record detection into helper
  xfs: refactor in-core log state update to helper
  xfs: only run torn log write detection on dirty logs

Carlo Caione (1):
  ASoC: cht_bsw_rt5645: Fix writing to string literal

Charles Keepax (1):
  ASoC: samsung: Use IRQ safe spin lock calls

Chris Bainbridge (1):
  mac80211: fix use of uninitialised values in RX aggregation

Christian Borntraeger (1):
  s390/cpumf: Fix lpp detection

Chun-Hao Lin (1):
  r8169:fix "rtl_counters_cond == 1 (loop: 1000, delay: 10)" log spam.

Chunhao Lin (1):
  r8169: Enable RX_MULTI_EN for RTL_GIGA_MAC_VER_41~48

Colin Ian King (3):
  asix: do not free array priv->mdio->irq
  net/ethoc: do not free array priv->mdio->irq
  net: eth: altera: do not free array priv->mdio->irq

Dan Carpenter (1):
  net: moxa: fix an error code

Dan Williams (2):
  list: kill list_force_poison()
  mm: fix mixed zone detection in devm_memremap_pages

Daniel Borkmann (2):
  bpf: fix csum setting for bpf_set_tunnel_key
  vxlan: fix missing options_len update on RX with collect metadata

David Ahern (1):
  net: vrf: Remove direct access to skb->data

David Hildenbrand (1):
  KVM: s390: correct fprs on SIGP (STOP AND) STORE STATUS

David Jander (1):
  gpu: ipu-v3: Reset IPU before activating IRQ

David Matlack (1):
  kvm: cap halt polling at exactly halt_poll_ns

Diego Viola (1):
  net: jme: fix suspend/resume on JMC260

Douglas Miller (1):
  be2net: Don't leak iomapped memory on removal.

Ed Spiridonov (1):
  can: mcp251x: avoid write to error flag register if it's unnecessary

Enrico Jorns (1):
  drm/imx: Add missing 

[PATCH 03/22] ncr5380: Remove REAL_DMA and REAL_DMA_POLL macros

2016-03-13 Thread Finn Thain
For the NCR5380.c core driver, these macros are never used.
If REAL_DMA were to be defined, compilation would fail.

For the atari_NCR5380.c core driver, REAL_DMA is always defined.

Hence these macros are pointless.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |  218 +--
 drivers/scsi/NCR5380.h   |  112 --
 drivers/scsi/atari_NCR5380.c |   62 +---
 drivers/scsi/atari_scsi.c|   32 --
 drivers/scsi/sun3_scsi.c |   13 --
 5 files changed, 22 insertions(+), 415 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:22.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:23.0 +1100
@@ -35,18 +35,10 @@
  * code so that everything does the same thing that's done at the
  * end of a pseudo-DMA read operation.
  *
- * 2.  Fix REAL_DMA (interrupt driven, polled works fine) -
- * basically, transfer size needs to be reduced by one
- * and the last byte read as is done with PSEUDO_DMA.
- *
  * 4.  Test SCSI-II tagged queueing (I have no devices which support
  * tagged queueing)
  */
 
-#ifndef notyet
-#undef REAL_DMA
-#endif
-
 #ifdef BOARD_REQUIRES_NO_DELAY
 #define io_recovery_delay(x)
 #else
@@ -131,12 +123,6 @@
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
- * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
- *
- * REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- * rely on phase mismatch and EOP interrupts to determine end
- * of phase.
- *
  * These macros MUST be defined :
  *
  * NCR5380_read(register)  - read from the specified register
@@ -147,15 +133,9 @@
  * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * REAL functions :
- * NCR5380_REAL_DMA should be defined if real DMA is to be used.
  * Note that the DMA setup functions should return the number of bytes
  * that they were able to program the controller for.
  *
- * Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
- *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_dma_residual(instance); - residual count
@@ -486,12 +466,6 @@ static void prepare_info(struct Scsi_Hos
 #ifdef DIFFERENTIAL
 "DIFFERENTIAL "
 #endif
-#ifdef REAL_DMA
-"REAL_DMA "
-#endif
-#ifdef REAL_DMA_POLL
-"REAL_DMA_POLL "
-#endif
 #ifdef PARITY
 "PARITY "
 #endif
@@ -551,9 +525,8 @@ static int NCR5380_init(struct Scsi_Host
hostdata->id_higher_mask |= i;
for (i = 0; i < 8; ++i)
hostdata->busy[i] = 0;
-#ifdef REAL_DMA
-   hostdata->dmalen = 0;
-#endif
+   hostdata->dma_len = 0;
+
spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
hostdata->sensing = NULL;
@@ -850,11 +823,7 @@ static void NCR5380_main(struct work_str
requeue_cmd(instance, cmd);
}
}
-   if (hostdata->connected
-#ifdef REAL_DMA
-   && !hostdata->dmalen
-#endif
-   ) {
+   if (hostdata->connected && !hostdata->dma_len) {
dsprintk(NDEBUG_MAIN, instance, "main: performing 
information transfer\n");
NCR5380_information_transfer(instance);
done = 0;
@@ -919,34 +888,6 @@ static irqreturn_t NCR5380_intr(int irq,
dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 
0x%02x, MR 0x%02x\n",
 irq, basr, sr, mr);
 
-#if defined(REAL_DMA)
-   if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
-   /* Probably End of DMA, Phase Mismatch or Loss of BSY.
-* We ack IRQ after clearing Mode Register. Workarounds
-* for End of DMA errata need to happen in DMA Mode.
-*/
-
-   dsprintk(NDEBUG_INTR, instance, "interrupt in DMA 
mode\n");
-
-   int transferred;
-
-   if (!hostdata->connected)
-   panic("scsi%d : DMA interrupt with no connected 
cmd\n",
- instance->hostno);
-
-   transferred = hostdata->dmalen - 
NCR5380_dma_residual(instance);
-   hostdata->connected->SCp.this_residual -= transferred;
-   hostdata->connected->SCp.ptr += transferred;
-   hostdata->dmalen = 0;
-
-   /* FIXME: we need to poll briefly then defer a 

[PATCH 03/22] ncr5380: Remove REAL_DMA and REAL_DMA_POLL macros

2016-03-13 Thread Finn Thain
For the NCR5380.c core driver, these macros are never used.
If REAL_DMA were to be defined, compilation would fail.

For the atari_NCR5380.c core driver, REAL_DMA is always defined.

Hence these macros are pointless.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |  218 +--
 drivers/scsi/NCR5380.h   |  112 --
 drivers/scsi/atari_NCR5380.c |   62 +---
 drivers/scsi/atari_scsi.c|   32 --
 drivers/scsi/sun3_scsi.c |   13 --
 5 files changed, 22 insertions(+), 415 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:22.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:23.0 +1100
@@ -35,18 +35,10 @@
  * code so that everything does the same thing that's done at the
  * end of a pseudo-DMA read operation.
  *
- * 2.  Fix REAL_DMA (interrupt driven, polled works fine) -
- * basically, transfer size needs to be reduced by one
- * and the last byte read as is done with PSEUDO_DMA.
- *
  * 4.  Test SCSI-II tagged queueing (I have no devices which support
  * tagged queueing)
  */
 
-#ifndef notyet
-#undef REAL_DMA
-#endif
-
 #ifdef BOARD_REQUIRES_NO_DELAY
 #define io_recovery_delay(x)
 #else
@@ -131,12 +123,6 @@
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
- * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
- *
- * REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- * rely on phase mismatch and EOP interrupts to determine end
- * of phase.
- *
  * These macros MUST be defined :
  *
  * NCR5380_read(register)  - read from the specified register
@@ -147,15 +133,9 @@
  * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * REAL functions :
- * NCR5380_REAL_DMA should be defined if real DMA is to be used.
  * Note that the DMA setup functions should return the number of bytes
  * that they were able to program the controller for.
  *
- * Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
- *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_dma_residual(instance); - residual count
@@ -486,12 +466,6 @@ static void prepare_info(struct Scsi_Hos
 #ifdef DIFFERENTIAL
 "DIFFERENTIAL "
 #endif
-#ifdef REAL_DMA
-"REAL_DMA "
-#endif
-#ifdef REAL_DMA_POLL
-"REAL_DMA_POLL "
-#endif
 #ifdef PARITY
 "PARITY "
 #endif
@@ -551,9 +525,8 @@ static int NCR5380_init(struct Scsi_Host
hostdata->id_higher_mask |= i;
for (i = 0; i < 8; ++i)
hostdata->busy[i] = 0;
-#ifdef REAL_DMA
-   hostdata->dmalen = 0;
-#endif
+   hostdata->dma_len = 0;
+
spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
hostdata->sensing = NULL;
@@ -850,11 +823,7 @@ static void NCR5380_main(struct work_str
requeue_cmd(instance, cmd);
}
}
-   if (hostdata->connected
-#ifdef REAL_DMA
-   && !hostdata->dmalen
-#endif
-   ) {
+   if (hostdata->connected && !hostdata->dma_len) {
dsprintk(NDEBUG_MAIN, instance, "main: performing 
information transfer\n");
NCR5380_information_transfer(instance);
done = 0;
@@ -919,34 +888,6 @@ static irqreturn_t NCR5380_intr(int irq,
dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 
0x%02x, MR 0x%02x\n",
 irq, basr, sr, mr);
 
-#if defined(REAL_DMA)
-   if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
-   /* Probably End of DMA, Phase Mismatch or Loss of BSY.
-* We ack IRQ after clearing Mode Register. Workarounds
-* for End of DMA errata need to happen in DMA Mode.
-*/
-
-   dsprintk(NDEBUG_INTR, instance, "interrupt in DMA 
mode\n");
-
-   int transferred;
-
-   if (!hostdata->connected)
-   panic("scsi%d : DMA interrupt with no connected 
cmd\n",
- instance->hostno);
-
-   transferred = hostdata->dmalen - 
NCR5380_dma_residual(instance);
-   hostdata->connected->SCp.this_residual -= transferred;
-   hostdata->connected->SCp.ptr += transferred;
-   hostdata->dmalen = 0;
-
-   /* FIXME: we need to poll briefly then defer a 
workqueue task ! */
-   

[PATCH 0/4] intel_idle: Improve MSR fixup resume handling

2016-03-13 Thread Andy Lutomirski
I can't usefully test the fourth patch.  I'm reasonably confident
that the other three work, though, and I tested them on a laptop
that doesn't preserve the C1E auto-promotion flag across
suspend/resume.

Andy Lutomirski (4):
  intel_idle: Consolidate auto-promotion/auto-demotion fixups
  intel_idle: Remove a broadcast MSR fixup at boot
  intel_idle: Fix MSRs after resume
  intel_idle: Move BYT/CHT auto-demotion fixup into fix_this_cpu

 drivers/idle/intel_idle.c | 56 ---
 1 file changed, 29 insertions(+), 27 deletions(-)

-- 
2.5.0



[PATCH 2/4] intel_idle: Remove a broadcast MSR fixup at boot

2016-03-13 Thread Andy Lutomirski
intel_idle already fixes MSRs on each CPU when it registers for that
CPU, so it doesn't need to separately broadcast to all CPUs on
startup.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 32b3e6049994..338df09ad60b 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1079,8 +1079,6 @@ static int __init intel_idle_cpuidle_driver_init(void)
wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
 
-   on_each_cpu(fix_this_cpu, NULL, 1);
-
return 0;
 }
 
-- 
2.5.0



[PATCH 04/22] atari_NCR5380: Remove DMA_MIN_SIZE macro

2016-03-13 Thread Finn Thain
Only the atari_scsi and sun3_scsi drivers define DMA_MIN_SIZE.
Both drivers also define NCR5380_dma_xfer_len, which means
DMA_MIN_SIZE is redundant.

This removes another discrepancy between the two core drivers.

Signed-off-by: Finn Thain 

---
 drivers/scsi/atari_NCR5380.c |   16 
 drivers/scsi/atari_scsi.c|4 +++-
 drivers/scsi/sun3_scsi.c |   16 ++--
 3 files changed, 17 insertions(+), 19 deletions(-)

Index: linux/drivers/scsi/atari_NCR5380.c
===
--- linux.orig/drivers/scsi/atari_NCR5380.c 2016-03-14 15:26:23.0 
+1100
+++ linux/drivers/scsi/atari_NCR5380.c  2016-03-14 15:26:26.0 +1100
@@ -1857,12 +1857,11 @@ static void NCR5380_information_transfer
d = cmd->SCp.ptr;
}
/* this command setup for dma yet? */
-   if ((count >= DMA_MIN_SIZE) && 
(sun3_dma_setup_done != cmd)) {
-   if (cmd->request->cmd_type == 
REQ_TYPE_FS) {
-   sun3scsi_dma_setup(instance, d, 
count,
-  
rq_data_dir(cmd->request));
-   sun3_dma_setup_done = cmd;
-   }
+   if (sun3_dma_setup_done != cmd &&
+   sun3scsi_dma_xfer_len(count, cmd) > 0) {
+   sun3scsi_dma_setup(instance, d, count,
+  
rq_data_dir(cmd->request));
+   sun3_dma_setup_done = cmd;
}
 #ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
@@ -1927,7 +1926,7 @@ static void NCR5380_information_transfer
 #endif
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
-   if (transfersize >= DMA_MIN_SIZE) {
+   if (transfersize > 0) {
len = transfersize;
cmd->SCp.phase = phase;
if (NCR5380_transfer_dma(instance, 
,
@@ -2366,7 +2365,8 @@ static void NCR5380_reselect(struct Scsi
d = tmp->SCp.ptr;
}
/* setup this command for dma if not already */
-   if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
+   if (sun3_dma_setup_done != tmp &&
+   sun3scsi_dma_xfer_len(count, tmp) > 0) {
sun3scsi_dma_setup(instance, d, count,
   rq_data_dir(tmp->request));
sun3_dma_setup_done = tmp;
Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:23.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:26.0 +1100
@@ -87,7 +87,6 @@
 
 #define SUPPORT_TAGS
 #define MAX_TAGS32
-#define DMA_MIN_SIZE32
 
 #define NCR5380_implementation_fields   /* none */
 
@@ -605,6 +604,9 @@ static unsigned long atari_dma_xfer_len(
 {
unsigned long   possible_len, limit;
 
+   if (wanted_len < 32)
+   return 0;
+
if (IS_A_TT())
/* TT SCSI DMA can transfer arbitrary #bytes */
return wanted_len;
Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:26.0 +1100
@@ -39,9 +39,6 @@
 /* Definitions for the core NCR5380 driver. */
 
 /* #define SUPPORT_TAGS */
-/* minimum number of bytes to do dma on */
-#define DMA_MIN_SIZE129
-
 /* #define MAX_TAGS 32 */
 
 #define NCR5380_implementation_fields   /* none */
@@ -61,7 +58,7 @@
 #define NCR5380_dma_residual(instance) \
 sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
-sun3scsi_dma_xfer_len(cmd->SCp.this_residual, cmd, !((phase) & SR_IO))
+sun3scsi_dma_xfer_len(cmd->SCp.this_residual, cmd)
 
 #define NCR5380_acquire_dma_irq(instance)(1)
 #define NCR5380_release_dma_irq(instance)
@@ -262,14 +259,13 @@ static inline unsigned long sun3scsi_dma
return last_residual;
 }
 
-static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted,
- struct scsi_cmnd *cmd,
- int write_flag)

[PATCH 2/4] intel_idle: Remove a broadcast MSR fixup at boot

2016-03-13 Thread Andy Lutomirski
intel_idle already fixes MSRs on each CPU when it registers for that
CPU, so it doesn't need to separately broadcast to all CPUs on
startup.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 32b3e6049994..338df09ad60b 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1079,8 +1079,6 @@ static int __init intel_idle_cpuidle_driver_init(void)
wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
 
-   on_each_cpu(fix_this_cpu, NULL, 1);
-
return 0;
 }
 
-- 
2.5.0



[PATCH 04/22] atari_NCR5380: Remove DMA_MIN_SIZE macro

2016-03-13 Thread Finn Thain
Only the atari_scsi and sun3_scsi drivers define DMA_MIN_SIZE.
Both drivers also define NCR5380_dma_xfer_len, which means
DMA_MIN_SIZE is redundant.

This removes another discrepancy between the two core drivers.

Signed-off-by: Finn Thain 

---
 drivers/scsi/atari_NCR5380.c |   16 
 drivers/scsi/atari_scsi.c|4 +++-
 drivers/scsi/sun3_scsi.c |   16 ++--
 3 files changed, 17 insertions(+), 19 deletions(-)

Index: linux/drivers/scsi/atari_NCR5380.c
===
--- linux.orig/drivers/scsi/atari_NCR5380.c 2016-03-14 15:26:23.0 
+1100
+++ linux/drivers/scsi/atari_NCR5380.c  2016-03-14 15:26:26.0 +1100
@@ -1857,12 +1857,11 @@ static void NCR5380_information_transfer
d = cmd->SCp.ptr;
}
/* this command setup for dma yet? */
-   if ((count >= DMA_MIN_SIZE) && 
(sun3_dma_setup_done != cmd)) {
-   if (cmd->request->cmd_type == 
REQ_TYPE_FS) {
-   sun3scsi_dma_setup(instance, d, 
count,
-  
rq_data_dir(cmd->request));
-   sun3_dma_setup_done = cmd;
-   }
+   if (sun3_dma_setup_done != cmd &&
+   sun3scsi_dma_xfer_len(count, cmd) > 0) {
+   sun3scsi_dma_setup(instance, d, count,
+  
rq_data_dir(cmd->request));
+   sun3_dma_setup_done = cmd;
}
 #ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
@@ -1927,7 +1926,7 @@ static void NCR5380_information_transfer
 #endif
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
-   if (transfersize >= DMA_MIN_SIZE) {
+   if (transfersize > 0) {
len = transfersize;
cmd->SCp.phase = phase;
if (NCR5380_transfer_dma(instance, 
,
@@ -2366,7 +2365,8 @@ static void NCR5380_reselect(struct Scsi
d = tmp->SCp.ptr;
}
/* setup this command for dma if not already */
-   if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
+   if (sun3_dma_setup_done != tmp &&
+   sun3scsi_dma_xfer_len(count, tmp) > 0) {
sun3scsi_dma_setup(instance, d, count,
   rq_data_dir(tmp->request));
sun3_dma_setup_done = tmp;
Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:23.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:26.0 +1100
@@ -87,7 +87,6 @@
 
 #define SUPPORT_TAGS
 #define MAX_TAGS32
-#define DMA_MIN_SIZE32
 
 #define NCR5380_implementation_fields   /* none */
 
@@ -605,6 +604,9 @@ static unsigned long atari_dma_xfer_len(
 {
unsigned long   possible_len, limit;
 
+   if (wanted_len < 32)
+   return 0;
+
if (IS_A_TT())
/* TT SCSI DMA can transfer arbitrary #bytes */
return wanted_len;
Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:26.0 +1100
@@ -39,9 +39,6 @@
 /* Definitions for the core NCR5380 driver. */
 
 /* #define SUPPORT_TAGS */
-/* minimum number of bytes to do dma on */
-#define DMA_MIN_SIZE129
-
 /* #define MAX_TAGS 32 */
 
 #define NCR5380_implementation_fields   /* none */
@@ -61,7 +58,7 @@
 #define NCR5380_dma_residual(instance) \
 sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
-sun3scsi_dma_xfer_len(cmd->SCp.this_residual, cmd, !((phase) & SR_IO))
+sun3scsi_dma_xfer_len(cmd->SCp.this_residual, cmd)
 
 #define NCR5380_acquire_dma_irq(instance)(1)
 #define NCR5380_release_dma_irq(instance)
@@ -262,14 +259,13 @@ static inline unsigned long sun3scsi_dma
return last_residual;
 }
 
-static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted,
- struct scsi_cmnd *cmd,
- int write_flag)
+static inline unsigned long 

[PATCH 0/4] intel_idle: Improve MSR fixup resume handling

2016-03-13 Thread Andy Lutomirski
I can't usefully test the fourth patch.  I'm reasonably confident
that the other three work, though, and I tested them on a laptop
that doesn't preserve the C1E auto-promotion flag across
suspend/resume.

Andy Lutomirski (4):
  intel_idle: Consolidate auto-promotion/auto-demotion fixups
  intel_idle: Remove a broadcast MSR fixup at boot
  intel_idle: Fix MSRs after resume
  intel_idle: Move BYT/CHT auto-demotion fixup into fix_this_cpu

 drivers/idle/intel_idle.c | 56 ---
 1 file changed, 29 insertions(+), 27 deletions(-)

-- 
2.5.0



[PATCH 3/4] intel_idle: Fix MSRs after resume

2016-03-13 Thread Andy Lutomirski
Firmware that enables auto-promotion / auto-demotion flags we don't
like will probably re-enable them after suspend/resume.  Disable
them again after resume so they stay fixed.

I've seen this on my Dell XPS 13 9350.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 338df09ad60b..e3d7d8bbc843 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -61,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1026,6 +1027,15 @@ void intel_idle_state_table_update(void)
return;
 }
 
+static void intel_idle_resume(void)
+{
+   on_each_cpu(fix_this_cpu, NULL, 1);
+}
+
+static struct syscore_ops intel_idle_syscore_ops = {
+   .resume = intel_idle_resume,
+};
+
 /*
  * intel_idle_cpuidle_driver_init()
  * allocate, initialize cpuidle_states
@@ -1119,6 +1129,7 @@ static int __init intel_idle_init(void)
if (retval)
return retval;
 
+   register_syscore_ops(&intel_idle_syscore_ops);
intel_idle_cpuidle_driver_init();
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
@@ -1153,6 +1164,7 @@ static void __exit intel_idle_exit(void)
 {
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
+   unregister_syscore_ops(&intel_idle_syscore_ops);
 
cpu_notifier_register_begin();
 
-- 
2.5.0



[PATCH 01/22] g_ncr5380: Remove CONFIG_SCSI_GENERIC_NCR53C400

2016-03-13 Thread Finn Thain
This change brings a number of improvements: fewer macros, better test
coverage, simpler code and sane Kconfig options. The downside is a small
chance of incompatibility (which seems unavoidable).

CONFIG_SCSI_GENERIC_NCR53C400 exists to enable or inhibit pseudo DMA
transfers when the driver is used with 53C400-compatible cards. Thanks to
Ondrej Zary's patches, PDMA now works which means it can be enabled
unconditionally.

Due to bad design, CONFIG_SCSI_GENERIC_NCR53C400 ties together unrelated
functionality as it sets both PSEUDO_DMA and BIOSPARAM macros. This patch
effectively enables PSEUDO_DMA and disables BIOSPARAM.

The defconfigs and the Kconfig default leave CONFIG_SCSI_GENERIC_NCR53C400
undefined. Red Hat 9 and CentOS 2.1 were the same. This leaves both
PSEUDO_DMA and BIOSPARAM disabled. The effect of this patch should be
better performance from enabling PSEUDO_DMA.

On the other hand, Debian 4 and SLES 10 had CONFIG_SCSI_GENERIC_NCR53C400
enabled, so both PSEUDO_DMA and BIOSPARAM were enabled. This patch might
affect configurations like this by disabling BIOSPARAM. My best guess is
that this could be a problem only in the vanishingly rare case that
1) the CHS values stored in the boot device partition table are wrong and
2) a 5380 card is in use (because PDMA on 53C400 used to be broken).

Signed-off-by: Finn Thain 

---

Here are the distro kernel versions I looked at:

CentOS 2.1:

$ strings 
kernel-2.4.9-e.40.i686/lib/modules/2.4.9-e.40/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Red Hat 7:

$ strings 
kernel-2.4.18-3.i386/lib/modules/2.4.18-3/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Red Hat 9:

$ strings 
kernel-2.4.20-8.i586/lib/modules/2.4.20-8/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Debian 4:

$ strings 
linux-image-2.6.24-etchnhalf.1-486_2.6.24-6-etchnhalf.9etch3_i386/lib/modules/2.6.24-etchnhalf.1-486/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d
$ strings 
kernel-image-2.6.8-2-386_2.6.8-13_i386/lib/modules/2.6.8-2-386/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d


SLES 10.2:

$ strings 
kernel-default-2.6.18.2-34.i586/lib/modules/2.6.18.2-34-default/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d

---
 drivers/scsi/Kconfig |   11 --
 drivers/scsi/g_NCR5380.c |   75 ++-
 drivers/scsi/g_NCR5380.h |   16 +-
 3 files changed, 25 insertions(+), 77 deletions(-)

Index: linux/drivers/scsi/Kconfig
===
--- linux.orig/drivers/scsi/Kconfig 2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/Kconfig  2016-03-14 15:26:20.0 +1100
@@ -812,17 +812,6 @@ config SCSI_GENERIC_NCR5380_MMIO
  To compile this driver as a module, choose M here: the
  module will be called g_NCR5380_mmio.
 
-config SCSI_GENERIC_NCR53C400
-   bool "Enable NCR53c400 extensions"
-   depends on SCSI_GENERIC_NCR5380
-   help
- This enables certain optimizations for the NCR53c400 SCSI cards.
- You might as well try it out.  Note that this driver will only probe
- for the Trantor T130B in its default configuration; you might have
- to pass a command line option to the kernel at boot time if it does
- not detect your card.  See the file
-  for details.
-
 config SCSI_IPS
tristate "IBM ServeRAID support"
depends on PCI && SCSI
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:20.0 +1100
@@ -57,10 +57,7 @@
  */
 
 #define AUTOPROBE_IRQ
-
-#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define PSEUDO_DMA
-#endif
 
 #include 
 #include 
@@ -270,7 +267,7 @@ static int __init generic_NCR5380_detect
 #ifndef SCSI_G_NCR5380_MEM
int i;
int port_idx = -1;
-   unsigned long region_size = 16;
+   unsigned long region_size;
 #endif
static unsigned int __initdata ncr_53c400a_ports[] = {
0x280, 0x290, 0x300, 0x310, 0x330, 0x340, 0x348, 0x350, 0
@@ -290,6 +287,7 @@ static int __init generic_NCR5380_detect
 #ifdef SCSI_G_NCR5380_MEM
unsigned long base;
void __iomem *iomem;
+   resource_size_t iomem_size;
 #endif
 
if (ncr_irq)
@@ -353,9 +351,7 @@ static int __init generic_NCR5380_detect
flags = FLAG_NO_PSEUDO_DMA;
break;
case BOARD_NCR53C400:
-#ifdef PSEUDO_DMA
flags = FLAG_NO_DMA_FIXUP;
-#endif
break;
case BOARD_NCR53C400A:
flags = 

Re: linux-next: build failure after merge of the aio tree

2016-03-13 Thread Stephen Rothwell
Hi Ben,

On Sat, 16 Jan 2016 09:55:15 +1100 Stephen Rothwell  
wrote:
>
> On Fri, 15 Jan 2016 10:18:21 -0500 Benjamin LaHaise  wrote:
> >
> > On Fri, Jan 15, 2016 at 01:25:31AM -0800, Christoph Hellwig wrote:  
> > > On Fri, Jan 15, 2016 at 08:23:16PM +1100, Stephen Rothwell wrote:
> > > > Via the aio tree (git://git.kvack.org/~bcrl/aio-next.git#master) added
> > > > in July 2013 at Ben's request.  The code was added to the aio tree in
> > > > Jan 12 (my time), but has never been in a published linux-next tree due
> > > > to the above build problem (I back out to the previous days version of
> > > > the aio tree).
> > > 
> > > Well, it's code Ben posted a few days ago, which to say it mildly is
> > > rather controversial.  It's certainly not 4.5 material.
> > 
> > It still needs the exposure.  
> 
> If it is not destined for v4.5, then it should not (yet) be in
> linux-next.  It should wait until after v4.5-rc1 is released (the merge
> window closes).  I would also argue that if the functionality itself is
> still under active review (and I haven't completely followed the
> discussion so I don't know where that is up to, but Christoph, at
> least, seems not completely convinced), then it should also not yet be
> in linux-next.

OK, so at this point (just to get rid of the build failure) I have done this:

I have reset the aio tree head to commit

  b47275df9e1c ("aio: add support for aio poll via aio thread helper")

and then cherry-picked the following commits on top:

  fb2e69217129 ("aio: Fix compile error due to unexpected use of cmpxchg()")
  0964acffc614 ("aio: revert addition of io_send_sig() in generic_write_checks")

> > As for the build failure, it's a bug in the arch __get_user() 
> > implementation 
> > that needs to be fixed.  __get_user() should really be able to handle 64 
> > bit 
> > types.  
> 
> Yeah, it is a bit weird.

Well, you need to negotiate that with the affected architectures.

-- 
Cheers,
Stephen Rothwell


[PATCH 1/4] intel_idle: Consolidate auto-promotion/auto-demotion fixups

2016-03-13 Thread Andy Lutomirski
This eliminates some duplicate code and will make it easier
to add fixup calls in places where they're currently missing.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 34 +-
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a63375..32b3e6049994 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -810,21 +810,21 @@ static struct notifier_block cpu_hotplug_notifier = {
.notifier_call = cpu_hotplug_notify,
 };
 
-static void auto_demotion_disable(void *dummy)
+static void fix_this_cpu(void *dummy)
 {
unsigned long long msr_bits;
 
-   rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
-   msr_bits &= ~(icpu->auto_demotion_disable_flags);
-   wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
-}
-static void c1e_promotion_disable(void *dummy)
-{
-   unsigned long long msr_bits;
+   if (icpu->auto_demotion_disable_flags) {
+   rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+   msr_bits &= ~(icpu->auto_demotion_disable_flags);
+   wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+   }
 
-   rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
-   msr_bits &= ~0x2;
-   wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   if (icpu->disable_promotion_to_c1e) {
+   rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   msr_bits &= ~0x2;
+   wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   }
 }
 
 static const struct idle_cpu idle_cpu_nehalem = {
@@ -1074,16 +1074,12 @@ static int __init intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
 
-   if (icpu->auto_demotion_disable_flags)
-   on_each_cpu(auto_demotion_disable, NULL, 1);
-
if (icpu->byt_auto_demotion_disable_flag) {
wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
 
-   if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
-   on_each_cpu(c1e_promotion_disable, NULL, 1);
+   on_each_cpu(fix_this_cpu, NULL, 1);
 
return 0;
 }
@@ -1108,11 +1104,7 @@ static int intel_idle_cpu_init(int cpu)
return -EIO;
}
 
-   if (icpu->auto_demotion_disable_flags)
-   smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
-
-   if (icpu->disable_promotion_to_c1e)
-   smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1);
+   smp_call_function_single(cpu, fix_this_cpu, NULL, 1);
 
return 0;
 }
-- 
2.5.0



[PATCH 3/4] intel_idle: Fix MSRs after resume

2016-03-13 Thread Andy Lutomirski
Firmware that enables auto-promotion / auto-demotion flags we don't
like will probably re-enable them after suspend/resume.  Disable
them again after resume so they stay fixed.

I've seen this on my Dell XPS 13 9350.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 338df09ad60b..e3d7d8bbc843 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -61,6 +61,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1026,6 +1027,15 @@ void intel_idle_state_table_update(void)
return;
 }
 
+static void intel_idle_resume(void)
+{
+   on_each_cpu(fix_this_cpu, NULL, 1);
+}
+
+static struct syscore_ops intel_idle_syscore_ops = {
+   .resume = intel_idle_resume,
+};
+
 /*
  * intel_idle_cpuidle_driver_init()
  * allocate, initialize cpuidle_states
@@ -1119,6 +1129,7 @@ static int __init intel_idle_init(void)
if (retval)
return retval;
 
+   register_syscore_ops(&intel_idle_syscore_ops);
intel_idle_cpuidle_driver_init();
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
@@ -1153,6 +1164,7 @@ static void __exit intel_idle_exit(void)
 {
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
+   unregister_syscore_ops(&intel_idle_syscore_ops);
 
cpu_notifier_register_begin();
 
-- 
2.5.0



[PATCH 01/22] g_ncr5380: Remove CONFIG_SCSI_GENERIC_NCR53C400

2016-03-13 Thread Finn Thain
This change brings a number of improvements: fewer macros, better test
coverage, simpler code and sane Kconfig options. The downside is a small
chance of incompatibility (which seems unavoidable).

CONFIG_SCSI_GENERIC_NCR53C400 exists to enable or inhibit pseudo DMA
transfers when the driver is used with 53C400-compatible cards. Thanks to
Ondrej Zary's patches, PDMA now works which means it can be enabled
unconditionally.

Due to bad design, CONFIG_SCSI_GENERIC_NCR53C400 ties together unrelated
functionality as it sets both PSEUDO_DMA and BIOSPARAM macros. This patch
effectively enables PSEUDO_DMA and disables BIOSPARAM.

The defconfigs and the Kconfig default leave CONFIG_SCSI_GENERIC_NCR53C400
undefined. Red Hat 9 and CentOS 2.1 were the same. This leaves both
PSEUDO_DMA and BIOSPARAM disabled. The effect of this patch should be
better performance from enabling PSEUDO_DMA.

On the other hand, Debian 4 and SLES 10 had CONFIG_SCSI_GENERIC_NCR53C400
enabled, so both PSEUDO_DMA and BIOSPARAM were enabled. This patch might
affect configurations like this by disabling BIOSPARAM. My best guess is
that this could be a problem only in the vanishingly rare case that
1) the CHS values stored in the boot device partition table are wrong and
2) a 5380 card is in use (because PDMA on 53C400 used to be broken).

Signed-off-by: Finn Thain 

---

Here are the distro kernel versions I looked at:

CentOS 2.1:

$ strings 
kernel-2.4.9-e.40.i686/lib/modules/2.4.9-e.40/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Red Hat 7:

$ strings 
kernel-2.4.18-3.i386/lib/modules/2.4.18-3/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Red Hat 9:

$ strings 
kernel-2.4.20-8.i586/lib/modules/2.4.20-8/kernel/drivers/scsi/g_NCR5380.o | 
grep extension
NO NCR53C400 driver extensions


Debian 4:

$ strings 
linux-image-2.6.24-etchnhalf.1-486_2.6.24-6-etchnhalf.9etch3_i386/lib/modules/2.6.24-etchnhalf.1-486/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d
$ strings 
kernel-image-2.6.8-2-386_2.6.8-13_i386/lib/modules/2.6.8-2-386/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d


SLES 10.2:

$ strings 
kernel-default-2.6.18.2-34.i586/lib/modules/2.6.18.2-34-default/kernel/drivers/scsi/g_NCR5380_mmio.ko
 | grep extension
NCR53C400 extension version %d

---
 drivers/scsi/Kconfig |   11 --
 drivers/scsi/g_NCR5380.c |   75 ++-
 drivers/scsi/g_NCR5380.h |   16 +-
 3 files changed, 25 insertions(+), 77 deletions(-)

Index: linux/drivers/scsi/Kconfig
===
--- linux.orig/drivers/scsi/Kconfig 2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/Kconfig  2016-03-14 15:26:20.0 +1100
@@ -812,17 +812,6 @@ config SCSI_GENERIC_NCR5380_MMIO
  To compile this driver as a module, choose M here: the
  module will be called g_NCR5380_mmio.
 
-config SCSI_GENERIC_NCR53C400
-   bool "Enable NCR53c400 extensions"
-   depends on SCSI_GENERIC_NCR5380
-   help
- This enables certain optimizations for the NCR53c400 SCSI cards.
- You might as well try it out.  Note that this driver will only probe
- for the Trantor T130B in its default configuration; you might have
- to pass a command line option to the kernel at boot time if it does
- not detect your card.  See the file
-  for details.
-
 config SCSI_IPS
tristate "IBM ServeRAID support"
depends on PCI && SCSI
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:20.0 +1100
@@ -57,10 +57,7 @@
  */
 
 #define AUTOPROBE_IRQ
-
-#ifdef CONFIG_SCSI_GENERIC_NCR53C400
 #define PSEUDO_DMA
-#endif
 
 #include 
 #include 
@@ -270,7 +267,7 @@ static int __init generic_NCR5380_detect
 #ifndef SCSI_G_NCR5380_MEM
int i;
int port_idx = -1;
-   unsigned long region_size = 16;
+   unsigned long region_size;
 #endif
static unsigned int __initdata ncr_53c400a_ports[] = {
0x280, 0x290, 0x300, 0x310, 0x330, 0x340, 0x348, 0x350, 0
@@ -290,6 +287,7 @@ static int __init generic_NCR5380_detect
 #ifdef SCSI_G_NCR5380_MEM
unsigned long base;
void __iomem *iomem;
+   resource_size_t iomem_size;
 #endif
 
if (ncr_irq)
@@ -353,9 +351,7 @@ static int __init generic_NCR5380_detect
flags = FLAG_NO_PSEUDO_DMA;
break;
case BOARD_NCR53C400:
-#ifdef PSEUDO_DMA
flags = FLAG_NO_DMA_FIXUP;
-#endif
break;
case BOARD_NCR53C400A:
flags = FLAG_NO_DMA_FIXUP;
@@ -381,20 +377,22 

Re: linux-next: build failure after merge of the aio tree

2016-03-13 Thread Stephen Rothwell
Hi Ben,

On Sat, 16 Jan 2016 09:55:15 +1100 Stephen Rothwell  
wrote:
>
> On Fri, 15 Jan 2016 10:18:21 -0500 Benjamin LaHaise  wrote:
> >
> > On Fri, Jan 15, 2016 at 01:25:31AM -0800, Christoph Hellwig wrote:  
> > > On Fri, Jan 15, 2016 at 08:23:16PM +1100, Stephen Rothwell wrote:
> > > > Via the aio tree (git://git.kvack.org/~bcrl/aio-next.git#master) added
> > > > in July 2013 at Ben's request.  The code was added to the aio tree in
> > > > Jan 12 (my time), but has never been in a published linux-next tree due
> > > > to the above build problem (I back out to the previous days version of
> > > > the aio tree).
> > > 
> > > Well, it's code Ben posted a few days ago, which to say it mildly is
> > > rather controversial.  It's certainly not 4.5 material.
> > 
> > It still needs the exposure.  
> 
> If it is not destined for v4.5, then it should not (yet) be in
> linux-next.  It should wait until after v4.5-rc1 is released (the merge
> window closes).  I would also argue that if the functionality itself is
> still under active review (and I haven't completely followed the
> discussion so I don't know where that is up to, but Christoph, at
> least, seems not completely convinced), then it should also not yet be
> in linux-next.

OK, so at this point (just to get rid of the build failure) I have done this:

I have reset the aio tree head to commit

  b47275df9e1c ("aio: add support for aio poll via aio thread helper")

and then cherry-picked the following commits on top:

  fb2e69217129 ("aio: Fix compile error due to unexpected use of cmpxchg()")
  0964acffc614 ("aio: revert addition of io_send_sig() in generic_write_checks")

> > As for the build failure, it's a bug in the arch __get_user() 
> > implementation 
> > that needs to be fixed.  __get_user() should really be able to handle 64 
> > bit 
> > types.  
> 
> Yeah, it is a bit weird.

Well, you need to negotiate that with the affected architectures.

-- 
Cheers,
Stephen Rothwell


[PATCH 1/4] intel_idle: Consolidate auto-promotion/auto-demotion fixups

2016-03-13 Thread Andy Lutomirski
This eliminates some duplicate code and will make it easier
to add fixup calls in places where they're currently missing.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 34 +-
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a63375..32b3e6049994 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -810,21 +810,21 @@ static struct notifier_block cpu_hotplug_notifier = {
.notifier_call = cpu_hotplug_notify,
 };
 
-static void auto_demotion_disable(void *dummy)
+static void fix_this_cpu(void *dummy)
 {
unsigned long long msr_bits;
 
-   rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
-   msr_bits &= ~(icpu->auto_demotion_disable_flags);
-   wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
-}
-static void c1e_promotion_disable(void *dummy)
-{
-   unsigned long long msr_bits;
+   if (icpu->auto_demotion_disable_flags) {
+   rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+   msr_bits &= ~(icpu->auto_demotion_disable_flags);
+   wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+   }
 
-   rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
-   msr_bits &= ~0x2;
-   wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   if (icpu->disable_promotion_to_c1e) {
+   rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   msr_bits &= ~0x2;
+   wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+   }
 }
 
 static const struct idle_cpu idle_cpu_nehalem = {
@@ -1074,16 +1074,12 @@ static int __init intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
 
-   if (icpu->auto_demotion_disable_flags)
-   on_each_cpu(auto_demotion_disable, NULL, 1);
-
if (icpu->byt_auto_demotion_disable_flag) {
wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
}
 
-   if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
-   on_each_cpu(c1e_promotion_disable, NULL, 1);
+   on_each_cpu(fix_this_cpu, NULL, 1);
 
return 0;
 }
@@ -1108,11 +1104,7 @@ static int intel_idle_cpu_init(int cpu)
return -EIO;
}
 
-   if (icpu->auto_demotion_disable_flags)
-   smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
-
-   if (icpu->disable_promotion_to_c1e)
-   smp_call_function_single(cpu, c1e_promotion_disable, NULL, 1);
+   smp_call_function_single(cpu, fix_this_cpu, NULL, 1);
 
return 0;
 }
-- 
2.5.0



[PATCH 4/4] intel_idle: Move BYT/CHT auto-demotion fixup into fix_this_cpu

2016-03-13 Thread Andy Lutomirski
The demotion policy config registers are per-package and these CPUs
only exist in single-package variants AFAIK, so it's not strictly
necessary to fix these MSRs on each core, but fixing them on resume
is still likely to be helpful, so move them into fix_this_cpu.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index e3d7d8bbc843..cbb02d28dc48 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -826,6 +826,11 @@ static void fix_this_cpu(void *dummy)
msr_bits &= ~0x2;
wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
+
+   if (icpu->byt_auto_demotion_disable_flag) {
+   wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+   wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+   }
 }
 
 static const struct idle_cpu idle_cpu_nehalem = {
@@ -1084,11 +1089,6 @@ static int __init intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
 
-   if (icpu->byt_auto_demotion_disable_flag) {
-   wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
-   wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
-   }
-
return 0;
 }
 
-- 
2.5.0



[PATCH 4/4] intel_idle: Move BYT/CHT auto-demotion fixup into fix_this_cpu

2016-03-13 Thread Andy Lutomirski
The demotion policy config registers are per-package and these CPUs
only exist in single-package variants AFAIK, so it's not strictly
necessary to fix these MSRs on each core, but fixing them on resume
is still likely to be helpful, so move them into fix_this_cpu.

Signed-off-by: Andy Lutomirski 
---
 drivers/idle/intel_idle.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index e3d7d8bbc843..cbb02d28dc48 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -826,6 +826,11 @@ static void fix_this_cpu(void *dummy)
msr_bits &= ~0x2;
wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
+
+   if (icpu->byt_auto_demotion_disable_flag) {
+   wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+   wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+   }
 }
 
 static const struct idle_cpu idle_cpu_nehalem = {
@@ -1084,11 +1089,6 @@ static int __init intel_idle_cpuidle_driver_init(void)
drv->state_count += 1;
}
 
-   if (icpu->byt_auto_demotion_disable_flag) {
-   wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
-   wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
-   }
-
return 0;
 }
 
-- 
2.5.0



[PATCH 08/22] ncr5380: Use DMA hooks for PDMA

2016-03-13 Thread Finn Thain
Those wrapper drivers which use DMA define the REAL_DMA macro and
those which use pseudo DMA define PSEUDO_DMA. These macros need to be
removed for a number of reasons, not least of which is to have drivers
share more code.

Redefine the PDMA send and receive hooks as DMA setup hooks, so that the
DMA code can be shared by all 5380 wrapper drivers. This will help to
reunify the forked core driver.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   10 ++
 drivers/scsi/arm/cumana_1.c |   10 ++
 drivers/scsi/arm/oak.c  |   10 ++
 drivers/scsi/dmx3191d.c |4 ++--
 drivers/scsi/dtc.c  |6 --
 drivers/scsi/dtc.h  |2 ++
 drivers/scsi/g_NCR5380.c|   10 ++
 drivers/scsi/g_NCR5380.h|4 ++--
 drivers/scsi/mac_scsi.c |5 ++---
 drivers/scsi/pas16.c|   14 --
 drivers/scsi/pas16.h|2 ++
 drivers/scsi/t128.c |   12 ++--
 drivers/scsi/t128.h |2 ++
 13 files changed, 50 insertions(+), 41 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:32.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:34.0 +1100
@@ -127,17 +127,11 @@
  * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_dma_residual(instance); - residual count
  *
- * PSEUDO functions :
- * NCR5380_pwrite(instance, src, count)
- * NCR5380_pread(instance, dst, count);
- *
  * The generic driver is initialized by calling NCR5380_init(instance),
  * after setting the appropriate host specific fields and ID.  If the
  * driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
@@ -1511,7 +1505,7 @@ static int NCR5380_transfer_dma(struct S
  */
 
if (p & SR_IO) {
-   foo = NCR5380_pread(instance, d,
+   foo = NCR5380_dma_recv_setup(instance, d,
hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
@@ -1542,7 +1536,7 @@ static int NCR5380_transfer_dma(struct S
d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
} else {
-   foo = NCR5380_pwrite(instance, d, c);
+   foo = NCR5380_dma_send_setup(instance, d, c);
if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:29.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:34.0 +1100
@@ -18,6 +18,8 @@
 #define NCR5380_write(reg, value)  cumanascsi_write(instance, reg, value)
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+#define NCR5380_dma_recv_setup cumanascsi_pread
+#define NCR5380_dma_send_setup cumanascsi_pwrite
 
 #define NCR5380_intr   cumanascsi_intr
 #define NCR5380_queue_command  cumanascsi_queue_command
@@ -39,8 +41,8 @@ void cumanascsi_setup(char *str, int *in
 #define L(v)   (((v)<<16)|((v) & 0x0000ffff))
 #define H(v)   (((v)>>16)|((v) & 0xffff0000))
 
-static inline int
-NCR5380_pwrite(struct Scsi_Host *host, unsigned char *addr, int len)
+static inline int cumanascsi_pwrite(struct Scsi_Host *host,
+unsigned char *addr, int len)
 {
   unsigned long *laddr;
   void __iomem *dma = priv(host)->dma + 0x2000;
@@ -102,8 +104,8 @@ end:
   return len;
 }
 
-static inline int
-NCR5380_pread(struct Scsi_Host *host, unsigned char *addr, int len)
+static inline int cumanascsi_pread(struct Scsi_Host *host,
+   unsigned char *addr, int len)
 {
   unsigned long *laddr;
   void __iomem *dma = priv(host)->dma + 0x2000;
Index: linux/drivers/scsi/arm/oak.c
===
--- linux.orig/drivers/scsi/arm/oak.c   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/arm/oak.c2016-03-14 15:26:34.0 +1100
@@ -24,6 +24,8 @@
writeb(value, priv(instance)->base + ((reg) << 2))
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) (0)
+#define NCR5380_dma_recv_setup oakscsi_pread
+#define NCR5380_dma_send_setup oakscsi_pwrite
 
 #define NCR5380_queue_command  oakscsi_queue_command
 #define 

[PATCH 06/22] ncr5380: Remove PSEUDO_DMA macro

2016-03-13 Thread Finn Thain
For those wrapper drivers which only implement Programmed IO, have
NCR5380_dma_xfer_len() evaluate to zero. That allows PDMA to be easily
disabled at run-time and so the PSEUDO_DMA macro is no longer needed.

Also remove the spin counters used for debugging pseudo DMA drivers.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   32 +---
 drivers/scsi/NCR5380.h  |4 
 drivers/scsi/arm/cumana_1.c |2 --
 drivers/scsi/arm/oak.c  |3 +--
 drivers/scsi/dmx3191d.c |4 
 drivers/scsi/dtc.c  |7 ---
 drivers/scsi/dtc.h  |2 --
 drivers/scsi/g_NCR5380.c|1 -
 drivers/scsi/g_NCR5380.h|1 -
 drivers/scsi/mac_scsi.c |   10 --
 drivers/scsi/pas16.c|   10 --
 drivers/scsi/pas16.h|2 --
 drivers/scsi/t128.c |4 
 drivers/scsi/t128.h |2 --
 14 files changed, 6 insertions(+), 78 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:27.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:29.0 +1100
@@ -469,34 +469,9 @@ static void prepare_info(struct Scsi_Hos
 #ifdef PARITY
 "PARITY "
 #endif
-#ifdef PSEUDO_DMA
-"PSEUDO_DMA "
-#endif
 "");
 }
 
-#ifdef PSEUDO_DMA
-static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
-   char *buffer, int length)
-{
-   struct NCR5380_hostdata *hostdata = shost_priv(instance);
-
-   hostdata->spin_max_r = 0;
-   hostdata->spin_max_w = 0;
-   return 0;
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-struct Scsi_Host *instance)
-{
-   struct NCR5380_hostdata *hostdata = shost_priv(instance);
-
-   seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
-   hostdata->spin_max_w, hostdata->spin_max_r);
-   return 0;
-}
-#endif
-
 /**
  * NCR5380_init - initialise an NCR5380
  * @instance: adapter to configure
@@ -1436,7 +1411,6 @@ timeout:
return -1;
 }
 
-#if defined(PSEUDO_DMA)
 /*
  * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
  * unsigned char *phase, int *count, unsigned char **data)
@@ -1592,7 +1566,6 @@ static int NCR5380_transfer_dma(struct S
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
return foo;
 }
-#endif /* PSEUDO_DMA */
 
 /*
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
@@ -1683,7 +1656,6 @@ static void NCR5380_information_transfer
 * in an unconditional loop.
 */
 
-#if defined(PSEUDO_DMA)
transfersize = 0;
if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
@@ -1706,9 +1678,7 @@ static void NCR5380_information_transfer
/* XXX - need to source or sink 
data here, as appropriate */
} else
cmd->SCp.this_residual -= 
transfersize - len;
-   } else
-#endif /* PSEUDO_DMA */
-   {
+   } else {
/* Break up transfer into 3 ms chunks,
 * presuming 6 accesses per handshake.
 */
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:27.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:29.0 +1100
@@ -257,10 +257,6 @@ struct NCR5380_hostdata {
 #ifdef SUPPORT_TAGS
struct tag_alloc TagAlloc[8][8];/* 8 targets and 8 LUNs */
 #endif
-#ifdef PSEUDO_DMA
-   unsigned spin_max_r;
-   unsigned spin_max_w;
-#endif
struct workqueue_struct *work_q;
unsigned long accesses_per_ms;  /* chip register accesses per ms */
 };
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:27.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:29.0 +1100
@@ -13,8 +13,6 @@
 
 #include <scsi/scsi_host.h>
 
-#define PSEUDO_DMA
-
 #define priv(host) ((struct NCR5380_hostdata 
*)(host)->hostdata)
 #define NCR5380_read(reg)  cumanascsi_read(instance, reg)
 #define NCR5380_write(reg, value)  cumanascsi_write(instance, reg, value)
Index: linux/drivers/scsi/arm/oak.c
===
--- 

[PATCH 11/22] atari_scsi: Adopt NCR5380.c core driver

2016-03-13 Thread Finn Thain
Add support for the Atari ST DMA chip to the NCR5380.c core driver.
This code is copied from atari_NCR5380.c.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c|   32 
 drivers/scsi/atari_scsi.c |6 +++---
 2 files changed, 35 insertions(+), 3 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:42.0 +1100
@@ -29,6 +29,8 @@
  * Ronald van Cuijlenborg, Alan Cox and others.
  */
 
+/* Ported to Atari by Roman Hodek and others. */
+
 /*
  * Further development / testing that should be done :
  *
@@ -141,6 +143,14 @@
 #define NCR5380_io_delay(x)
 #endif
 
+#ifndef NCR5380_acquire_dma_irq
+#define NCR5380_acquire_dma_irq(x) (1)
+#endif
+
+#ifndef NCR5380_release_dma_irq
+#define NCR5380_release_dma_irq(x)
+#endif
+
 static int do_abort(struct Scsi_Host *);
 static void do_reset(struct Scsi_Host *);
 
@@ -658,6 +668,9 @@ static int NCR5380_queue_command(struct
 
cmd->result = 0;
 
+   if (!NCR5380_acquire_dma_irq(instance))
+   return SCSI_MLQUEUE_HOST_BUSY;
+
spin_lock_irqsave(&hostdata->lock, flags);
 
/*
@@ -682,6 +695,19 @@ static int NCR5380_queue_command(struct
return 0;
 }
 
+static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+   /* Caller does the locking needed to set & test these data atomically */
+   if (list_empty(&hostdata->disconnected) &&
+   list_empty(&hostdata->unissued) &&
+   list_empty(&hostdata->autosense) &&
+   !hostdata->connected &&
+   !hostdata->selecting)
+   NCR5380_release_dma_irq(instance);
+}
+
 /**
  * dequeue_next_cmd - dequeue a command for processing
  * @instance: the scsi host instance
@@ -783,6 +809,7 @@ static void NCR5380_main(struct work_str
 
if (!NCR5380_select(instance, cmd)) {
dsprintk(NDEBUG_MAIN, instance, "main: select 
complete\n");
+   maybe_release_dma_irq(instance);
} else {
dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
 "main: select failed, returning %p to 
queue\n", cmd);
@@ -1828,6 +1855,8 @@ static void NCR5380_information_transfer
 
/* Enable reselect interrupts */
NCR5380_write(SELECT_ENABLE_REG, 
hostdata->id_mask);
+
+   maybe_release_dma_irq(instance);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
@@ -1963,6 +1992,7 @@ static void NCR5380_information_transfer
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
complete_cmd(instance, cmd);
+   maybe_release_dma_irq(instance);
NCR5380_write(SELECT_ENABLE_REG, 
hostdata->id_mask);
return;
}
@@ -2256,6 +2286,7 @@ out:
dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted 
%p\n", cmd);
 
queue_work(hostdata->work_q, &hostdata->main_task);
+   maybe_release_dma_irq(instance);
spin_unlock_irqrestore(&hostdata->lock, flags);
 
return result;
@@ -2336,6 +2367,7 @@ static int NCR5380_bus_reset(struct scsi
hostdata->dma_len = 0;
 
queue_work(hostdata->work_q, &hostdata->main_task);
+   maybe_release_dma_irq(instance);
spin_unlock_irqrestore(&hostdata->lock, flags);
 
return SUCCESS;
Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:37.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:42.0 +1100
@@ -97,9 +97,9 @@
 #define NCR5380_abort   atari_scsi_abort
 #define NCR5380_info    atari_scsi_info
 
-#define NCR5380_dma_read_setup(instance, data, count) \
+#define NCR5380_dma_recv_setup(instance, data, count) \
 atari_scsi_dma_setup(instance, data, count, 0)
-#define NCR5380_dma_write_setup(instance, data, count) \
+#define NCR5380_dma_send_setup(instance, data, count) \
 atari_scsi_dma_setup(instance, data, count, 1)
 #define NCR5380_dma_residual(instance) \
 atari_scsi_dma_residual(instance)
@@ -713,7 +713,7 @@ static void atari_scsi_falcon_reg_write(
 }
 
 
-#include "atari_NCR5380.c"
+#include "NCR5380.c"
 
 static int 

[PATCH 08/22] ncr5380: Use DMA hooks for PDMA

2016-03-13 Thread Finn Thain
Those wrapper drivers which use DMA define the REAL_DMA macro and
those which use pseudo DMA define PSEUDO_DMA. These macros need to be
removed for a number of reasons, not least of which is to have drivers
share more code.

Redefine the PDMA send and receive hooks as DMA setup hooks, so that the
DMA code can be shared by all 5380 wrapper drivers. This will help to
reunify the forked core driver.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   10 ++
 drivers/scsi/arm/cumana_1.c |   10 ++
 drivers/scsi/arm/oak.c  |   10 ++
 drivers/scsi/dmx3191d.c |4 ++--
 drivers/scsi/dtc.c  |6 --
 drivers/scsi/dtc.h  |2 ++
 drivers/scsi/g_NCR5380.c|   10 ++
 drivers/scsi/g_NCR5380.h|4 ++--
 drivers/scsi/mac_scsi.c |5 ++---
 drivers/scsi/pas16.c|   14 --
 drivers/scsi/pas16.h|2 ++
 drivers/scsi/t128.c |   12 ++--
 drivers/scsi/t128.h |2 ++
 13 files changed, 50 insertions(+), 41 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:32.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:34.0 +1100
@@ -127,17 +127,11 @@
  * specific implementation of the NCR5380
  *
  * Either real DMA *or* pseudo DMA may be implemented
- * Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
  *
  * NCR5380_dma_write_setup(instance, src, count) - initialize
  * NCR5380_dma_read_setup(instance, dst, count) - initialize
  * NCR5380_dma_residual(instance); - residual count
  *
- * PSEUDO functions :
- * NCR5380_pwrite(instance, src, count)
- * NCR5380_pread(instance, dst, count);
- *
  * The generic driver is initialized by calling NCR5380_init(instance),
  * after setting the appropriate host specific fields and ID.  If the
  * driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
@@ -1511,7 +1505,7 @@ static int NCR5380_transfer_dma(struct S
  */
 
if (p & SR_IO) {
-   foo = NCR5380_pread(instance, d,
+   foo = NCR5380_dma_recv_setup(instance, d,
hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
@@ -1542,7 +1536,7 @@ static int NCR5380_transfer_dma(struct S
d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
} else {
-   foo = NCR5380_pwrite(instance, d, c);
+   foo = NCR5380_dma_send_setup(instance, d, c);
if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:29.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:34.0 +1100
@@ -18,6 +18,8 @@
 #define NCR5380_write(reg, value)  cumanascsi_write(instance, reg, value)
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+#define NCR5380_dma_recv_setup cumanascsi_pread
+#define NCR5380_dma_send_setup cumanascsi_pwrite
 
 #define NCR5380_intr   cumanascsi_intr
 #define NCR5380_queue_command  cumanascsi_queue_command
@@ -39,8 +41,8 @@ void cumanascsi_setup(char *str, int *in
 #define L(v)   (((v)<<16)|((v) & 0x0000ffff))
 #define H(v)   (((v)>>16)|((v) & 0xffff0000))
 
-static inline int
-NCR5380_pwrite(struct Scsi_Host *host, unsigned char *addr, int len)
+static inline int cumanascsi_pwrite(struct Scsi_Host *host,
+unsigned char *addr, int len)
 {
   unsigned long *laddr;
   void __iomem *dma = priv(host)->dma + 0x2000;
@@ -102,8 +104,8 @@ end:
   return len;
 }
 
-static inline int
-NCR5380_pread(struct Scsi_Host *host, unsigned char *addr, int len)
+static inline int cumanascsi_pread(struct Scsi_Host *host,
+   unsigned char *addr, int len)
 {
   unsigned long *laddr;
   void __iomem *dma = priv(host)->dma + 0x2000;
Index: linux/drivers/scsi/arm/oak.c
===
--- linux.orig/drivers/scsi/arm/oak.c   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/arm/oak.c2016-03-14 15:26:34.0 +1100
@@ -24,6 +24,8 @@
writeb(value, priv(instance)->base + ((reg) << 2))
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) (0)
+#define NCR5380_dma_recv_setup oakscsi_pread
+#define NCR5380_dma_send_setup oakscsi_pwrite
 
 #define NCR5380_queue_command  oakscsi_queue_command
 #define NCR5380_info   

[PATCH 06/22] ncr5380: Remove PSEUDO_DMA macro

2016-03-13 Thread Finn Thain
For those wrapper drivers which only implement Programmed IO, have
NCR5380_dma_xfer_len() evaluate to zero. That allows PDMA to be easily
disabled at run-time and so the PSEUDO_DMA macro is no longer needed.

Also remove the spin counters used for debugging pseudo DMA drivers.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   32 +---
 drivers/scsi/NCR5380.h  |4 
 drivers/scsi/arm/cumana_1.c |2 --
 drivers/scsi/arm/oak.c  |3 +--
 drivers/scsi/dmx3191d.c |4 
 drivers/scsi/dtc.c  |7 ---
 drivers/scsi/dtc.h  |2 --
 drivers/scsi/g_NCR5380.c|1 -
 drivers/scsi/g_NCR5380.h|1 -
 drivers/scsi/mac_scsi.c |   10 --
 drivers/scsi/pas16.c|   10 --
 drivers/scsi/pas16.h|2 --
 drivers/scsi/t128.c |4 
 drivers/scsi/t128.h |2 --
 14 files changed, 6 insertions(+), 78 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:27.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:29.0 +1100
@@ -469,34 +469,9 @@ static void prepare_info(struct Scsi_Hos
 #ifdef PARITY
 "PARITY "
 #endif
-#ifdef PSEUDO_DMA
-"PSEUDO_DMA "
-#endif
 "");
 }
 
-#ifdef PSEUDO_DMA
-static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
-   char *buffer, int length)
-{
-   struct NCR5380_hostdata *hostdata = shost_priv(instance);
-
-   hostdata->spin_max_r = 0;
-   hostdata->spin_max_w = 0;
-   return 0;
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
-struct Scsi_Host *instance)
-{
-   struct NCR5380_hostdata *hostdata = shost_priv(instance);
-
-   seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
-   hostdata->spin_max_w, hostdata->spin_max_r);
-   return 0;
-}
-#endif
-
 /**
  * NCR5380_init - initialise an NCR5380
  * @instance: adapter to configure
@@ -1436,7 +1411,6 @@ timeout:
return -1;
 }
 
-#if defined(PSEUDO_DMA)
 /*
  * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
  * unsigned char *phase, int *count, unsigned char **data)
@@ -1592,7 +1566,6 @@ static int NCR5380_transfer_dma(struct S
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
return foo;
 }
-#endif /* PSEUDO_DMA */
 
 /*
  * Function : NCR5380_information_transfer (struct Scsi_Host *instance)
@@ -1683,7 +1656,6 @@ static void NCR5380_information_transfer
 * in an unconditional loop.
 */
 
-#if defined(PSEUDO_DMA)
transfersize = 0;
if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
@@ -1706,9 +1678,7 @@ static void NCR5380_information_transfer
/* XXX - need to source or sink 
data here, as appropriate */
} else
cmd->SCp.this_residual -= 
transfersize - len;
-   } else
-#endif /* PSEUDO_DMA */
-   {
+   } else {
/* Break up transfer into 3 ms chunks,
 * presuming 6 accesses per handshake.
 */
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:27.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:29.0 +1100
@@ -257,10 +257,6 @@ struct NCR5380_hostdata {
 #ifdef SUPPORT_TAGS
struct tag_alloc TagAlloc[8][8];/* 8 targets and 8 LUNs */
 #endif
-#ifdef PSEUDO_DMA
-   unsigned spin_max_r;
-   unsigned spin_max_w;
-#endif
struct workqueue_struct *work_q;
unsigned long accesses_per_ms;  /* chip register accesses per ms */
 };
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:27.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:29.0 +1100
@@ -13,8 +13,6 @@
 
 #include <scsi/scsi_host.h>
 
-#define PSEUDO_DMA
-
 #define priv(host) ((struct NCR5380_hostdata 
*)(host)->hostdata)
 #define NCR5380_read(reg)  cumanascsi_read(instance, reg)
 #define NCR5380_write(reg, value)  cumanascsi_write(instance, reg, value)
Index: linux/drivers/scsi/arm/oak.c
===
--- linux.orig/drivers/scsi/arm/oak.c   

[PATCH 11/22] atari_scsi: Adopt NCR5380.c core driver

2016-03-13 Thread Finn Thain
Add support for the Atari ST DMA chip to the NCR5380.c core driver.
This code is copied from atari_NCR5380.c.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c|   32 
 drivers/scsi/atari_scsi.c |6 +++---
 2 files changed, 35 insertions(+), 3 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:42.0 +1100
@@ -29,6 +29,8 @@
  * Ronald van Cuijlenborg, Alan Cox and others.
  */
 
+/* Ported to Atari by Roman Hodek and others. */
+
 /*
  * Further development / testing that should be done :
  *
@@ -141,6 +143,14 @@
 #define NCR5380_io_delay(x)
 #endif
 
+#ifndef NCR5380_acquire_dma_irq
+#define NCR5380_acquire_dma_irq(x) (1)
+#endif
+
+#ifndef NCR5380_release_dma_irq
+#define NCR5380_release_dma_irq(x)
+#endif
+
 static int do_abort(struct Scsi_Host *);
 static void do_reset(struct Scsi_Host *);
 
@@ -658,6 +668,9 @@ static int NCR5380_queue_command(struct
 
cmd->result = 0;
 
+   if (!NCR5380_acquire_dma_irq(instance))
+   return SCSI_MLQUEUE_HOST_BUSY;
+
spin_lock_irqsave(&hostdata->lock, flags);
 
/*
@@ -682,6 +695,19 @@ static int NCR5380_queue_command(struct
return 0;
 }
 
+static inline void maybe_release_dma_irq(struct Scsi_Host *instance)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+   /* Caller does the locking needed to set & test these data atomically */
+   if (list_empty(&hostdata->disconnected) &&
+   list_empty(&hostdata->unissued) &&
+   list_empty(&hostdata->autosense) &&
+   !hostdata->connected &&
+   !hostdata->selecting)
+   NCR5380_release_dma_irq(instance);
+}
+
 /**
  * dequeue_next_cmd - dequeue a command for processing
  * @instance: the scsi host instance
@@ -783,6 +809,7 @@ static void NCR5380_main(struct work_str
 
if (!NCR5380_select(instance, cmd)) {
dsprintk(NDEBUG_MAIN, instance, "main: select 
complete\n");
+   maybe_release_dma_irq(instance);
} else {
dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
 "main: select failed, returning %p to 
queue\n", cmd);
@@ -1828,6 +1855,8 @@ static void NCR5380_information_transfer
 
/* Enable reselect interrupts */
NCR5380_write(SELECT_ENABLE_REG, 
hostdata->id_mask);
+
+   maybe_release_dma_irq(instance);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
@@ -1963,6 +1992,7 @@ static void NCR5380_information_transfer
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
complete_cmd(instance, cmd);
+   maybe_release_dma_irq(instance);
NCR5380_write(SELECT_ENABLE_REG, 
hostdata->id_mask);
return;
}
@@ -2256,6 +2286,7 @@ out:
dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted 
%p\n", cmd);
 
queue_work(hostdata->work_q, &hostdata->main_task);
+   maybe_release_dma_irq(instance);
spin_unlock_irqrestore(&hostdata->lock, flags);
 
return result;
@@ -2336,6 +2367,7 @@ static int NCR5380_bus_reset(struct scsi
hostdata->dma_len = 0;
 
queue_work(hostdata->work_q, &hostdata->main_task);
+   maybe_release_dma_irq(instance);
spin_unlock_irqrestore(&hostdata->lock, flags);
 
return SUCCESS;
Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:37.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:42.0 +1100
@@ -97,9 +97,9 @@
 #define NCR5380_abort   atari_scsi_abort
 #define NCR5380_info    atari_scsi_info
 
-#define NCR5380_dma_read_setup(instance, data, count) \
+#define NCR5380_dma_recv_setup(instance, data, count) \
 atari_scsi_dma_setup(instance, data, count, 0)
-#define NCR5380_dma_write_setup(instance, data, count) \
+#define NCR5380_dma_send_setup(instance, data, count) \
 atari_scsi_dma_setup(instance, data, count, 1)
 #define NCR5380_dma_residual(instance) \
 atari_scsi_dma_residual(instance)
@@ -713,7 +713,7 @@ static void atari_scsi_falcon_reg_write(
 }
 
 
-#include "atari_NCR5380.c"
+#include "NCR5380.c"
 
 static int atari_scsi_bus_reset(struct scsi_cmnd *cmd)
 {




[PATCH 05/22] ncr5380: Disable the DMA errata workaround flag by default

2016-03-13 Thread Finn Thain
The only chip that needs the workarounds enabled is an early NMOS
device. That means that the common case is to disable them.

Unfortunately the sense of the flag is such that it has to be set
for the common case.

Rename the flag so that zero can be used to mean "no errata workarounds
needed". This simplifies the code.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   14 +++---
 drivers/scsi/NCR5380.h  |2 +-
 drivers/scsi/arm/cumana_1.c |2 +-
 drivers/scsi/arm/oak.c  |2 +-
 drivers/scsi/dtc.c  |2 +-
 drivers/scsi/g_NCR5380.c|8 +---
 drivers/scsi/pas16.c|2 +-
 drivers/scsi/t128.c |2 +-
 8 files changed, 14 insertions(+), 20 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:27.0 +1100
@@ -457,7 +457,7 @@ static void prepare_info(struct Scsi_Hos
 instance->base, instance->irq,
 instance->can_queue, instance->cmd_per_lun,
 instance->sg_tablesize, instance->this_id,
-hostdata->flags & FLAG_NO_DMA_FIXUP  ? "NO_DMA_FIXUP "  : "",
+hostdata->flags & FLAG_DMA_FIXUP ? "DMA_FIXUP " : "",
 hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
 hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY "  : "",
 #ifdef AUTOPROBE_IRQ
@@ -1480,11 +1480,11 @@ static int NCR5380_transfer_dma(struct S
 * before the setting of DMA mode to after transfer of the last byte.
 */
 
-   if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+   if (hostdata->flags & FLAG_DMA_FIXUP)
+   NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
+   else
NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
MR_ENABLE_EOP_INTR);
-   else
-   NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 
dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, 
NCR5380_read(MODE_REG));
 
@@ -1540,8 +1540,8 @@ static int NCR5380_transfer_dma(struct S
 
if (p & SR_IO) {
foo = NCR5380_pread(instance, d,
-   hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
-   if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
+   hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
+   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * The workaround was to transfer fewer bytes than we
 * intended to with the pseudo-DMA read function, wait 
for
@@ -1571,7 +1571,7 @@ static int NCR5380_transfer_dma(struct S
}
} else {
foo = NCR5380_pwrite(instance, d, c);
-   if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
+   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
 * the byte we're interested, we'll ACK it and it will 
go false.
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:27.0 +1100
@@ -220,7 +220,7 @@
 #define NO_IRQ 0
 #endif
 
-#define FLAG_NO_DMA_FIXUP  1   /* No DMA errata workarounds */
+#define FLAG_DMA_FIXUP 1   /* Use DMA errata workarounds */
 #define FLAG_NO_PSEUDO_DMA 8   /* Inhibit DMA */
 #define FLAG_LATE_DMA_SETUP32  /* Setup NCR before DMA H/W */
 #define FLAG_TAGGED_QUEUING64  /* as X3T9.2 spelled it */
Index: linux/drivers/scsi/dtc.c
===
--- linux.orig/drivers/scsi/dtc.c   2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/dtc.c2016-03-14 15:26:27.0 +1100
@@ -229,7 +229,7 @@ found:
instance->base = addr;
((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
 
-   if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+   if (NCR5380_init(instance, 0))
goto out_unregister;
 
NCR5380_maybe_reset_bus(instance);
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:22.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:27.0 +1100
@@ -348,23 +348,17 @@ static int __init 

[PATCH 07/22] ncr5380: Remove BOARD_REQUIRES_NO_DELAY macro

2016-03-13 Thread Finn Thain
The io_recovery_delay macro is intended to insert a microsecond delay
between the chip register accesses that begin a DMA operation. This
is reportedly needed for some ISA boards.

Reverse the sense of the macro test so that in the common case,
where no delay is required, drivers need not define the macro.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |   18 --
 drivers/scsi/dtc.h   |2 ++
 drivers/scsi/g_NCR5380.h |2 ++
 drivers/scsi/t128.h  |2 ++
 4 files changed, 14 insertions(+), 10 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:32.0 +1100
@@ -39,12 +39,6 @@
  * tagged queueing)
  */
 
-#ifdef BOARD_REQUIRES_NO_DELAY
-#define io_recovery_delay(x)
-#else
-#define io_recovery_delay(x)   udelay(x)
-#endif
-
 /*
  * Design
  *
@@ -150,6 +144,10 @@
  * possible) function may be used.
  */
 
+#ifndef NCR5380_io_delay
+#define NCR5380_io_delay(x)
+#endif
+
 static int do_abort(struct Scsi_Host *);
 static void do_reset(struct Scsi_Host *);
 
@@ -1468,14 +1466,14 @@ static int NCR5380_transfer_dma(struct S
 */
 
if (p & SR_IO) {
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | 
ICR_ASSERT_DATA);
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(START_DMA_SEND_REG, 0);
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
}
 
 /*
Index: linux/drivers/scsi/dtc.h
===
--- linux.orig/drivers/scsi/dtc.h   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/dtc.h2016-03-14 15:26:32.0 +1100
@@ -28,6 +28,8 @@
 #define NCR5380_bus_reset  dtc_bus_reset
 #define NCR5380_info   dtc_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 /* 15 12 11 10
1001 1100   */
 
Index: linux/drivers/scsi/g_NCR5380.h
===
--- linux.orig/drivers/scsi/g_NCR5380.h 2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/g_NCR5380.h  2016-03-14 15:26:32.0 +1100
@@ -71,6 +71,8 @@
 #define NCR5380_pwrite generic_NCR5380_pwrite
 #define NCR5380_info generic_NCR5380_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 #define BOARD_NCR5380  0
 #define BOARD_NCR53C400 1
 #define BOARD_NCR53C400A 2
Index: linux/drivers/scsi/t128.h
===
--- linux.orig/drivers/scsi/t128.h  2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/t128.h   2016-03-14 15:26:32.0 +1100
@@ -84,6 +84,8 @@
 #define NCR5380_bus_reset t128_bus_reset
 #define NCR5380_info t128_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 /* 15 14 12 10 7 5 3
1101 0100 1010 1000 */
 




[PATCH 05/22] ncr5380: Disable the DMA errata workaround flag by default

2016-03-13 Thread Finn Thain
The only chip that needs the workarounds enabled is an early NMOS
device. That means that the common case is to disable them.

Unfortunately the sense of the flag is such that it has to be set
for the common case.

Rename the flag so that zero can be used to mean "no errata workarounds
needed". This simplifies the code.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   14 +++---
 drivers/scsi/NCR5380.h  |2 +-
 drivers/scsi/arm/cumana_1.c |2 +-
 drivers/scsi/arm/oak.c  |2 +-
 drivers/scsi/dtc.c  |2 +-
 drivers/scsi/g_NCR5380.c|8 +---
 drivers/scsi/pas16.c|2 +-
 drivers/scsi/t128.c |2 +-
 8 files changed, 14 insertions(+), 20 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:27.0 +1100
@@ -457,7 +457,7 @@ static void prepare_info(struct Scsi_Hos
 instance->base, instance->irq,
 instance->can_queue, instance->cmd_per_lun,
 instance->sg_tablesize, instance->this_id,
-hostdata->flags & FLAG_NO_DMA_FIXUP  ? "NO_DMA_FIXUP "  : "",
+hostdata->flags & FLAG_DMA_FIXUP ? "DMA_FIXUP " : "",
 hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
 hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY "  : "",
 #ifdef AUTOPROBE_IRQ
@@ -1480,11 +1480,11 @@ static int NCR5380_transfer_dma(struct S
 * before the setting of DMA mode to after transfer of the last byte.
 */
 
-   if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+   if (hostdata->flags & FLAG_DMA_FIXUP)
+   NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
+   else
NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
MR_ENABLE_EOP_INTR);
-   else
-   NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
 
dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, 
NCR5380_read(MODE_REG));
 
@@ -1540,8 +1540,8 @@ static int NCR5380_transfer_dma(struct S
 
if (p & SR_IO) {
foo = NCR5380_pread(instance, d,
-   hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
-   if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
+   hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
+   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * The workaround was to transfer fewer bytes than we
 * intended to with the pseudo-DMA read function, wait 
for
@@ -1571,7 +1571,7 @@ static int NCR5380_transfer_dma(struct S
}
} else {
foo = NCR5380_pwrite(instance, d, c);
-   if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
+   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
 * the byte we're interested, we'll ACK it and it will 
go false.
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:23.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:27.0 +1100
@@ -220,7 +220,7 @@
 #define NO_IRQ 0
 #endif
 
-#define FLAG_NO_DMA_FIXUP  1   /* No DMA errata workarounds */
+#define FLAG_DMA_FIXUP 1   /* Use DMA errata workarounds */
 #define FLAG_NO_PSEUDO_DMA 8   /* Inhibit DMA */
 #define FLAG_LATE_DMA_SETUP32  /* Setup NCR before DMA H/W */
 #define FLAG_TAGGED_QUEUING64  /* as X3T9.2 spelled it */
Index: linux/drivers/scsi/dtc.c
===
--- linux.orig/drivers/scsi/dtc.c   2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/dtc.c2016-03-14 15:26:27.0 +1100
@@ -229,7 +229,7 @@ found:
instance->base = addr;
((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
 
-   if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+   if (NCR5380_init(instance, 0))
goto out_unregister;
 
NCR5380_maybe_reset_bus(instance);
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:22.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:27.0 +1100
@@ -348,23 +348,17 @@ static int __init generic_NCR5380_detect
flags = 

[PATCH 07/22] ncr5380: Remove BOARD_REQUIRES_NO_DELAY macro

2016-03-13 Thread Finn Thain
The io_recovery_delay macro is intended to insert a microsecond delay
between the chip register accesses that begin a DMA operation. This
is reportedly needed for some ISA boards.

Reverse the sense of the macro test so that in the common case,
where no delay is required, drivers need not define the macro.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |   18 --
 drivers/scsi/dtc.h   |2 ++
 drivers/scsi/g_NCR5380.h |2 ++
 drivers/scsi/t128.h  |2 ++
 4 files changed, 14 insertions(+), 10 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:32.0 +1100
@@ -39,12 +39,6 @@
  * tagged queueing)
  */
 
-#ifdef BOARD_REQUIRES_NO_DELAY
-#define io_recovery_delay(x)
-#else
-#define io_recovery_delay(x)   udelay(x)
-#endif
-
 /*
  * Design
  *
@@ -150,6 +144,10 @@
  * possible) function may be used.
  */
 
+#ifndef NCR5380_io_delay
+#define NCR5380_io_delay(x)
+#endif
+
 static int do_abort(struct Scsi_Host *);
 static void do_reset(struct Scsi_Host *);
 
@@ -1468,14 +1466,14 @@ static int NCR5380_transfer_dma(struct S
 */
 
if (p & SR_IO) {
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | 
ICR_ASSERT_DATA);
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
NCR5380_write(START_DMA_SEND_REG, 0);
-   io_recovery_delay(1);
+   NCR5380_io_delay(1);
}
 
 /*
Index: linux/drivers/scsi/dtc.h
===
--- linux.orig/drivers/scsi/dtc.h   2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/dtc.h2016-03-14 15:26:32.0 +1100
@@ -28,6 +28,8 @@
 #define NCR5380_bus_reset  dtc_bus_reset
 #define NCR5380_info   dtc_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 /* 15 12 11 10
1001 1100   */
 
Index: linux/drivers/scsi/g_NCR5380.h
===
--- linux.orig/drivers/scsi/g_NCR5380.h 2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/g_NCR5380.h  2016-03-14 15:26:32.0 +1100
@@ -71,6 +71,8 @@
 #define NCR5380_pwrite generic_NCR5380_pwrite
 #define NCR5380_info generic_NCR5380_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 #define BOARD_NCR5380  0
 #define BOARD_NCR53C4001
 #define BOARD_NCR53C400A 2
Index: linux/drivers/scsi/t128.h
===
--- linux.orig/drivers/scsi/t128.h  2016-03-14 15:26:29.0 +1100
+++ linux/drivers/scsi/t128.h   2016-03-14 15:26:32.0 +1100
@@ -84,6 +84,8 @@
 #define NCR5380_bus_reset t128_bus_reset
 #define NCR5380_info t128_info
 
+#define NCR5380_io_delay(x)udelay(x)
+
 /* 15 14 12 10 7 5 3
1101 0100 1010 1000 */
 




[PATCH 02/22] ncr5380: Remove FLAG_NO_PSEUDO_DMA where possible

2016-03-13 Thread Finn Thain
Drivers that define PSEUDO_DMA also define NCR5380_dma_xfer_len.
The core driver must call NCR5380_dma_xfer_len which means
FLAG_NO_PSEUDO_DMA can be eradicated from the core driver.

dmx3191d doesn't define PSEUDO_DMA and has no use for FLAG_NO_PSEUDO_DMA,
so remove it there also.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |3 +--
 drivers/scsi/dmx3191d.c  |2 +-
 drivers/scsi/g_NCR5380.c |7 ++-
 drivers/scsi/g_NCR5380.h |2 +-
 drivers/scsi/mac_scsi.c  |   15 ++-
 5 files changed, 23 insertions(+), 6 deletions(-)

Index: linux/drivers/scsi/dmx3191d.c
===
--- linux.orig/drivers/scsi/dmx3191d.c  2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/dmx3191d.c   2016-03-14 15:26:22.0 +1100
@@ -93,7 +93,7 @@ static int dmx3191d_probe_one(struct pci
 */
shost->irq = NO_IRQ;
 
-   error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+   error = NCR5380_init(shost, 0);
if (error)
goto out_host_put;
 
Index: linux/drivers/scsi/mac_scsi.c
===
--- linux.orig/drivers/scsi/mac_scsi.c  2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/mac_scsi.c   2016-03-14 15:26:22.0 +1100
@@ -37,7 +37,9 @@
 
 #define NCR5380_pread   macscsi_pread
 #define NCR5380_pwrite  macscsi_pwrite
-#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+macscsi_dma_xfer_len(instance, cmd)
 
 #define NCR5380_intrmacscsi_intr
 #define NCR5380_queue_command   macscsi_queue_command
@@ -303,6 +305,17 @@ static int macscsi_pwrite(struct Scsi_Ho
 }
 #endif
 
+static int macscsi_dma_xfer_len(struct Scsi_Host *instance,
+struct scsi_cmnd *cmd)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+   if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
+   return 0;
+
+   return cmd->transfersize;
+}
+
 #include "NCR5380.c"
 
 #define DRV_MODULE_NAME "mac_scsi"
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:22.0 +1100
@@ -712,10 +712,15 @@ static inline int NCR5380_pwrite(struct
return 0;
 }
 
-static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+static int generic_NCR5380_dma_xfer_len(struct Scsi_Host *instance,
+struct scsi_cmnd *cmd)
 {
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transfersize = cmd->transfersize;
 
+   if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
+   return 0;
+
/* Limit transfers to 32K, for xx400 & xx406
 * pseudoDMA that transfers in 128 bytes blocks.
 */
Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:22.0 +1100
@@ -1833,8 +1833,7 @@ static void NCR5380_information_transfer
 
 #if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
transfersize = 0;
-   if (!cmd->device->borken &&
-   !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+   if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
if (transfersize) {
Index: linux/drivers/scsi/g_NCR5380.h
===
--- linux.orig/drivers/scsi/g_NCR5380.h 2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/g_NCR5380.h  2016-03-14 15:26:22.0 +1100
@@ -61,7 +61,7 @@
 #endif
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
-generic_NCR5380_dma_xfer_len(cmd)
+generic_NCR5380_dma_xfer_len(instance, cmd)
 
 #define NCR5380_intr generic_NCR5380_intr
 #define NCR5380_queue_command generic_NCR5380_queue_command




[PATCH 14/22] ncr5380: Add MAX_LUN limit

2016-03-13 Thread Finn Thain
The driver has a limit of eight LUs because of the byte-sized bitfield
that is used for busy flags. Reject commands with LUN > 7.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c |6 ++
 drivers/scsi/NCR5380.h |2 ++
 2 files changed, 8 insertions(+)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:45.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:48.0 +1100
@@ -661,6 +661,12 @@ static int NCR5380_queue_command(struct
}
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
+   if (cmd->device->lun > MAX_LUN) {
+   cmd->result = DID_NO_CONNECT << 16;
+   cmd->scsi_done(cmd);
+   return 0;
+   }
+
cmd->result = 0;
 
if (!NCR5380_acquire_dma_irq(instance))
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:45.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:48.0 +1100
@@ -244,6 +244,8 @@ struct NCR5380_hostdata {
 
 #ifdef __KERNEL__
 
+#define MAX_LUN7
+
 struct NCR5380_cmd {
struct list_head list;
 };




[PATCH 10/22] ncr5380: Merge DMA implementation from atari_NCR5380 core driver

2016-03-13 Thread Finn Thain
Adopt the DMA implementation from atari_NCR5380.c. This means that
atari_scsi and sun3_scsi can make use of the NCR5380.c core driver
and the atari_NCR5380.c driver fork can be made redundant.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |  170 +++-
 drivers/scsi/arm/cumana_1.c |3 
 drivers/scsi/arm/oak.c  |3 
 drivers/scsi/dmx3191d.c |1 
 drivers/scsi/dtc.c  |2 
 drivers/scsi/dtc.h  |1 
 drivers/scsi/g_NCR5380.c|2 
 drivers/scsi/g_NCR5380.h|1 
 drivers/scsi/mac_scsi.c |3 
 drivers/scsi/pas16.c|2 
 drivers/scsi/pas16.h|1 
 drivers/scsi/t128.c |2 
 drivers/scsi/t128.h |1 
 13 files changed, 152 insertions(+), 40 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:37.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:39.0 +1100
@@ -31,9 +31,6 @@
 
 /*
  * Further development / testing that should be done :
- * 1.  Cleanup the NCR5380_transfer_dma function and DMA operation complete
- * code so that everything does the same thing that's done at the
- * end of a pseudo-DMA read operation.
  *
  * 4.  Test SCSI-II tagged queueing (I have no devices which support
  * tagged queueing)
@@ -117,6 +114,8 @@
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
+ * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
+ *
  * These macros MUST be defined :
  *
  * NCR5380_read(register)  - read from the specified register
@@ -801,6 +800,72 @@ static void NCR5380_main(struct work_str
} while (!done);
 }
 
+/*
+ * NCR5380_dma_complete - finish DMA transfer
+ * @instance: the scsi host instance
+ *
+ * Called by the interrupt handler when DMA finishes or a phase
+ * mismatch occurs (which would end the DMA transfer).
+ */
+
+static void NCR5380_dma_complete(struct Scsi_Host *instance)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+   int transferred;
+   unsigned char **data;
+   int *count;
+   int saved_data = 0, overrun = 0;
+   unsigned char p;
+
+   if (hostdata->read_overruns) {
+   p = hostdata->connected->SCp.phase;
+   if (p & SR_IO) {
+   udelay(10);
+   if ((NCR5380_read(BUS_AND_STATUS_REG) &
+(BASR_PHASE_MATCH | BASR_ACK)) ==
+   (BASR_PHASE_MATCH | BASR_ACK)) {
+   saved_data = NCR5380_read(INPUT_DATA_REG);
+   overrun = 1;
+   dsprintk(NDEBUG_DMA, instance, "read overrun 
handled\n");
+   }
+   }
+   }
+
+   NCR5380_write(MODE_REG, MR_BASE);
+   NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+   NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+   transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
+   hostdata->dma_len = 0;
+
+   data = (unsigned char **)&hostdata->connected->SCp.ptr;
+   count = &hostdata->connected->SCp.this_residual;
+   *data += transferred;
+   *count -= transferred;
+
+   if (hostdata->read_overruns) {
+   int cnt, toPIO;
+
+   if ((NCR5380_read(STATUS_REG) & PHASE_MASK) == p && (p & 
SR_IO)) {
+   cnt = toPIO = hostdata->read_overruns;
+   if (overrun) {
+   dsprintk(NDEBUG_DMA, instance,
+"Got an input overrun, using saved 
byte\n");
+   *(*data)++ = saved_data;
+   (*count)--;
+   cnt--;
+   toPIO--;
+   }
+   if (toPIO > 0) {
+   dsprintk(NDEBUG_DMA, instance,
+"Doing %d byte PIO to 0x%p\n", cnt, 
*data);
+   NCR5380_transfer_pio(instance, &p, &cnt, data);
+   *count -= toPIO - cnt;
+   }
+   }
+   }
+}
+
 #ifndef DONT_USE_INTR
 
 /**
@@ -855,7 +920,22 @@ static irqreturn_t NCR5380_intr(int irq,
dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 
0x%02x, MR 0x%02x\n",
 irq, basr, sr, mr);
 
-   if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+   if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+   /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+* We ack IRQ after clearing Mode Register. Workarounds
+* for End of DMA errata need to happen in DMA Mode.
+ 

[PATCH 02/22] ncr5380: Remove FLAG_NO_PSEUDO_DMA where possible

2016-03-13 Thread Finn Thain
Drivers that define PSEUDO_DMA also define NCR5380_dma_xfer_len.
The core driver must call NCR5380_dma_xfer_len which means
FLAG_NO_PSEUDO_DMA can be eradicated from the core driver.

dmx3191d doesn't define PSEUDO_DMA and has no use for FLAG_NO_PSEUDO_DMA,
so remove it there also.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |3 +--
 drivers/scsi/dmx3191d.c  |2 +-
 drivers/scsi/g_NCR5380.c |7 ++-
 drivers/scsi/g_NCR5380.h |2 +-
 drivers/scsi/mac_scsi.c  |   15 ++-
 5 files changed, 23 insertions(+), 6 deletions(-)

Index: linux/drivers/scsi/dmx3191d.c
===
--- linux.orig/drivers/scsi/dmx3191d.c  2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/dmx3191d.c   2016-03-14 15:26:22.0 +1100
@@ -93,7 +93,7 @@ static int dmx3191d_probe_one(struct pci
 */
shost->irq = NO_IRQ;
 
-   error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+   error = NCR5380_init(shost, 0);
if (error)
goto out_host_put;
 
Index: linux/drivers/scsi/mac_scsi.c
===
--- linux.orig/drivers/scsi/mac_scsi.c  2016-03-14 15:17:25.0 +1100
+++ linux/drivers/scsi/mac_scsi.c   2016-03-14 15:26:22.0 +1100
@@ -37,7 +37,9 @@
 
 #define NCR5380_pread   macscsi_pread
 #define NCR5380_pwrite  macscsi_pwrite
-#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+macscsi_dma_xfer_len(instance, cmd)
 
 #define NCR5380_intrmacscsi_intr
 #define NCR5380_queue_command   macscsi_queue_command
@@ -303,6 +305,17 @@ static int macscsi_pwrite(struct Scsi_Ho
 }
 #endif
 
+static int macscsi_dma_xfer_len(struct Scsi_Host *instance,
+struct scsi_cmnd *cmd)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+   if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
+   return 0;
+
+   return cmd->transfersize;
+}
+
 #include "NCR5380.c"
 
 #define DRV_MODULE_NAME "mac_scsi"
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:22.0 +1100
@@ -712,10 +712,15 @@ static inline int NCR5380_pwrite(struct
return 0;
 }
 
-static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+static int generic_NCR5380_dma_xfer_len(struct Scsi_Host *instance,
+struct scsi_cmnd *cmd)
 {
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transfersize = cmd->transfersize;
 
+   if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
+   return 0;
+
/* Limit transfers to 32K, for xx400 & xx406
 * pseudoDMA that transfers in 128 bytes blocks.
 */
Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:22.0 +1100
@@ -1833,8 +1833,7 @@ static void NCR5380_information_transfer
 
 #if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
transfersize = 0;
-   if (!cmd->device->borken &&
-   !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+   if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
if (transfersize) {
Index: linux/drivers/scsi/g_NCR5380.h
===
--- linux.orig/drivers/scsi/g_NCR5380.h 2016-03-14 15:26:20.0 +1100
+++ linux/drivers/scsi/g_NCR5380.h  2016-03-14 15:26:22.0 +1100
@@ -61,7 +61,7 @@
 #endif
 
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
-generic_NCR5380_dma_xfer_len(cmd)
+generic_NCR5380_dma_xfer_len(instance, cmd)
 
 #define NCR5380_intr generic_NCR5380_intr
 #define NCR5380_queue_command generic_NCR5380_queue_command




[PATCH 14/22] ncr5380: Add MAX_LUN limit

2016-03-13 Thread Finn Thain
The driver has a limit of eight LUs because of the byte-sized bitfield
that is used for busy flags. Reject commands with LUN > 7.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c |6 ++
 drivers/scsi/NCR5380.h |2 ++
 2 files changed, 8 insertions(+)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:45.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:48.0 +1100
@@ -661,6 +661,12 @@ static int NCR5380_queue_command(struct
}
 #endif /* (NDEBUG & NDEBUG_NO_WRITE) */
 
+   if (cmd->device->lun > MAX_LUN) {
+   cmd->result = DID_NO_CONNECT << 16;
+   cmd->scsi_done(cmd);
+   return 0;
+   }
+
cmd->result = 0;
 
if (!NCR5380_acquire_dma_irq(instance))
Index: linux/drivers/scsi/NCR5380.h
===
--- linux.orig/drivers/scsi/NCR5380.h   2016-03-14 15:26:45.0 +1100
+++ linux/drivers/scsi/NCR5380.h2016-03-14 15:26:48.0 +1100
@@ -244,6 +244,8 @@ struct NCR5380_hostdata {
 
 #ifdef __KERNEL__
 
+#define MAX_LUN7
+
 struct NCR5380_cmd {
struct list_head list;
 };




[PATCH 10/22] ncr5380: Merge DMA implementation from atari_NCR5380 core driver

2016-03-13 Thread Finn Thain
Adopt the DMA implementation from atari_NCR5380.c. This means that
atari_scsi and sun3_scsi can make use of the NCR5380.c core driver
and the atari_NCR5380.c driver fork can be made redundant.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |  170 +++-
 drivers/scsi/arm/cumana_1.c |3 
 drivers/scsi/arm/oak.c  |3 
 drivers/scsi/dmx3191d.c |1 
 drivers/scsi/dtc.c  |2 
 drivers/scsi/dtc.h  |1 
 drivers/scsi/g_NCR5380.c|2 
 drivers/scsi/g_NCR5380.h|1 
 drivers/scsi/mac_scsi.c |3 
 drivers/scsi/pas16.c|2 
 drivers/scsi/pas16.h|1 
 drivers/scsi/t128.c |2 
 drivers/scsi/t128.h |1 
 13 files changed, 152 insertions(+), 40 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:37.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:39.0 +1100
@@ -31,9 +31,6 @@
 
 /*
  * Further development / testing that should be done :
- * 1.  Cleanup the NCR5380_transfer_dma function and DMA operation complete
- * code so that everything does the same thing that's done at the
- * end of a pseudo-DMA read operation.
  *
  * 4.  Test SCSI-II tagged queueing (I have no devices which support
  * tagged queueing)
@@ -117,6 +114,8 @@
  *
  * PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
  *
+ * REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
+ *
  * These macros MUST be defined :
  *
  * NCR5380_read(register)  - read from the specified register
@@ -801,6 +800,72 @@ static void NCR5380_main(struct work_str
} while (!done);
 }
 
+/*
+ * NCR5380_dma_complete - finish DMA transfer
+ * @instance: the scsi host instance
+ *
+ * Called by the interrupt handler when DMA finishes or a phase
+ * mismatch occurs (which would end the DMA transfer).
+ */
+
+static void NCR5380_dma_complete(struct Scsi_Host *instance)
+{
+   struct NCR5380_hostdata *hostdata = shost_priv(instance);
+   int transferred;
+   unsigned char **data;
+   int *count;
+   int saved_data = 0, overrun = 0;
+   unsigned char p;
+
+   if (hostdata->read_overruns) {
+   p = hostdata->connected->SCp.phase;
+   if (p & SR_IO) {
+   udelay(10);
+   if ((NCR5380_read(BUS_AND_STATUS_REG) &
+(BASR_PHASE_MATCH | BASR_ACK)) ==
+   (BASR_PHASE_MATCH | BASR_ACK)) {
+   saved_data = NCR5380_read(INPUT_DATA_REG);
+   overrun = 1;
+   dsprintk(NDEBUG_DMA, instance, "read overrun 
handled\n");
+   }
+   }
+   }
+
+   NCR5380_write(MODE_REG, MR_BASE);
+   NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+   NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+   transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
+   hostdata->dma_len = 0;
+
+   data = (unsigned char **)&hostdata->connected->SCp.ptr;
+   count = &hostdata->connected->SCp.this_residual;
+   *data += transferred;
+   *count -= transferred;
+
+   if (hostdata->read_overruns) {
+   int cnt, toPIO;
+
+   if ((NCR5380_read(STATUS_REG) & PHASE_MASK) == p && (p & 
SR_IO)) {
+   cnt = toPIO = hostdata->read_overruns;
+   if (overrun) {
+   dsprintk(NDEBUG_DMA, instance,
+"Got an input overrun, using saved 
byte\n");
+   *(*data)++ = saved_data;
+   (*count)--;
+   cnt--;
+   toPIO--;
+   }
+   if (toPIO > 0) {
+   dsprintk(NDEBUG_DMA, instance,
+"Doing %d byte PIO to 0x%p\n", cnt, 
*data);
+   NCR5380_transfer_pio(instance, &p, &cnt, data);
+   *count -= toPIO - cnt;
+   }
+   }
+   }
+}
+
 #ifndef DONT_USE_INTR
 
 /**
@@ -855,7 +920,22 @@ static irqreturn_t NCR5380_intr(int irq,
dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 
0x%02x, MR 0x%02x\n",
 irq, basr, sr, mr);
 
-   if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+   if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+   /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+* We ack IRQ after clearing Mode Register. Workarounds
+* for End of DMA errata need to happen in DMA Mode.
+*/
+
+   

[PATCH 09/22] ncr5380: Adopt uniform DMA setup convention

2016-03-13 Thread Finn Thain
Standardize the DMA setup hooks so that the DMA implementation in
atari_NCR5380.c can be reconciled with the pseudo DMA implementation in
NCR5380.c.

Calls to NCR5380_dma_recv_setup() and NCR5380_dma_send_setup() return
a negative value on failure, zero on PDMA transfer success and a positive
byte count for DMA setup success.

This convention is not entirely new, but is now applied consistently.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   21 ++---
 drivers/scsi/arm/cumana_1.c |   10 --
 drivers/scsi/arm/oak.c  |4 ++--
 drivers/scsi/atari_scsi.c   |3 ---
 4 files changed, 20 insertions(+), 18 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:34.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:37.0 +1100
@@ -1431,7 +1431,7 @@ static int NCR5380_transfer_dma(struct S
register unsigned char p = *phase;
register unsigned char *d = *data;
unsigned char tmp;
-   int foo;
+   int result;
 
if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
*phase = tmp;
@@ -1505,9 +1505,9 @@ static int NCR5380_transfer_dma(struct S
  */
 
if (p & SR_IO) {
-   foo = NCR5380_dma_recv_setup(instance, d,
+   result = NCR5380_dma_recv_setup(instance, d,
hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
-   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
+   if (!result && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * The workaround was to transfer fewer bytes than we
 * intended to with the pseudo-DMA read function, wait 
for
@@ -1525,19 +1525,19 @@ static int NCR5380_transfer_dma(struct S
 
if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
  BASR_DRQ, BASR_DRQ, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA read: 
DRQ timeout\n");
}
if (NCR5380_poll_politely(instance, STATUS_REG,
  SR_REQ, 0, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA read: 
!REQ timeout\n");
}
d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
} else {
-   foo = NCR5380_dma_send_setup(instance, d, c);
-   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
+   result = NCR5380_dma_send_setup(instance, d, c);
+   if (!result && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
 * the byte we're interested, we'll ACK it and it will 
go false.
@@ -1545,7 +1545,7 @@ static int NCR5380_transfer_dma(struct S
if (NCR5380_poll_politely2(instance,
 BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
 BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA write: 
DRQ and phase timeout\n");
}
}
@@ -1555,8 +1555,7 @@ static int NCR5380_transfer_dma(struct S
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
*data = d + c;
*count = 0;
-   *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-   return foo;
+   return result;
 }
 
 /*
@@ -1652,7 +1651,7 @@ static void NCR5380_information_transfer
if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
-   if (transfersize) {
+   if (transfersize > 0) {
len = transfersize;
if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **)&cmd->SCp.ptr)) {
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:34.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:37.0 +1100
@@ -101,7 +101,10 @@ static inline int cumanascsi_pwrite(stru
   }
 end:
   writeb(priv(host)->ctrl | 0x40, priv(host)->base + CTRL);
-  return len;
+
+   if (len)
+

[PATCH 12/22] sun3_scsi: Adopt NCR5380.c core driver

2016-03-13 Thread Finn Thain
Add support for the custom Sun 3 DMA logic to the NCR5380.c core driver.
This code is copied from atari_NCR5380.c.

Signed-off-by: Finn Thain 

---

The Sun 3 DMA code is still configured by macros. I have simplified things
slightly but I have avoided more ambitious refactoring. It's not clear to
me what that should look like and I can't test sun3_scsi anyway. At least
this permits the removal of atari_NCR5380.c.

---
 drivers/scsi/NCR5380.c   |  131 +++
 drivers/scsi/sun3_scsi.c |8 +-
 2 files changed, 124 insertions(+), 15 deletions(-)

Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:26.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:44.0 +1100
@@ -51,10 +51,8 @@
 #define NCR5380_abort   sun3scsi_abort
 #define NCR5380_infosun3scsi_info
 
-#define NCR5380_dma_read_setup(instance, data, count) \
-sun3scsi_dma_setup(instance, data, count, 0)
-#define NCR5380_dma_write_setup(instance, data, count) \
-sun3scsi_dma_setup(instance, data, count, 1)
+#define NCR5380_dma_recv_setup(instance, data, count) (count)
+#define NCR5380_dma_send_setup(instance, data, count) (count)
 #define NCR5380_dma_residual(instance) \
 sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -403,7 +401,7 @@ static int sun3scsi_dma_finish(int write
 
 }

-#include "atari_NCR5380.c"
+#include "NCR5380.c"
 
 #ifdef SUN3_SCSI_VME
 #define SUN3_SCSI_NAME  "Sun3 NCR5380 VME SCSI"
Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:42.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:44.0 +1100
@@ -31,6 +31,8 @@
 
 /* Ported to Atari by Roman Hodek and others. */
 
+/* Adapted for the Sun 3 by Sam Creasey. */
+
 /*
  * Further development / testing that should be done :
  *
@@ -858,6 +860,23 @@ static void NCR5380_dma_complete(struct
}
}
 
+#ifdef CONFIG_SUN3
+   if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
+   pr_err("scsi%d: overrun in UDC counter -- not prepared to deal 
with this!\n",
+  instance->host_no);
+   BUG();
+   }
+
+   if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) 
==
+   (BASR_PHASE_MATCH | BASR_ACK)) {
+   pr_err("scsi%d: BASR %02x\n", instance->host_no,
+  NCR5380_read(BUS_AND_STATUS_REG));
+   pr_err("scsi%d: bus stuck in data phase -- probably a single 
byte overrun!\n",
+  instance->host_no);
+   BUG();
+   }
+#endif
+
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
@@ -981,10 +1000,16 @@ static irqreturn_t NCR5380_intr(int irq,
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
}
handled = 1;
} else {
shost_printk(KERN_NOTICE, instance, "interrupt without IRQ 
bit\n");
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
}
 
spin_unlock_irqrestore(&hostdata->lock, flags);
@@ -1274,6 +1299,10 @@ static struct scsi_cmnd *NCR5380_select(
hostdata->connected = cmd;
hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_INTR;
+#endif
+
initialize_SCp(cmd);
 
cmd = NULL;
@@ -1557,6 +1586,11 @@ static int NCR5380_transfer_dma(struct S
dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address 
%p\n",
 (p & SR_IO) ? "receive" : "send", c, d);
 
+#ifdef CONFIG_SUN3
+   /* send start chain */
+   sun3scsi_dma_start(c, *data);
+#endif
+
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
MR_ENABLE_EOP_INTR);
@@ -1577,6 +1611,7 @@ static int NCR5380_transfer_dma(struct S
 */
 
if (p & SR_IO) {
+   NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_io_delay(1);
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
@@ -1587,6 +1622,13 @@ static int NCR5380_transfer_dma(struct S
NCR5380_io_delay(1);
}
 
+#ifdef CONFIG_SUN3
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
+   sun3_dma_active = 1;
+#endif
+
if 

Re: [PATCH v6 1/2] printk: Factor out buffering and irq work queuing in printk_deferred

2016-03-13 Thread Byungchul Park
On Mon, Mar 14, 2016 at 10:21:57AM +0900, Sergey Senozhatsky wrote:
> Hello Byungchul,
> 
> Sorry, I'll make sure I Cc you next time. Jan Kara's updated patch set
> 
> http://marc.info/?l=linux-kernel&m=145787625506342

Hello Sergey,

It would be appreciated if you or Jan could Cc me from now on.

> 
> it's quite close to what you have done in this patch, but Jan's
> patch also solves a number of more likely to happen cases.
> 
> have time to take a look?

I have checked it now. I hope it will be accepted; then I can base my work on
Jan's patch.

> 
> the lock debug patch in your series is different. can we settle
> down async printk first and then return to it? it's not so simple...
> 
> 
> On (03/11/16 19:37), Byungchul Park wrote:
> [..]
> >  int printk_deferred(const char *fmt, ...)
> >  {
> > va_list args;
> > int r;
> >  
> > preempt_disable();
> > +
> > va_start(args, fmt);
> > -   r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
> > +   r = vprintk_deferred(fmt, args);
> > va_end(args);
> > +   printk_pending_output();
> >  
> > -   __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
> > -   irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > preempt_enable();
> >  
> > return r;
> 
> vprintk_deferred() does vprintk_emit()->{spin_lock()} again? console_lock() is
> moved out of sight, but logbuf_lock is still there. wouldn't this (in the worst

We have to ensure that the critical section protected by logbuf_lock does not
call printk() or try to obtain the lock again. The current code works well in
that regard. logbuf_lock is not something we have to care about when
considering the problem this patch deals with.

What do you think?

> case) result in endless loop after all? sorry if I'm missing something.

As long as we ensure that, an endless loop caused by logbuf_lock cannot happen.

> 
>   -ss


[PATCH 09/22] ncr5380: Adopt uniform DMA setup convention

2016-03-13 Thread Finn Thain
Standardize the DMA setup hooks so that the DMA implementation in
atari_NCR5380.c can be reconciled with the pseudo DMA implementation in
NCR5380.c.

Calls to NCR5380_dma_recv_setup() and NCR5380_dma_send_setup() return
a negative value on failure, zero on PDMA transfer success and a positive
byte count for DMA setup success.

This convention is not entirely new, but is now applied consistently.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c  |   21 ++---
 drivers/scsi/arm/cumana_1.c |   10 --
 drivers/scsi/arm/oak.c  |4 ++--
 drivers/scsi/atari_scsi.c   |3 ---
 4 files changed, 20 insertions(+), 18 deletions(-)

Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:34.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:37.0 +1100
@@ -1431,7 +1431,7 @@ static int NCR5380_transfer_dma(struct S
register unsigned char p = *phase;
register unsigned char *d = *data;
unsigned char tmp;
-   int foo;
+   int result;
 
if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
*phase = tmp;
@@ -1505,9 +1505,9 @@ static int NCR5380_transfer_dma(struct S
  */
 
if (p & SR_IO) {
-   foo = NCR5380_dma_recv_setup(instance, d,
+   result = NCR5380_dma_recv_setup(instance, d,
hostdata->flags & FLAG_DMA_FIXUP ? c - 1 : c);
-   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
+   if (!result && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * The workaround was to transfer fewer bytes than we
 * intended to with the pseudo-DMA read function, wait 
for
@@ -1525,19 +1525,19 @@ static int NCR5380_transfer_dma(struct S
 
if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
  BASR_DRQ, BASR_DRQ, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA read: 
DRQ timeout\n");
}
if (NCR5380_poll_politely(instance, STATUS_REG,
  SR_REQ, 0, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA read: 
!REQ timeout\n");
}
d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
} else {
-   foo = NCR5380_dma_send_setup(instance, d, c);
-   if (!foo && (hostdata->flags & FLAG_DMA_FIXUP)) {
+   result = NCR5380_dma_send_setup(instance, d, c);
+   if (!result && (hostdata->flags & FLAG_DMA_FIXUP)) {
/*
 * Wait for the last byte to be sent.  If REQ is being 
asserted for
 * the byte we're interested, we'll ACK it and it will 
go false.
@@ -1545,7 +1545,7 @@ static int NCR5380_transfer_dma(struct S
if (NCR5380_poll_politely2(instance,
 BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
 BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
-   foo = -1;
+   result = -1;
shost_printk(KERN_ERR, instance, "PDMA write: 
DRQ and phase timeout\n");
}
}
@@ -1555,8 +1555,7 @@ static int NCR5380_transfer_dma(struct S
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
*data = d + c;
*count = 0;
-   *phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-   return foo;
+   return result;
 }
 
 /*
@@ -1652,7 +1651,7 @@ static void NCR5380_information_transfer
if (!cmd->device->borken)
transfersize = 
NCR5380_dma_xfer_len(instance, cmd, phase);
 
-   if (transfersize) {
+   if (transfersize > 0) {
len = transfersize;
if (NCR5380_transfer_dma(instance, 
,
, (unsigned char 
**)>SCp.ptr)) {
Index: linux/drivers/scsi/arm/cumana_1.c
===
--- linux.orig/drivers/scsi/arm/cumana_1.c  2016-03-14 15:26:34.0 
+1100
+++ linux/drivers/scsi/arm/cumana_1.c   2016-03-14 15:26:37.0 +1100
@@ -101,7 +101,10 @@ static inline int cumanascsi_pwrite(stru
   }
 end:
   writeb(priv(host)->ctrl | 0x40, priv(host)->base + CTRL);
-  return len;
+
+   if (len)
+   return -1;
+ 

[PATCH 12/22] sun3_scsi: Adopt NCR5380.c core driver

2016-03-13 Thread Finn Thain
Add support for the custom Sun 3 DMA logic to the NCR5380.c core driver.
This code is copied from atari_NCR5380.c.

Signed-off-by: Finn Thain 

---

The Sun 3 DMA code is still configured by macros. I have simplified things
slightly but I have avoided more ambitious refactoring. It's not clear to
me what that should look like and I can't test sun3_scsi anyway. At least
this permits the removal of atari_NCR5380.c.

---
 drivers/scsi/NCR5380.c   |  131 +++
 drivers/scsi/sun3_scsi.c |8 +-
 2 files changed, 124 insertions(+), 15 deletions(-)

Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:26.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:44.0 +1100
@@ -51,10 +51,8 @@
 #define NCR5380_abort   sun3scsi_abort
 #define NCR5380_infosun3scsi_info
 
-#define NCR5380_dma_read_setup(instance, data, count) \
-sun3scsi_dma_setup(instance, data, count, 0)
-#define NCR5380_dma_write_setup(instance, data, count) \
-sun3scsi_dma_setup(instance, data, count, 1)
+#define NCR5380_dma_recv_setup(instance, data, count) (count)
+#define NCR5380_dma_send_setup(instance, data, count) (count)
 #define NCR5380_dma_residual(instance) \
 sun3scsi_dma_residual(instance)
 #define NCR5380_dma_xfer_len(instance, cmd, phase) \
@@ -403,7 +401,7 @@ static int sun3scsi_dma_finish(int write
 
 }

-#include "atari_NCR5380.c"
+#include "NCR5380.c"
 
 #ifdef SUN3_SCSI_VME
 #define SUN3_SCSI_NAME  "Sun3 NCR5380 VME SCSI"
Index: linux/drivers/scsi/NCR5380.c
===
--- linux.orig/drivers/scsi/NCR5380.c   2016-03-14 15:26:42.0 +1100
+++ linux/drivers/scsi/NCR5380.c2016-03-14 15:26:44.0 +1100
@@ -31,6 +31,8 @@
 
 /* Ported to Atari by Roman Hodek and others. */
 
+/* Adapted for the Sun 3 by Sam Creasey. */
+
 /*
  * Further development / testing that should be done :
  *
@@ -858,6 +860,23 @@ static void NCR5380_dma_complete(struct
}
}
 
+#ifdef CONFIG_SUN3
+   if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request {
+   pr_err("scsi%d: overrun in UDC counter -- not prepared to deal 
with this!\n",
+  instance->host_no);
+   BUG();
+   }
+
+   if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) 
==
+   (BASR_PHASE_MATCH | BASR_ACK)) {
+   pr_err("scsi%d: BASR %02x\n", instance->host_no,
+  NCR5380_read(BUS_AND_STATUS_REG));
+   pr_err("scsi%d: bus stuck in data phase -- probably a single 
byte overrun!\n",
+  instance->host_no);
+   BUG();
+   }
+#endif
+
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
@@ -981,10 +1000,16 @@ static irqreturn_t NCR5380_intr(int irq,
NCR5380_read(RESET_PARITY_INTERRUPT_REG);
 
dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
}
handled = 1;
} else {
shost_printk(KERN_NOTICE, instance, "interrupt without IRQ 
bit\n");
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
}
 
spin_unlock_irqrestore(>lock, flags);
@@ -1274,6 +1299,10 @@ static struct scsi_cmnd *NCR5380_select(
hostdata->connected = cmd;
hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
 
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_INTR;
+#endif
+
initialize_SCp(cmd);
 
cmd = NULL;
@@ -1557,6 +1586,11 @@ static int NCR5380_transfer_dma(struct S
dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address 
%p\n",
 (p & SR_IO) ? "receive" : "send", c, d);
 
+#ifdef CONFIG_SUN3
+   /* send start chain */
+   sun3scsi_dma_start(c, *data);
+#endif
+
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
MR_ENABLE_EOP_INTR);
@@ -1577,6 +1611,7 @@ static int NCR5380_transfer_dma(struct S
 */
 
if (p & SR_IO) {
+   NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_io_delay(1);
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
@@ -1587,6 +1622,13 @@ static int NCR5380_transfer_dma(struct S
NCR5380_io_delay(1);
}
 
+#ifdef CONFIG_SUN3
+#ifdef SUN3_SCSI_VME
+   dregs->csr |= CSR_DMA_ENABLE;
+#endif
+   sun3_dma_active = 1;
+#endif
+
if (hostdata->flags & FLAG_LATE_DMA_SETUP) 

Re: [PATCH v6 1/2] printk: Factor out buffering and irq work queuing in printk_deferred

2016-03-13 Thread Byungchul Park
On Mon, Mar 14, 2016 at 10:21:57AM +0900, Sergey Senozhatsky wrote:
> Hello Byungchul,
> 
> Sorry, I'll make sure I Cc you next time. Jan Kara's updated patch set
> 
> http://marc.info/?l=linux-kernel=145787625506342

Hello Sergey,

It would be appreciated if you or Jan Cc me from now on.

> 
> it's quite close to what you have done in this patch, but Jan's
> patch also solves a number of more likely to happen cases.
> 
> have time to take a look?

I checked it now. I hope it will be accepted; then I can base my work on
Jan's patch.

> 
> the lock debug patch in your series is different. can we settle
> down async printk first and then return to it? it's not so simple...
> 
> 
> On (03/11/16 19:37), Byungchul Park wrote:
> [..]
> >  int printk_deferred(const char *fmt, ...)
> >  {
> > va_list args;
> > int r;
> >  
> > preempt_disable();
> > +
> > va_start(args, fmt);
> > -   r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
> > +   r = vprintk_deferred(fmt, args);
> > va_end(args);
> > +   printk_pending_output();
> >  
> > -   __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
> > -   irq_work_queue(this_cpu_ptr(_up_klogd_work));
> > preempt_enable();
> >  
> > return r;
> 
> vprintk_deferred() does vprintk_emit()->{spin_lock()} again? console_lock() is
> moved out of sight, but logbuf_lock is still there. wouldn't this (in the
> worst

We have to ensure that the critical section protected by logbuf_lock will not
call printk() or try to obtain the lock again. The current code works well in
those regards. logbuf_lock is not something we have to care about when
considering the problem this patch deals with.

What do you think?

> case) result in endless loop after all? sorry if I'm missing something.

As long as we ensure it, the endless loop by logbuf_lock cannot happen.

> 
>   -ss


[PATCH 19/22] ncr5380: Update usage documentation

2016-03-13 Thread Finn Thain
Update kernel parameter documentation for atari_scsi, mac_scsi and
g_NCR5380 drivers. Remove duplication.

Signed-off-by: Finn Thain 

---
 Documentation/scsi/g_NCR5380.txt   |   17 ++-
 Documentation/scsi/scsi-parameters.txt |   11 +++---
 drivers/scsi/g_NCR5380.c   |   36 -
 3 files changed, 16 insertions(+), 48 deletions(-)

Index: linux/Documentation/scsi/scsi-parameters.txt
===
--- linux.orig/Documentation/scsi/scsi-parameters.txt   2016-03-14 
15:17:24.0 +1100
+++ linux/Documentation/scsi/scsi-parameters.txt2016-03-14 
15:26:54.0 +1100
@@ -27,13 +27,15 @@ parameters may be changed at runtime by
aic79xx=[HW,SCSI]
See Documentation/scsi/aic79xx.txt.
 
-   atascsi=[HW,SCSI] Atari SCSI
+   atascsi=[HW,SCSI]
+   See drivers/scsi/atari_scsi.c.
 
BusLogic=   [HW,SCSI]
See drivers/scsi/BusLogic.c, comment before function
BusLogic_ParseDriverOptions().
 
dtc3181e=   [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
eata=   [HW,SCSI]
 
@@ -51,8 +53,8 @@ parameters may be changed at runtime by
ips=[HW,SCSI] Adaptec / IBM ServeRAID controller
See header of drivers/scsi/ips.c.
 
-   mac5380=[HW,SCSI] Format:
-   

+   mac5380=[HW,SCSI]
+   See drivers/scsi/mac_scsi.c.
 
max_luns=   [SCSI] Maximum number of LUNs to probe.
Should be between 1 and 2^32-1.
@@ -65,10 +67,13 @@ parameters may be changed at runtime by
See header of drivers/scsi/NCR_D700.c.
 
ncr5380=[HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c400=  [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c400a= [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c406a= [HW,SCSI]
 
Index: linux/Documentation/scsi/g_NCR5380.txt
===
--- linux.orig/Documentation/scsi/g_NCR5380.txt 2016-03-14 15:17:24.0 
+1100
+++ linux/Documentation/scsi/g_NCR5380.txt  2016-03-14 15:26:54.0 
+1100
@@ -23,11 +23,10 @@ supported by the driver.
 
 If the default configuration does not work for you, you can use the kernel
 command lines (eg using the lilo append command):
-   ncr5380=port,irq,dma
-   ncr53c400=port,irq
-or
-   ncr5380=base,irq,dma
-   ncr53c400=base,irq
+   ncr5380=addr,irq
+   ncr53c400=addr,irq
+   ncr53c400a=addr,irq
+   dtc3181e=addr,irq
 
 The driver does not probe for any addresses or ports other than those in
 the OVERRIDE or given to the kernel as above.
@@ -36,19 +35,17 @@ This driver provides some information on
 /proc/scsi/g_NCR5380/x where x is the scsi card number as detected at boot
 time. More info to come in the future.
 
-When NCR53c400 support is compiled in, BIOS parameters will be returned by
-the driver (the raw 5380 driver does not and I don't plan to fiddle with
-it!).
-
 This driver works as a module.
 When included as a module, parameters can be passed on the insmod/modprobe
 command line:
   ncr_irq=xx   the interrupt
   ncr_addr=xx  the port or base address (for port or memory
mapped, resp.)
-  ncr_dma=xx   the DMA
   ncr_5380=1   to set up for a NCR5380 board
   ncr_53c400=1 to set up for a NCR53C400 board
+  ncr_53c400a=1 to set up for a NCR53C400A board
+  dtc_3181e=1  to set up for a Domex Technology Corp 3181E board
+  hp_c2502=1   to set up for a Hewlett Packard C2502 board
 e.g.
 modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
   for a port mapped NCR5380 board or
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:52.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:54.0 +1100
@@ -18,42 +18,8 @@
  *
  * Added ISAPNP support for DTC436 adapters,
  * Thomas Sailer, sai...@ife.ee.ethz.ch
- */
-
-/* 
- * TODO : flesh out DMA support, find some one actually using this (I have
- * a memory mapped Trantor board that works fine)
- */
-
-/*
- * The card is detected and initialized in one of several ways : 
- * 1.  With command line overrides - NCR5380=port,irq may be 
- * used on the LILO command line to override the defaults.
- *
- * 2.  With the GENERIC_NCR5380_OVERRIDE compile time define.  This is 
- * specified as an array of address, irq, dma, board tuples.  Ie, for
- * one board at 0x350, IRQ5, no dma, I could say  
- * -DGENERIC_NCR5380_OVERRIDE={{0xcc000, 5, 

[PATCH 19/22] ncr5380: Update usage documentation

2016-03-13 Thread Finn Thain
Update kernel parameter documentation for atari_scsi, mac_scsi and
g_NCR5380 drivers. Remove duplication.

Signed-off-by: Finn Thain 

---
 Documentation/scsi/g_NCR5380.txt   |   17 ++-
 Documentation/scsi/scsi-parameters.txt |   11 +++---
 drivers/scsi/g_NCR5380.c   |   36 -
 3 files changed, 16 insertions(+), 48 deletions(-)

Index: linux/Documentation/scsi/scsi-parameters.txt
===
--- linux.orig/Documentation/scsi/scsi-parameters.txt   2016-03-14 
15:17:24.0 +1100
+++ linux/Documentation/scsi/scsi-parameters.txt2016-03-14 
15:26:54.0 +1100
@@ -27,13 +27,15 @@ parameters may be changed at runtime by
aic79xx=[HW,SCSI]
See Documentation/scsi/aic79xx.txt.
 
-   atascsi=[HW,SCSI] Atari SCSI
+   atascsi=[HW,SCSI]
+   See drivers/scsi/atari_scsi.c.
 
BusLogic=   [HW,SCSI]
See drivers/scsi/BusLogic.c, comment before function
BusLogic_ParseDriverOptions().
 
dtc3181e=   [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
eata=   [HW,SCSI]
 
@@ -51,8 +53,8 @@ parameters may be changed at runtime by
ips=[HW,SCSI] Adaptec / IBM ServeRAID controller
See header of drivers/scsi/ips.c.
 
-   mac5380=[HW,SCSI] Format:
-   

+   mac5380=[HW,SCSI]
+   See drivers/scsi/mac_scsi.c.
 
max_luns=   [SCSI] Maximum number of LUNs to probe.
Should be between 1 and 2^32-1.
@@ -65,10 +67,13 @@ parameters may be changed at runtime by
See header of drivers/scsi/NCR_D700.c.
 
ncr5380=[HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c400=  [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c400a= [HW,SCSI]
+   See Documentation/scsi/g_NCR5380.txt.
 
ncr53c406a= [HW,SCSI]
 
Index: linux/Documentation/scsi/g_NCR5380.txt
===
--- linux.orig/Documentation/scsi/g_NCR5380.txt 2016-03-14 15:17:24.0 
+1100
+++ linux/Documentation/scsi/g_NCR5380.txt  2016-03-14 15:26:54.0 
+1100
@@ -23,11 +23,10 @@ supported by the driver.
 
 If the default configuration does not work for you, you can use the kernel
 command lines (eg using the lilo append command):
-   ncr5380=port,irq,dma
-   ncr53c400=port,irq
-or
-   ncr5380=base,irq,dma
-   ncr53c400=base,irq
+   ncr5380=addr,irq
+   ncr53c400=addr,irq
+   ncr53c400a=addr,irq
+   dtc3181e=addr,irq
 
 The driver does not probe for any addresses or ports other than those in
 the OVERRIDE or given to the kernel as above.
@@ -36,19 +35,17 @@ This driver provides some information on
 /proc/scsi/g_NCR5380/x where x is the scsi card number as detected at boot
 time. More info to come in the future.
 
-When NCR53c400 support is compiled in, BIOS parameters will be returned by
-the driver (the raw 5380 driver does not and I don't plan to fiddle with
-it!).
-
 This driver works as a module.
 When included as a module, parameters can be passed on the insmod/modprobe
 command line:
   ncr_irq=xx   the interrupt
   ncr_addr=xx  the port or base address (for port or memory
mapped, resp.)
-  ncr_dma=xx   the DMA
   ncr_5380=1   to set up for a NCR5380 board
   ncr_53c400=1 to set up for a NCR53C400 board
+  ncr_53c400a=1 to set up for a NCR53C400A board
+  dtc_3181e=1  to set up for a Domex Technology Corp 3181E board
+  hp_c2502=1   to set up for a Hewlett Packard C2502 board
 e.g.
 modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
   for a port mapped NCR5380 board or
Index: linux/drivers/scsi/g_NCR5380.c
===
--- linux.orig/drivers/scsi/g_NCR5380.c 2016-03-14 15:26:52.0 +1100
+++ linux/drivers/scsi/g_NCR5380.c  2016-03-14 15:26:54.0 +1100
@@ -18,42 +18,8 @@
  *
  * Added ISAPNP support for DTC436 adapters,
  * Thomas Sailer, sai...@ife.ee.ethz.ch
- */
-
-/* 
- * TODO : flesh out DMA support, find some one actually using this (I have
- * a memory mapped Trantor board that works fine)
- */
-
-/*
- * The card is detected and initialized in one of several ways : 
- * 1.  With command line overrides - NCR5380=port,irq may be 
- * used on the LILO command line to override the defaults.
- *
- * 2.  With the GENERIC_NCR5380_OVERRIDE compile time define.  This is 
- * specified as an array of address, irq, dma, board tuples.  Ie, for
- * one board at 0x350, IRQ5, no dma, I could say  
- * -DGENERIC_NCR5380_OVERRIDE={{0xcc000, 5, DMA_NONE, BOARD_NCR5380}}
- 

[PATCH 15/22] dmx3191d: Drop max_sectors limit

2016-03-13 Thread Finn Thain
The dmx3191d driver is not capable of DMA or PDMA so all transfers
use PIO. Now that large slow PIO transfers periodically stop and call
cond_resched(), the max_sectors limit can go away.

Signed-off-by: Finn Thain 

---
 drivers/scsi/dmx3191d.c |1 -
 1 file changed, 1 deletion(-)

Index: linux/drivers/scsi/dmx3191d.c
===
--- linux.orig/drivers/scsi/dmx3191d.c  2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/dmx3191d.c   2016-03-14 15:26:49.0 +1100
@@ -67,7 +67,6 @@ static struct scsi_host_template dmx3191
.cmd_per_lun= 2,
.use_clustering = DISABLE_CLUSTERING,
.cmd_size   = NCR5380_CMD_SIZE,
-   .max_sectors= 128,
 };
 
 static int dmx3191d_probe_one(struct pci_dev *pdev,




[PATCH] usb: dwc3: add disable receiver detection in P3 quirk

2016-03-13 Thread Rajesh Bhagat
Some Freescale QorIQ platforms require receiver detection in P3 to be
disabled for correct detection of USB devices. If GUSB3PIPECTL(DISRXDETINP3)
is set, the core will change the PHY power state to P2 and then perform
receiver detection. After receiver detection, the core will change the PHY
power state to P3. The same quirk will be added to the dts file in future
patches.

Signed-off-by: Sriram Dash 
Signed-off-by: Rajesh Bhagat 
---
 drivers/usb/dwc3/core.c  |6 ++
 drivers/usb/dwc3/core.h  |2 ++
 drivers/usb/dwc3/platform_data.h |1 +
 3 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index de5e01f..b2f2b08 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -446,6 +446,9 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
if (dwc->u2ss_inp3_quirk)
reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK;
 
+   if (dwc->dis_rxdet_inp3_quirk)
+   reg |= DWC3_GUSB3PIPECTL_DISRXDETINP3;
+
if (dwc->req_p1p2p3_quirk)
reg |= DWC3_GUSB3PIPECTL_REQP1P2P3;
 
@@ -903,6 +906,8 @@ static int dwc3_probe(struct platform_device *pdev)
"snps,u2exit_lfps_quirk");
dwc->u2ss_inp3_quirk = device_property_read_bool(dev,
"snps,u2ss_inp3_quirk");
+   dwc->dis_rxdet_inp3_quirk = device_property_read_bool(dev,
+   "snps,dis_rxdet_inp3_quirk");
dwc->req_p1p2p3_quirk = device_property_read_bool(dev,
"snps,req_p1p2p3_quirk");
dwc->del_p1p2p3_quirk = device_property_read_bool(dev,
@@ -945,6 +950,7 @@ static int dwc3_probe(struct platform_device *pdev)
dwc->disable_scramble_quirk = pdata->disable_scramble_quirk;
dwc->u2exit_lfps_quirk = pdata->u2exit_lfps_quirk;
dwc->u2ss_inp3_quirk = pdata->u2ss_inp3_quirk;
+   dwc->dis_rxdet_inp3_quirk = pdata->dis_rxdet_inp3_quirk;
dwc->req_p1p2p3_quirk = pdata->req_p1p2p3_quirk;
dwc->del_p1p2p3_quirk = pdata->del_p1p2p3_quirk;
dwc->del_phy_power_chg_quirk = pdata->del_phy_power_chg_quirk;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index e4f8b90..41cc22c 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -193,6 +193,7 @@
 /* Global USB3 PIPE Control Register */
 #define DWC3_GUSB3PIPECTL_PHYSOFTRST   (1 << 31)
 #define DWC3_GUSB3PIPECTL_U2SSINP3OK   (1 << 29)
+#define DWC3_GUSB3PIPECTL_DISRXDETINP3 (1 << 28)
 #define DWC3_GUSB3PIPECTL_REQP1P2P3(1 << 24)
 #define DWC3_GUSB3PIPECTL_DEP1P2P3(n)  ((n) << 19)
 #define DWC3_GUSB3PIPECTL_DEP1P2P3_MASKDWC3_GUSB3PIPECTL_DEP1P2P3(7)
@@ -873,6 +874,7 @@ struct dwc3 {
 
unsignedtx_de_emphasis_quirk:1;
unsignedtx_de_emphasis:2;
+   unsigneddis_rxdet_inp3_quirk:1;
 };
 
 /* -- 
*/
diff --git a/drivers/usb/dwc3/platform_data.h b/drivers/usb/dwc3/platform_data.h
index 2bb4d3a..9df1dfb 100644
--- a/drivers/usb/dwc3/platform_data.h
+++ b/drivers/usb/dwc3/platform_data.h
@@ -46,6 +46,7 @@ struct dwc3_platform_data {
 
unsigned tx_de_emphasis_quirk:1;
unsigned tx_de_emphasis:2;
+   unsigned dis_rxdet_inp3_quirk:1;
 
u32 fladj_value;
 
-- 
1.7.7.4



[PATCH 13/22] ncr5380: Remove disused atari_NCR5380.c core driver

2016-03-13 Thread Finn Thain
Now that atari_scsi and sun3_scsi have been converted to use the NCR5380.c
core driver, remove atari_NCR5380.c. Also remove the last vestiges of its
Tagged Command Queueing implementation from the wrapper drivers.

The TCQ support in atari_NCR5380.c is abandoned by this patch. It is not
merged into the remaining core driver because,

1) atari_scsi defines SUPPORT_TAGS but leaves FLAG_TAGGED_QUEUING disabled
by default, which indicates that it is mostly undesirable.

2) I'm told that it doesn't work correctly when enabled.

3) The algorithm does not make use of block layer tags which it will have
to do because scmd->tag is deprecated.

4) sun3_scsi doesn't define SUPPORT_TAGS at all, yet the SUPPORT_TAGS
macro interacts with the CONFIG_SUN3 macro in 'interesting' ways.

5) Compile-time configuration with macros like SUPPORT_TAGS caused the
configuration space to explode, leading to untestable and unmaintainable
code that is too hard to reason about.

The merge_contiguous_buffers() code is also abandoned. This was unused
by sun3_scsi. Only atari_scsi used it and then only on TT, because only TT
supports scatter/gather. I suspect that the TT would work fine with
ENABLE_CLUSTERING instead. If someone can benchmark the difference then
perhaps the merge_contiguous_buffers() code can be justified. Until
then we are better off without the extra complexity.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |   22 
 drivers/scsi/NCR5380.h   |   19 
 drivers/scsi/atari_NCR5380.c | 2632 ---
 drivers/scsi/atari_scsi.c|   11 
 drivers/scsi/mac_scsi.c  |8 
 drivers/scsi/sun3_scsi.c |   11 
 6 files changed, 4 insertions(+), 2699 deletions(-)

Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:42.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:45.0 +1100
@@ -85,9 +85,6 @@
 
 /* Definitions for the core NCR5380 driver. */
 
-#define SUPPORT_TAGS
-#define MAX_TAGS32
-
 #define NCR5380_implementation_fields   /* none */
 
 #define NCR5380_read(reg)   atari_scsi_reg_read(reg)
@@ -187,8 +184,6 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-static int setup_use_tagged_queuing = -1;
-module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 static int setup_toshiba_delay = -1;
@@ -477,8 +472,7 @@ static int __init atari_scsi_setup(char
setup_sg_tablesize = ints[3];
if (ints[0] >= 4)
setup_hostid = ints[4];
-   if (ints[0] >= 5)
-   setup_use_tagged_queuing = ints[5];
+   /* ints[5] (use_tagged_queuing) is ignored */
/* ints[6] (use_pdma) is ignored */
if (ints[0] >= 7)
setup_toshiba_delay = ints[7];
@@ -851,9 +845,6 @@ static int __init atari_scsi_probe(struc
instance->irq = irq->start;
 
host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-#ifdef SUPPORT_TAGS
-   host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
-#endif
host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
error = NCR5380_init(instance, host_flags);
Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:44.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:45.0 +1100
@@ -38,9 +38,6 @@
 
 /* Definitions for the core NCR5380 driver. */
 
-/* #define SUPPORT_TAGS */
-/* #define MAX_TAGS 32 */
-
 #define NCR5380_implementation_fields   /* none */
 
 #define NCR5380_read(reg)   sun3scsi_read(reg)
@@ -72,10 +69,6 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
-static int setup_use_tagged_queuing = -1;
-module_param(setup_use_tagged_queuing, int, 0);
-#endif
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 
@@ -509,10 +502,6 @@ static int __init sun3_scsi_probe(struct
instance->io_port = (unsigned long)ioaddr;
instance->irq = irq->start;
 
-#ifdef SUPPORT_TAGS
-   host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
-#endif
-
error = NCR5380_init(instance, host_flags);
if (error)
goto fail_init;
Index: linux/drivers/scsi/mac_scsi.c
===
--- linux.orig/drivers/scsi/mac_scsi.c  2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/mac_scsi.c   2016-03-14 15:26:45.0 +1100
@@ -55,8 +55,6 

[PATCH 15/22] dmx3191d: Drop max_sectors limit

2016-03-13 Thread Finn Thain
The dmx3191d driver is not capable of DMA or PDMA so all transfers
use PIO. Now that large slow PIO transfers periodically stop and call
cond_resched(), the max_sectors limit can go away.

Signed-off-by: Finn Thain 

---
 drivers/scsi/dmx3191d.c |1 -
 1 file changed, 1 deletion(-)

Index: linux/drivers/scsi/dmx3191d.c
===
--- linux.orig/drivers/scsi/dmx3191d.c  2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/dmx3191d.c   2016-03-14 15:26:49.0 +1100
@@ -67,7 +67,6 @@ static struct scsi_host_template dmx3191
.cmd_per_lun= 2,
.use_clustering = DISABLE_CLUSTERING,
.cmd_size   = NCR5380_CMD_SIZE,
-   .max_sectors= 128,
 };
 
 static int dmx3191d_probe_one(struct pci_dev *pdev,




[PATCH] usb: dwc3: add disable receiver detection in P3 quirk

2016-03-13 Thread Rajesh Bhagat
Some Freescale QorIQ platforms require receiver detection in P3 to be
disabled for correct detection of USB devices. If GUSB3PIPECTL(DISRXDETINP3)
is set, the core will change the PHY power state to P2 and then perform
receiver detection. After receiver detection, the core will change the PHY
power state to P3. The same quirk will be added to the dts file in future
patches.

Signed-off-by: Sriram Dash 
Signed-off-by: Rajesh Bhagat 
---
 drivers/usb/dwc3/core.c  |6 ++
 drivers/usb/dwc3/core.h  |2 ++
 drivers/usb/dwc3/platform_data.h |1 +
 3 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index de5e01f..b2f2b08 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -446,6 +446,9 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
if (dwc->u2ss_inp3_quirk)
reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK;
 
+   if (dwc->dis_rxdet_inp3_quirk)
+   reg |= DWC3_GUSB3PIPECTL_DISRXDETINP3;
+
if (dwc->req_p1p2p3_quirk)
reg |= DWC3_GUSB3PIPECTL_REQP1P2P3;
 
@@ -903,6 +906,8 @@ static int dwc3_probe(struct platform_device *pdev)
"snps,u2exit_lfps_quirk");
dwc->u2ss_inp3_quirk = device_property_read_bool(dev,
"snps,u2ss_inp3_quirk");
+   dwc->dis_rxdet_inp3_quirk = device_property_read_bool(dev,
+   "snps,dis_rxdet_inp3_quirk");
dwc->req_p1p2p3_quirk = device_property_read_bool(dev,
"snps,req_p1p2p3_quirk");
dwc->del_p1p2p3_quirk = device_property_read_bool(dev,
@@ -945,6 +950,7 @@ static int dwc3_probe(struct platform_device *pdev)
dwc->disable_scramble_quirk = pdata->disable_scramble_quirk;
dwc->u2exit_lfps_quirk = pdata->u2exit_lfps_quirk;
dwc->u2ss_inp3_quirk = pdata->u2ss_inp3_quirk;
+   dwc->dis_rxdet_inp3_quirk = pdata->dis_rxdet_inp3_quirk;
dwc->req_p1p2p3_quirk = pdata->req_p1p2p3_quirk;
dwc->del_p1p2p3_quirk = pdata->del_p1p2p3_quirk;
dwc->del_phy_power_chg_quirk = pdata->del_phy_power_chg_quirk;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index e4f8b90..41cc22c 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -193,6 +193,7 @@
 /* Global USB3 PIPE Control Register */
 #define DWC3_GUSB3PIPECTL_PHYSOFTRST   (1 << 31)
 #define DWC3_GUSB3PIPECTL_U2SSINP3OK   (1 << 29)
+#define DWC3_GUSB3PIPECTL_DISRXDETINP3 (1 << 28)
 #define DWC3_GUSB3PIPECTL_REQP1P2P3(1 << 24)
 #define DWC3_GUSB3PIPECTL_DEP1P2P3(n)  ((n) << 19)
 #define DWC3_GUSB3PIPECTL_DEP1P2P3_MASKDWC3_GUSB3PIPECTL_DEP1P2P3(7)
@@ -873,6 +874,7 @@ struct dwc3 {
 
unsignedtx_de_emphasis_quirk:1;
unsignedtx_de_emphasis:2;
+   unsigneddis_rxdet_inp3_quirk:1;
 };
 
 /* -- 
*/
diff --git a/drivers/usb/dwc3/platform_data.h b/drivers/usb/dwc3/platform_data.h
index 2bb4d3a..9df1dfb 100644
--- a/drivers/usb/dwc3/platform_data.h
+++ b/drivers/usb/dwc3/platform_data.h
@@ -46,6 +46,7 @@ struct dwc3_platform_data {
 
unsigned tx_de_emphasis_quirk:1;
unsigned tx_de_emphasis:2;
+   unsigned dis_rxdet_inp3_quirk:1;
 
u32 fladj_value;
 
-- 
1.7.7.4



[PATCH 13/22] ncr5380: Remove disused atari_NCR5380.c core driver

2016-03-13 Thread Finn Thain
Now that atari_scsi and sun3_scsi have been converted to use the NCR5380.c
core driver, remove atari_NCR5380.c. Also remove the last vestiges of its
Tagged Command Queueing implementation from the wrapper drivers.

The TCQ support in atari_NCR5380.c is abandoned by this patch. It is not
merged into the remaining core driver because,

1) atari_scsi defines SUPPORT_TAGS but leaves FLAG_TAGGED_QUEUING disabled
by default, which indicates that it is mostly undesirable.

2) I'm told that it doesn't work correctly when enabled.

3) The algorithm does not make use of block layer tags which it will have
to do because scmd->tag is deprecated.

4) sun3_scsi doesn't define SUPPORT_TAGS at all, yet the SUPPORT_TAGS
macro interacts with the CONFIG_SUN3 macro in 'interesting' ways.

5) Compile-time configuration with macros like SUPPORT_TAGS caused the
configuration space to explode, leading to untestable and unmaintainable
code that is too hard to reason about.

The merge_contiguous_buffers() code is also abandoned. This was unused
by sun3_scsi. Only atari_scsi used it and then only on TT, because only TT
supports scatter/gather. I suspect that the TT would work fine with
ENABLE_CLUSTERING instead. If someone can benchmark the difference then
perhaps the merge_contiguous_buffers() code can be justified. Until
then we are better off without the extra complexity.

Signed-off-by: Finn Thain 

---
 drivers/scsi/NCR5380.c   |   22 
 drivers/scsi/NCR5380.h   |   19 
 drivers/scsi/atari_NCR5380.c | 2632 ---
 drivers/scsi/atari_scsi.c|   11 
 drivers/scsi/mac_scsi.c  |8 
 drivers/scsi/sun3_scsi.c |   11 
 6 files changed, 4 insertions(+), 2699 deletions(-)

Index: linux/drivers/scsi/atari_scsi.c
===
--- linux.orig/drivers/scsi/atari_scsi.c2016-03-14 15:26:42.0 
+1100
+++ linux/drivers/scsi/atari_scsi.c 2016-03-14 15:26:45.0 +1100
@@ -85,9 +85,6 @@
 
 /* Definitions for the core NCR5380 driver. */
 
-#define SUPPORT_TAGS
-#define MAX_TAGS32
-
 #define NCR5380_implementation_fields   /* none */
 
 #define NCR5380_read(reg)   atari_scsi_reg_read(reg)
@@ -187,8 +184,6 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-static int setup_use_tagged_queuing = -1;
-module_param(setup_use_tagged_queuing, int, 0);
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 static int setup_toshiba_delay = -1;
@@ -477,8 +472,7 @@ static int __init atari_scsi_setup(char
setup_sg_tablesize = ints[3];
if (ints[0] >= 4)
setup_hostid = ints[4];
-   if (ints[0] >= 5)
-   setup_use_tagged_queuing = ints[5];
+   /* ints[5] (use_tagged_queuing) is ignored */
/* ints[6] (use_pdma) is ignored */
if (ints[0] >= 7)
setup_toshiba_delay = ints[7];
@@ -851,9 +845,6 @@ static int __init atari_scsi_probe(struc
instance->irq = irq->start;
 
host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-#ifdef SUPPORT_TAGS
-   host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
-#endif
host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
 
error = NCR5380_init(instance, host_flags);
Index: linux/drivers/scsi/sun3_scsi.c
===
--- linux.orig/drivers/scsi/sun3_scsi.c 2016-03-14 15:26:44.0 +1100
+++ linux/drivers/scsi/sun3_scsi.c  2016-03-14 15:26:45.0 +1100
@@ -38,9 +38,6 @@
 
 /* Definitions for the core NCR5380 driver. */
 
-/* #define SUPPORT_TAGS */
-/* #define MAX_TAGS 32 */
-
 #define NCR5380_implementation_fields   /* none */
 
 #define NCR5380_read(reg)   sun3scsi_read(reg)
@@ -72,10 +69,6 @@ static int setup_cmd_per_lun = -1;
 module_param(setup_cmd_per_lun, int, 0);
 static int setup_sg_tablesize = -1;
 module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
-static int setup_use_tagged_queuing = -1;
-module_param(setup_use_tagged_queuing, int, 0);
-#endif
 static int setup_hostid = -1;
 module_param(setup_hostid, int, 0);
 
@@ -509,10 +502,6 @@ static int __init sun3_scsi_probe(struct
instance->io_port = (unsigned long)ioaddr;
instance->irq = irq->start;
 
-#ifdef SUPPORT_TAGS
-   host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
-#endif
-
error = NCR5380_init(instance, host_flags);
if (error)
goto fail_init;
Index: linux/drivers/scsi/mac_scsi.c
===
--- linux.orig/drivers/scsi/mac_scsi.c  2016-03-14 15:26:39.0 +1100
+++ linux/drivers/scsi/mac_scsi.c   2016-03-14 15:26:45.0 +1100
@@ -55,8 +55,6 @@ static int 

  1   2   3   4   5   6   7   >