Re: [linux-sunxi] [PATCH 1/5] spi: sunxi: fix transfer timeout

2016-05-26 Thread Julian Calaby
Hi Michal,

On Fri, May 27, 2016 at 5:25 AM, Michal Suchanek  wrote:
> The trasfer timeout is fixed at 1000 ms. Reading a 4Mbyte flash over
> 1MHz SPI bus takes way longer than that. Calculate the timeout from the
> actual time the transfer is supposed to take and multiply by 2 for good
> measure.
>
> Signed-off-by: Michal Suchanek 
> ---
>
> v2:
> - fix build error
> - use unsigned instead of int in max_t
> - use tfr->speed_hz instead of sspi->max_speed_hz
> ---
>  drivers/spi/spi-sun4i.c | 11 ++-
>  drivers/spi/spi-sun6i.c | 11 ++-
>  2 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
> index 1ddd9e2..fe63bbd 100644
> --- a/drivers/spi/spi-sun4i.c
> +++ b/drivers/spi/spi-sun4i.c
> @@ -173,6 +173,7 @@ static int sun4i_spi_transfer_one(struct spi_master 
> *master,
>  {
> struct sun4i_spi *sspi = spi_master_get_devdata(master);
> unsigned int mclk_rate, div, timeout;
> +   unsigned int start, end, tx_time;
> unsigned int tx_len = 0;
> int ret = 0;
> u32 reg;
> @@ -279,9 +280,17 @@ static int sun4i_spi_transfer_one(struct spi_master 
> *master,
> reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
> sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
>
> +   tx_time = max_t(unsigned, tfr->len * 8 * 2 / (tfr->speed_hz / 1000),

You should probably use "unsigned int" instead of just "unsigned" here.

> +   100);
> +   start = jiffies;
> timeout = wait_for_completion_timeout(>done,
> - msecs_to_jiffies(1000));
> + msecs_to_jiffies(tx_time));
> +   end = jiffies;
> if (!timeout) {
> +   dev_warn(>dev,
> +"%s: timeout transferring %u bytes@%iHz for 
> %i(%i)ms",
> +dev_name(>dev), tfr->len, tfr->speed_hz,
> +jiffies_to_msecs(end - start), tx_time);

Should the debug changes be in a separate patch?

> ret = -ETIMEDOUT;
> goto out;
> }
> diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
> index 42e2c4b..8be5c5c 100644
> --- a/drivers/spi/spi-sun6i.c
> +++ b/drivers/spi/spi-sun6i.c
> @@ -162,6 +162,7 @@ static int sun6i_spi_transfer_one(struct spi_master 
> *master,
> unsigned int mclk_rate, div, timeout;
> unsigned int tx_len = 0;
> int ret = 0;
> +   unsigned int start, end, tx_time;
> u32 reg;
>
> /* We don't support transfer larger than the FIFO */
> @@ -269,9 +270,17 @@ static int sun6i_spi_transfer_one(struct spi_master 
> *master,
> reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
> sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
>
> +   tx_time = max_t(unsigned, tfr->len * 8 * 2 / (tfr->speed_hz / 1000),

Ditto, "unsigned int" instead of "unsigned"?

> +   100);
> +   start = jiffies;
> timeout = wait_for_completion_timeout(>done,
> - msecs_to_jiffies(1000));
> + msecs_to_jiffies(tx_time));
> +   end = jiffies;
> if (!timeout) {
> +   dev_warn(>dev,
> +"%s: timeout transferring %u bytes@%iHz for 
> %i(%i)ms",
> +dev_name(>dev), tfr->len, tfr->speed_hz,
> +jiffies_to_msecs(end - start), tx_time);

Ditto, separate patch?

Also, should the changes for the drivers be in two separate patches also?

Thanks,

-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/


Re: [linux-sunxi] [PATCH 1/5] spi: sunxi: fix transfer timeout

2016-05-26 Thread Julian Calaby
Hi Michal,

On Fri, May 27, 2016 at 5:25 AM, Michal Suchanek  wrote:
> The trasfer timeout is fixed at 1000 ms. Reading a 4Mbyte flash over
> 1MHz SPI bus takes way longer than that. Calculate the timeout from the
> actual time the transfer is supposed to take and multiply by 2 for good
> measure.
>
> Signed-off-by: Michal Suchanek 
> ---
>
> v2:
> - fix build error
> - use unsigned instead of int in max_t
> - use tfr->speed_hz instead of sspi->max_speed_hz
> ---
>  drivers/spi/spi-sun4i.c | 11 ++-
>  drivers/spi/spi-sun6i.c | 11 ++-
>  2 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
> index 1ddd9e2..fe63bbd 100644
> --- a/drivers/spi/spi-sun4i.c
> +++ b/drivers/spi/spi-sun4i.c
> @@ -173,6 +173,7 @@ static int sun4i_spi_transfer_one(struct spi_master 
> *master,
>  {
> struct sun4i_spi *sspi = spi_master_get_devdata(master);
> unsigned int mclk_rate, div, timeout;
> +   unsigned int start, end, tx_time;
> unsigned int tx_len = 0;
> int ret = 0;
> u32 reg;
> @@ -279,9 +280,17 @@ static int sun4i_spi_transfer_one(struct spi_master 
> *master,
> reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
> sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
>
> +   tx_time = max_t(unsigned, tfr->len * 8 * 2 / (tfr->speed_hz / 1000),

You should probably use "unsigned int" instead of just "unsigned" here.

> +   100);
> +   start = jiffies;
> timeout = wait_for_completion_timeout(>done,
> - msecs_to_jiffies(1000));
> + msecs_to_jiffies(tx_time));
> +   end = jiffies;
> if (!timeout) {
> +   dev_warn(>dev,
> +"%s: timeout transferring %u bytes@%iHz for 
> %i(%i)ms",
> +dev_name(>dev), tfr->len, tfr->speed_hz,
> +jiffies_to_msecs(end - start), tx_time);

Should the debug changes be in a separate patch?

> ret = -ETIMEDOUT;
> goto out;
> }
> diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
> index 42e2c4b..8be5c5c 100644
> --- a/drivers/spi/spi-sun6i.c
> +++ b/drivers/spi/spi-sun6i.c
> @@ -162,6 +162,7 @@ static int sun6i_spi_transfer_one(struct spi_master 
> *master,
> unsigned int mclk_rate, div, timeout;
> unsigned int tx_len = 0;
> int ret = 0;
> +   unsigned int start, end, tx_time;
> u32 reg;
>
> /* We don't support transfer larger than the FIFO */
> @@ -269,9 +270,17 @@ static int sun6i_spi_transfer_one(struct spi_master 
> *master,
> reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
> sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
>
> +   tx_time = max_t(unsigned, tfr->len * 8 * 2 / (tfr->speed_hz / 1000),

Ditto, "unsigned int" instead of "unsigned"?

> +   100);
> +   start = jiffies;
> timeout = wait_for_completion_timeout(>done,
> - msecs_to_jiffies(1000));
> + msecs_to_jiffies(tx_time));
> +   end = jiffies;
> if (!timeout) {
> +   dev_warn(>dev,
> +"%s: timeout transferring %u bytes@%iHz for 
> %i(%i)ms",
> +dev_name(>dev), tfr->len, tfr->speed_hz,
> +jiffies_to_msecs(end - start), tx_time);

Ditto, separate patch?

Also, should the changes for the drivers be in two separate patches also?

Thanks,

-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/


Re: [GIT PULL] xfs: updates for 4.7-rc1

2016-05-26 Thread Linus Torvalds
On Thu, May 26, 2016 at 5:13 PM, Dave Chinner  wrote:
> On Thu, May 26, 2016 at 10:19:13AM -0700, Linus Torvalds wrote:
>>
>> i'm ok with the late branches, it's not like xfs has been a problem spot.
>
> Still, I'll try to avoid them because it reduces testing time.

Oh, 100% agreed. I'm just saying that you get a pass because you've
historically been very reliable.

>> Your pull request mentions the 'for-next' branch, but I think you
>> *meant* to send me the "xfs-for-linus-4.7-rc1" tag which points to the
>> same commit and has your summary in it.
>
> 
>
> Mea culpa. I ran this command after creating the tag:
>
> $ git request-pull v4.6 
> git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git 
> tags/xfs-for-linus-4.7-rc1 > ~/tmp/t.txt
>
> And I didn't check the output closely enough. I forgot to push the
> tag to the upstream repo before running request-pull.
>
> Git often makes it very easy to make mistakes whilst simultaneously
> making it hard to notice you've made a mistake. :(

Actually, that one got fixed in git quite some time ago.

You probably have a fairly old version of git, which had the "helpful"
logic to pick another remote location if it couldn't find the exact
one you had specified, but it could find another one that matched in
content

It was a misguided attempt at automatically just "doing the right
thing" and being easier to use, but yes, it means that it does
something subtly different from what you ask for if you haven't pushed
out the remote you talked about yet.

Bu that was all fixed about two years ago, and I think made v2.0.

Do you perhaps happen to run something like debian-stable or similar
that never upgrades major versions, so you're still at git-1.x?

Building your own git is trivial, and a "make install" will just
install in your ~/bin directory so you don't even have to uninstall
the system one (assuming you have your own ~/bin before /usr/bin in
the path, of course).

   Linus


Re: [GIT PULL] xfs: updates for 4.7-rc1

2016-05-26 Thread Linus Torvalds
On Thu, May 26, 2016 at 5:13 PM, Dave Chinner  wrote:
> On Thu, May 26, 2016 at 10:19:13AM -0700, Linus Torvalds wrote:
>>
>> i'm ok with the late branches, it's not like xfs has been a problem spot.
>
> Still, I'll try to avoid them because it reduces testing time.

Oh, 100% agreed. I'm just saying that you get a pass because you've
historically been very reliable.

>> Your pull request mentions the 'for-next' branch, but I think you
>> *meant* to send me the "xfs-for-linus-4.7-rc1" tag which points to the
>> same commit and has your summary in it.
>
> 
>
> Mea culpa. I ran this command after creating the tag:
>
> $ git request-pull v4.6 
> git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git 
> tags/xfs-for-linus-4.7-rc1 > ~/tmp/t.txt
>
> And I didn't check the output closely enough. I forgot to push the
> tag to the upstream repo before running request-pull.
>
> Git often makes it very easy to make mistakes whilst simultaneously
> making it hard to notice you've made a mistake. :(

Actually, that one got fixed in git quite some time ago.

You probably have a fairly old version of git, which had the "helpful"
logic to pick another remote location if it couldn't find the exact
one you had specified, but it could find another one that matched in
content

It was a misguided attempt at automatically just "doing the right
thing" and being easier to use, but yes, it means that it does
something subtly different from what you ask for if you haven't pushed
out the remote you talked about yet.

Bu that was all fixed about two years ago, and I think made v2.0.

Do you perhaps happen to run something like debian-stable or similar
that never upgrades major versions, so you're still at git-1.x?

Building your own git is trivial, and a "make install" will just
install in your ~/bin directory so you don't even have to uninstall
the system one (assuming you have your own ~/bin before /usr/bin in
the path, of course).

   Linus


[PATCH] serial: 8250_fintek: fix the mismatched IRQ mode

2016-05-26 Thread Ji-Ze Hong (Peter Hong)
Some BIOS only use _OSI("Linux") to distinguish between Linux & Windows.
Apply Level/Low to UART trigger mode if Windows, Edge/High mode otherwise.
But since 2.6.23 the mainline kernel no longer returns true for
_OSI(“Linux”).

The default IRQ0~15 trigger mode in Linux is Edge/High mode without
ACPI MADT override. It mismatches IRQ mode and makes UART malfunctional on
such motherboard.

This patch will check the current IRQ mode and apply correct mode to UART.

The following link is F81216AD spec PDF:
http://html.alldatasheet.com/html-pdf/257956/FINTEK/F81216AD/5569/
25/F81216AD.html

LDN0~3
70h: IRQ channel & Mode register
Bit 6~5 :
00  : Active low level mode
01  : Active high edge mode
Bit 4   : Sharing Flag (0: not share/1: share)
Bit 3~0 : IRQ channel

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/tty/serial/8250/8250_fintek.c | 36 ++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_fintek.c 
b/drivers/tty/serial/8250/8250_fintek.c
index 870981d..737b4b3 100644
--- a/drivers/tty/serial/8250/8250_fintek.c
+++ b/drivers/tty/serial/8250/8250_fintek.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include  "8250.h"
 
 #define ADDR_PORT 0
@@ -30,6 +31,12 @@
 #define IO_ADDR2 0x60
 #define LDN 0x7
 
+#define IRQ_MODE   0x70
+#define IRQ_SHARE  BIT(4)
+#define IRQ_MODE_MASK  (BIT(6) | BIT(5))
+#define IRQ_LEVEL_LOW  0
+#define IRQ_EDGE_HIGH  BIT(5)
+
 #define RS485  0xF0
 #define RTS_INVERT BIT(5)
 #define RS485_URA BIT(4)
@@ -176,10 +183,37 @@ static int find_base_port(struct fintek_8250 *pdata, u16 
io_address)
return -ENODEV;
 }
 
+static int fintek_8250_set_irq_mode(struct fintek_8250 *pdata, bool level_mode)
+{
+   int status;
+   u8 tmp;
+
+   status = fintek_8250_enter_key(pdata->base_port, pdata->key);
+   if (status)
+   return status;
+
+   outb(LDN, pdata->base_port + ADDR_PORT);
+   outb(pdata->index, pdata->base_port + DATA_PORT);
+
+   outb(IRQ_MODE, pdata->base_port + ADDR_PORT);
+   tmp = inb(pdata->base_port + DATA_PORT);
+
+   tmp &= ~IRQ_MODE_MASK;
+   tmp |= IRQ_SHARE;
+   if (!level_mode)
+   tmp |= IRQ_EDGE_HIGH;
+
+   outb(tmp, pdata->base_port + DATA_PORT);
+   fintek_8250_exit_key(pdata->base_port);
+   return 0;
+}
+
 int fintek_8250_probe(struct uart_8250_port *uart)
 {
struct fintek_8250 *pdata;
struct fintek_8250 probe_data;
+   struct irq_data *irq_data = irq_get_irq_data(uart->port.irq);
+   bool level_mode = irqd_is_level_type(irq_data);
 
if (find_base_port(_data, uart->port.iobase))
return -ENODEV;
@@ -192,5 +226,5 @@ int fintek_8250_probe(struct uart_8250_port *uart)
uart->port.rs485_config = fintek_8250_rs485_config;
uart->port.private_data = pdata;
 
-   return 0;
+   return fintek_8250_set_irq_mode(pdata, level_mode);
 }
-- 
1.9.1



[PATCH] serial: 8250_fintek: fix the mismatched IRQ mode

2016-05-26 Thread Ji-Ze Hong (Peter Hong)
Some BIOS only use _OSI("Linux") to distinguish between Linux & Windows.
Apply Level/Low to UART trigger mode if Windows, Edge/High mode otherwise.
But since 2.6.23 the mainline kernel no longer returns true for
_OSI(“Linux”).

The default IRQ0~15 trigger mode in Linux is Edge/High mode without
ACPI MADT override. It mismatches IRQ mode and makes UART malfunctional on
such motherboard.

This patch will check the current IRQ mode and apply correct mode to UART.

The following link is F81216AD spec PDF:
http://html.alldatasheet.com/html-pdf/257956/FINTEK/F81216AD/5569/
25/F81216AD.html

LDN0~3
70h: IRQ channel & Mode register
Bit 6~5 :
00  : Active low level mode
01  : Active high edge mode
Bit 4   : Sharing Flag (0: not share/1: share)
Bit 3~0 : IRQ channel

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/tty/serial/8250/8250_fintek.c | 36 ++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_fintek.c 
b/drivers/tty/serial/8250/8250_fintek.c
index 870981d..737b4b3 100644
--- a/drivers/tty/serial/8250/8250_fintek.c
+++ b/drivers/tty/serial/8250/8250_fintek.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include  "8250.h"
 
 #define ADDR_PORT 0
@@ -30,6 +31,12 @@
 #define IO_ADDR2 0x60
 #define LDN 0x7
 
+#define IRQ_MODE   0x70
+#define IRQ_SHARE  BIT(4)
+#define IRQ_MODE_MASK  (BIT(6) | BIT(5))
+#define IRQ_LEVEL_LOW  0
+#define IRQ_EDGE_HIGH  BIT(5)
+
 #define RS485  0xF0
 #define RTS_INVERT BIT(5)
 #define RS485_URA BIT(4)
@@ -176,10 +183,37 @@ static int find_base_port(struct fintek_8250 *pdata, u16 
io_address)
return -ENODEV;
 }
 
+static int fintek_8250_set_irq_mode(struct fintek_8250 *pdata, bool level_mode)
+{
+   int status;
+   u8 tmp;
+
+   status = fintek_8250_enter_key(pdata->base_port, pdata->key);
+   if (status)
+   return status;
+
+   outb(LDN, pdata->base_port + ADDR_PORT);
+   outb(pdata->index, pdata->base_port + DATA_PORT);
+
+   outb(IRQ_MODE, pdata->base_port + ADDR_PORT);
+   tmp = inb(pdata->base_port + DATA_PORT);
+
+   tmp &= ~IRQ_MODE_MASK;
+   tmp |= IRQ_SHARE;
+   if (!level_mode)
+   tmp |= IRQ_EDGE_HIGH;
+
+   outb(tmp, pdata->base_port + DATA_PORT);
+   fintek_8250_exit_key(pdata->base_port);
+   return 0;
+}
+
 int fintek_8250_probe(struct uart_8250_port *uart)
 {
struct fintek_8250 *pdata;
struct fintek_8250 probe_data;
+   struct irq_data *irq_data = irq_get_irq_data(uart->port.irq);
+   bool level_mode = irqd_is_level_type(irq_data);
 
if (find_base_port(_data, uart->port.iobase))
return -ENODEV;
@@ -192,5 +226,5 @@ int fintek_8250_probe(struct uart_8250_port *uart)
uart->port.rs485_config = fintek_8250_rs485_config;
uart->port.private_data = pdata;
 
-   return 0;
+   return fintek_8250_set_irq_mode(pdata, level_mode);
 }
-- 
1.9.1



Re: [PATCH v3 5/6] mm/cma: remove MIGRATE_CMA

2016-05-26 Thread Chen Feng
Hi Joonsoo,

On 2016/5/26 14:22, js1...@gmail.com wrote:
> From: Joonsoo Kim 
> 
> Now, all reserved pages for CMA region are belong to the ZONE_CMA
> and there is no other type of pages. Therefore, we don't need to
> use MIGRATE_CMA to distinguish and handle differently for CMA pages
> and ordinary pages. Remove MIGRATE_CMA.
> 
> Unfortunately, this patch make free CMA counter incorrect because
> we count it when pages are on the MIGRATE_CMA. It will be fixed
> by next patch. I can squash next patch here but it makes changes
> complicated and hard to review so I separate that.
> 
> Signed-off-by: Joonsoo Kim 
> ---
>  include/linux/gfp.h|  3 +-
>  include/linux/mmzone.h | 22 -
>  include/linux/vmstat.h |  8 -
>  mm/cma.c   |  2 +-
>  mm/compaction.c| 10 ++
>  mm/hugetlb.c   |  2 +-
>  mm/page_alloc.c| 87 
> +-
>  mm/page_isolation.c|  5 ++-
>  mm/vmstat.c|  5 +--
>  9 files changed, 31 insertions(+), 113 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 4d6c008..1a3b869 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -559,8 +559,7 @@ static inline bool pm_suspended_storage(void)
>  
>  #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || 
> defined(CONFIG_CMA)
>  /* The below functions must be run on a range from a single zone. */
> -extern int alloc_contig_range(unsigned long start, unsigned long end,
> -   unsigned migratetype);
> +extern int alloc_contig_range(unsigned long start, unsigned long end);
>  extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
>  #endif
>  
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 54c92a6..236d0bd 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -41,22 +41,6 @@ enum {
>   MIGRATE_RECLAIMABLE,
>   MIGRATE_PCPTYPES,   /* the number of types on the pcp lists */
>   MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
> -#ifdef CONFIG_CMA
> - /*
> -  * MIGRATE_CMA migration type is designed to mimic the way
> -  * ZONE_MOVABLE works.  Only movable pages can be allocated
> -  * from MIGRATE_CMA pageblocks and page allocator never
> -  * implicitly change migration type of MIGRATE_CMA pageblock.
> -  *
> -  * The way to use it is to change migratetype of a range of
> -  * pageblocks to MIGRATE_CMA which can be done by
> -  * __free_pageblock_cma() function.  What is important though
> -  * is that a range of pageblocks must be aligned to
> -  * MAX_ORDER_NR_PAGES should biggest page be bigger then
> -  * a single pageblock.
> -  */
> - MIGRATE_CMA,
> -#endif
>  #ifdef CONFIG_MEMORY_ISOLATION
>   MIGRATE_ISOLATE,/* can't allocate from here */
>  #endif
> @@ -66,12 +50,6 @@ enum {
>  /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
>  extern char * const migratetype_names[MIGRATE_TYPES];
>  
> -#ifdef CONFIG_CMA
> -#  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
> -#else
> -#  define is_migrate_cma(migratetype) false
> -#endif
> -
>  #define for_each_migratetype_order(order, type) \
>   for (order = 0; order < MAX_ORDER; order++) \
>   for (type = 0; type < MIGRATE_TYPES; type++)
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 0aa613d..e0eb3e5 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -264,14 +264,6 @@ static inline void drain_zonestat(struct zone *zone,
>   struct per_cpu_pageset *pset) { }
>  #endif   /* CONFIG_SMP */
>  
> -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
> -  int migratetype)
> -{
> - __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> - if (is_migrate_cma(migratetype))
> - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
> -}
> -
>  extern const char * const vmstat_text[];
>  
>  #endif /* _LINUX_VMSTAT_H */
> diff --git a/mm/cma.c b/mm/cma.c
> index 8684f50..bd436e4 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -444,7 +444,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, 
> unsigned int align)
>  
>   pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
>   mutex_lock(_mutex);
> - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
> + ret = alloc_contig_range(pfn, pfn + count);
>   mutex_unlock(_mutex);
>   if (ret == 0) {
>   page = pfn_to_page(pfn);
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 1427366..acb1d1a 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -76,7 +76,7 @@ static void map_pages(struct list_head *list)
>  
>  static inline bool 

Re: [PATCH v3 5/6] mm/cma: remove MIGRATE_CMA

2016-05-26 Thread Chen Feng
Hi Joonsoo,

On 2016/5/26 14:22, js1...@gmail.com wrote:
> From: Joonsoo Kim 
> 
> Now, all reserved pages for CMA region are belong to the ZONE_CMA
> and there is no other type of pages. Therefore, we don't need to
> use MIGRATE_CMA to distinguish and handle differently for CMA pages
> and ordinary pages. Remove MIGRATE_CMA.
> 
> Unfortunately, this patch make free CMA counter incorrect because
> we count it when pages are on the MIGRATE_CMA. It will be fixed
> by next patch. I can squash next patch here but it makes changes
> complicated and hard to review so I separate that.
> 
> Signed-off-by: Joonsoo Kim 
> ---
>  include/linux/gfp.h|  3 +-
>  include/linux/mmzone.h | 22 -
>  include/linux/vmstat.h |  8 -
>  mm/cma.c   |  2 +-
>  mm/compaction.c| 10 ++
>  mm/hugetlb.c   |  2 +-
>  mm/page_alloc.c| 87 
> +-
>  mm/page_isolation.c|  5 ++-
>  mm/vmstat.c|  5 +--
>  9 files changed, 31 insertions(+), 113 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 4d6c008..1a3b869 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -559,8 +559,7 @@ static inline bool pm_suspended_storage(void)
>  
>  #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || 
> defined(CONFIG_CMA)
>  /* The below functions must be run on a range from a single zone. */
> -extern int alloc_contig_range(unsigned long start, unsigned long end,
> -   unsigned migratetype);
> +extern int alloc_contig_range(unsigned long start, unsigned long end);
>  extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
>  #endif
>  
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 54c92a6..236d0bd 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -41,22 +41,6 @@ enum {
>   MIGRATE_RECLAIMABLE,
>   MIGRATE_PCPTYPES,   /* the number of types on the pcp lists */
>   MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
> -#ifdef CONFIG_CMA
> - /*
> -  * MIGRATE_CMA migration type is designed to mimic the way
> -  * ZONE_MOVABLE works.  Only movable pages can be allocated
> -  * from MIGRATE_CMA pageblocks and page allocator never
> -  * implicitly change migration type of MIGRATE_CMA pageblock.
> -  *
> -  * The way to use it is to change migratetype of a range of
> -  * pageblocks to MIGRATE_CMA which can be done by
> -  * __free_pageblock_cma() function.  What is important though
> -  * is that a range of pageblocks must be aligned to
> -  * MAX_ORDER_NR_PAGES should biggest page be bigger then
> -  * a single pageblock.
> -  */
> - MIGRATE_CMA,
> -#endif
>  #ifdef CONFIG_MEMORY_ISOLATION
>   MIGRATE_ISOLATE,/* can't allocate from here */
>  #endif
> @@ -66,12 +50,6 @@ enum {
>  /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
>  extern char * const migratetype_names[MIGRATE_TYPES];
>  
> -#ifdef CONFIG_CMA
> -#  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
> -#else
> -#  define is_migrate_cma(migratetype) false
> -#endif
> -
>  #define for_each_migratetype_order(order, type) \
>   for (order = 0; order < MAX_ORDER; order++) \
>   for (type = 0; type < MIGRATE_TYPES; type++)
> diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
> index 0aa613d..e0eb3e5 100644
> --- a/include/linux/vmstat.h
> +++ b/include/linux/vmstat.h
> @@ -264,14 +264,6 @@ static inline void drain_zonestat(struct zone *zone,
>   struct per_cpu_pageset *pset) { }
>  #endif   /* CONFIG_SMP */
>  
> -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
> -  int migratetype)
> -{
> - __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
> - if (is_migrate_cma(migratetype))
> - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
> -}
> -
>  extern const char * const vmstat_text[];
>  
>  #endif /* _LINUX_VMSTAT_H */
> diff --git a/mm/cma.c b/mm/cma.c
> index 8684f50..bd436e4 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -444,7 +444,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, 
> unsigned int align)
>  
>   pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
>   mutex_lock(_mutex);
> - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
> + ret = alloc_contig_range(pfn, pfn + count);
>   mutex_unlock(_mutex);
>   if (ret == 0) {
>   page = pfn_to_page(pfn);
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 1427366..acb1d1a 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -76,7 +76,7 @@ static void map_pages(struct list_head *list)
>  
>  static inline bool migrate_async_suitable(int migratetype)
>  {
> - return 

[PATCH 1/2 v2] staging: dgnc: remove redundant NULL check for brd

2016-05-26 Thread Daeseok Youn
the "brd" value cannot be NULL in dgnc_finalize_board_init().
Because "brd" as a parameter of this function was already
checked for NULL.

the dgnc_finalize_board_init() as a static function was called
only from dgnc_found_board() function and brd->magic value
was assigned once in dgnc_found_board(). So it doesn't need to
check for DGNC_BOARD_MAGIC value.

Signed-off-by: Daeseok Youn 
---
V2: Adds more comments to Change Log.

 drivers/staging/dgnc/dgnc_driver.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.c 
b/drivers/staging/dgnc/dgnc_driver.c
index af2e835..22257d2 100644
--- a/drivers/staging/dgnc/dgnc_driver.c
+++ b/drivers/staging/dgnc/dgnc_driver.c
@@ -579,9 +579,6 @@ static int dgnc_finalize_board_init(struct dgnc_board *brd)
 {
int rc = 0;
 
-   if (!brd || brd->magic != DGNC_BOARD_MAGIC)
-   return -ENODEV;
-
if (brd->irq) {
rc = request_irq(brd->irq, brd->bd_ops->intr,
 IRQF_SHARED, "DGNC", brd);
-- 
1.9.1



[PATCH 1/2 v2] staging: dgnc: remove redundant NULL check for brd

2016-05-26 Thread Daeseok Youn
the "brd" value cannot be NULL in dgnc_finalize_board_init().
Because "brd" as a parameter of this function was already
checked for NULL.

the dgnc_finalize_board_init() as a static function was called
only from dgnc_found_board() function and brd->magic value
was assigned once in dgnc_found_board(). So it doesn't need to
check for DGNC_BOARD_MAGIC value.

Signed-off-by: Daeseok Youn 
---
V2: Adds more comments to Change Log.

 drivers/staging/dgnc/dgnc_driver.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.c 
b/drivers/staging/dgnc/dgnc_driver.c
index af2e835..22257d2 100644
--- a/drivers/staging/dgnc/dgnc_driver.c
+++ b/drivers/staging/dgnc/dgnc_driver.c
@@ -579,9 +579,6 @@ static int dgnc_finalize_board_init(struct dgnc_board *brd)
 {
int rc = 0;
 
-   if (!brd || brd->magic != DGNC_BOARD_MAGIC)
-   return -ENODEV;
-
if (brd->irq) {
rc = request_irq(brd->irq, brd->bd_ops->intr,
 IRQF_SHARED, "DGNC", brd);
-- 
1.9.1



[PATCH 2/2 V2] staging: dgnc: remove redundant null check in

2016-05-26 Thread Daeseok Youn
the "brd" was already checked for NULL before calling dgnc_do_remap().

the dgnc_do_remap() function was called only
from the dgnc_found_board() and the DGNC_BOARD_MAGIC value
was assigned to "brd->magic" in dgcn_found_board(). So it doesn't
need to check about magic value.

Signed-off-by: Daeseok Youn 
---
V2: Adds more comments to Change Log.

 drivers/staging/dgnc/dgnc_driver.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.c 
b/drivers/staging/dgnc/dgnc_driver.c
index 22257d2..1882ef5 100644
--- a/drivers/staging/dgnc/dgnc_driver.c
+++ b/drivers/staging/dgnc/dgnc_driver.c
@@ -599,9 +599,6 @@ static int dgnc_finalize_board_init(struct dgnc_board *brd)
  */
 static void dgnc_do_remap(struct dgnc_board *brd)
 {
-   if (!brd || brd->magic != DGNC_BOARD_MAGIC)
-   return;
-
brd->re_map_membase = ioremap(brd->membase, 0x1000);
 }
 
-- 
1.9.1



[PATCH 2/2 V2] staging: dgnc: remove redundant null check in

2016-05-26 Thread Daeseok Youn
the "brd" was already checked for NULL before calling dgnc_do_remap().

the dgnc_do_remap() function was called only
from the dgnc_found_board() and the DGNC_BOARD_MAGIC value
was assigned to "brd->magic" in dgcn_found_board(). So it doesn't
need to check about magic value.

Signed-off-by: Daeseok Youn 
---
V2: Adds more comments to Change Log.

 drivers/staging/dgnc/dgnc_driver.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_driver.c 
b/drivers/staging/dgnc/dgnc_driver.c
index 22257d2..1882ef5 100644
--- a/drivers/staging/dgnc/dgnc_driver.c
+++ b/drivers/staging/dgnc/dgnc_driver.c
@@ -599,9 +599,6 @@ static int dgnc_finalize_board_init(struct dgnc_board *brd)
  */
 static void dgnc_do_remap(struct dgnc_board *brd)
 {
-   if (!brd || brd->magic != DGNC_BOARD_MAGIC)
-   return;
-
brd->re_map_membase = ioremap(brd->membase, 0x1000);
 }
 
-- 
1.9.1



[PATCH] usb: core: fix a double free in the usb driver

2016-05-26 Thread Chung-Geol Kim
There is a double free problem in the usb driver.
This is caused by delayed deregister for scsi device.
<*> at Insert USB Storage
-  USB bus #1 register
 usb_create_hcd (primary-kref==1)
 * primary-bandwidth_mutex(alloc))
usb_get_hcd(primary-kref==2)
-  USB bus #2 register
usb_create_hcd (second-kref==1)
 * second-bandwidth_mutex==primary-bandwidth_mutex
 usb_get_hcd(second-kref==2)
-  scsi_device_get
usb_get_hcd(second-kref==3)

<*> at remove USB Storage (Normal)
-  scsi_device_put
usb_put_hcd(second-kref==2)
-  USB bus #2 deregister
 usb_release_dev(second-kref==1)
usb_release_dev(second-kref==0)  -> hcd_release()
-  USB bus #1 deregister
 usb_release_dev(primary-kref==1)
 usb_release_dev(primary-kref==0) -> hcd_release()
*(primary-bandwidth_mutex free)

at remove USB Storage 
-  USB bus #2 deregister
 usb_release_dev(second-kref==2)
usb_release_dev(second-kref==1)
-  USB bus #1 deregister
 usb_release_dev(primary-kref==1)
 usb_release_dev(primary-kref==0) -> hcd_release()
 *(primary-bandwidth_mutex free)
-  scsi_device_put
 usb_put_hcd(second-kref==0)  -> hcd_release(*)
 * at this, second->primary==0 therefore try to
free the primary-bandwidth_mutex.(already freed)

To fix this problem kfree(hcd->bandwidth_mutex);
should be executed at only (hcd->primary_hcd==hcd).

Signed-off-by: Chunggeol Kim 
---
drivers/usb/core/hcd.c |2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 34b837a..60077f3 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2608,7 +2608,7 @@ static void hcd_release(struct kref *kref)
struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);

mutex_lock(_port_peer_mutex);
- if (usb_hcd_is_primary_hcd(hcd)) {
+ if (hcd == hcd->primary_hcd) {
kfree(hcd->address0_mutex);
kfree(hcd->bandwidth_mutex);
}
-- 
1.7.9.5

[PATCH] usb: core: fix a double free in the usb driver

2016-05-26 Thread Chung-Geol Kim
There is a double free problem in the usb driver.
This is caused by delayed deregister for scsi device.
<*> at Insert USB Storage
-  USB bus #1 register
 usb_create_hcd (primary-kref==1)
 * primary-bandwidth_mutex(alloc))
usb_get_hcd(primary-kref==2)
-  USB bus #2 register
usb_create_hcd (second-kref==1)
 * second-bandwidth_mutex==primary-bandwidth_mutex
 usb_get_hcd(second-kref==2)
-  scsi_device_get
usb_get_hcd(second-kref==3)

<*> at remove USB Storage (Normal)
-  scsi_device_put
usb_put_hcd(second-kref==2)
-  USB bus #2 deregister
 usb_release_dev(second-kref==1)
usb_release_dev(second-kref==0)  -> hcd_release()
-  USB bus #1 deregister
 usb_release_dev(primary-kref==1)
 usb_release_dev(primary-kref==0) -> hcd_release()
*(primary-bandwidth_mutex free)

at remove USB Storage 
-  USB bus #2 deregister
 usb_release_dev(second-kref==2)
usb_release_dev(second-kref==1)
-  USB bus #1 deregister
 usb_release_dev(primary-kref==1)
 usb_release_dev(primary-kref==0) -> hcd_release()
 *(primary-bandwidth_mutex free)
-  scsi_device_put
 usb_put_hcd(second-kref==0)  -> hcd_release(*)
 * at this, second->primary==0 therefore try to
free the primary-bandwidth_mutex.(already freed)

To fix this problem kfree(hcd->bandwidth_mutex);
should be executed at only (hcd->primary_hcd==hcd).

Signed-off-by: Chunggeol Kim 
---
drivers/usb/core/hcd.c |2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 34b837a..60077f3 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2608,7 +2608,7 @@ static void hcd_release(struct kref *kref)
struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);

mutex_lock(_port_peer_mutex);
- if (usb_hcd_is_primary_hcd(hcd)) {
+ if (hcd == hcd->primary_hcd) {
kfree(hcd->address0_mutex);
kfree(hcd->bandwidth_mutex);
}
-- 
1.7.9.5

Re: [PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Joe Perches
dOn Thu, 2016-05-26 at 17:08 -0700, Long Li wrote:
> The block sector size should be in unit of 512 bytes, not in bytes.

Thanks.  The patch subject should use something like:

[PATCH] sd: Use the correct size to set block max sectors

to show what subsystem is being modified.

> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
[]
> @@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
>   if (sdkp->opt_xfer_blocks &&
>   sdkp->opt_xfer_blocks <= dev_max &&
>   sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
> - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
> - rw_max = q->limits.io_opt =
> + sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
> + q->limits.io_opt =
>   sdkp->opt_xfer_blocks * sdp->sector_size;
> + rw_max = (q->limits.io_opt >> 9);
> + }
>   else
>   rw_max = BLK_DEF_MAX_SECTORS;

And style trivia:  it'd be more kernel style consistent as:

if (...
    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
q->limits.io_opt = sdkp->opt_xfer_blocks * sdp->sector_size;
rw_max = q->limits.io_opt >> 9;
} else {
rw_max = BLK_DEF_MAX_SECTORS;
}

ie: no parentheses necessary around the shifted value and
    braces around both arms.



Re: [PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Joe Perches
dOn Thu, 2016-05-26 at 17:08 -0700, Long Li wrote:
> The block sector size should be in unit of 512 bytes, not in bytes.

Thanks.  The patch subject should use something like:

[PATCH] sd: Use the correct size to set block max sectors

to show what subsystem is being modified.

> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
[]
> @@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
>   if (sdkp->opt_xfer_blocks &&
>   sdkp->opt_xfer_blocks <= dev_max &&
>   sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
> - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
> - rw_max = q->limits.io_opt =
> + sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
> + q->limits.io_opt =
>   sdkp->opt_xfer_blocks * sdp->sector_size;
> + rw_max = (q->limits.io_opt >> 9);
> + }
>   else
>   rw_max = BLK_DEF_MAX_SECTORS;

And style trivia:  it'd be more kernel style consistent as:

if (...
    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
q->limits.io_opt = sdkp->opt_xfer_blocks * sdp->sector_size;
rw_max = q->limits.io_opt >> 9;
} else {
rw_max = BLK_DEF_MAX_SECTORS;
}

ie: no parentheses necessary around the shifted value and
    braces around both arms.



Re: [PATCH 1/2] staging: dgnc: remove redundant NULL check for brd

2016-05-26 Thread DaeSeok Youn
2016-05-26 21:29 GMT+09:00 Luis de Bethencourt :
> On 26/05/16 05:56, DaeSeok Youn wrote:
>> 2016-05-26 6:48 GMT+09:00 Luis de Bethencourt :
>>> On 20/05/16 10:51, Daeseok Youn wrote:
 the "brd" value cannot be NULL in dgnc_finalize_board_init().
 Because "brd" as a parameter of this function was already
 checked for NULL.

 Signed-off-by: Daeseok Youn 
 ---
  drivers/staging/dgnc/dgnc_driver.c | 3 ---
  1 file changed, 3 deletions(-)

 diff --git a/drivers/staging/dgnc/dgnc_driver.c 
 b/drivers/staging/dgnc/dgnc_driver.c
 index af2e835..22257d2 100644
 --- a/drivers/staging/dgnc/dgnc_driver.c
 +++ b/drivers/staging/dgnc/dgnc_driver.c
 @@ -579,9 +579,6 @@ static int dgnc_finalize_board_init(struct dgnc_board 
 *brd)
  {
   int rc = 0;

 - if (!brd || brd->magic != DGNC_BOARD_MAGIC)
 - return -ENODEV;
 -
   if (brd->irq) {
   rc = request_irq(brd->irq, brd->bd_ops->intr,
IRQF_SHARED, "DGNC", brd);

>>>
>>> This is partially correct, the check for brd being NULL is in line 371.
>> Hi Luis,
>>
>> Yes, right. but also brd was assigned the value DGNC_BOARD_MAGIC in line 384.
>> brd->magic = DGNC_BOARD_MAGIC;
>> and also dgnc_finalize_board_init() as a static function is only
>> called in dgnc_found_board(), right?
>>
>>>
>>> But there is a second check for brd->magic != DGNC_BOARD_MAGIC. Do you want
>>> to keep that one?
>> So.. I think it doesn't need to check about DGNC_BOARD_MAGIC.
>
> This is good. I was asking just to make sure it was your intention.
>
> Please add the reason to drop that second check in the commit message as 
> well. So people
> reading the git log can understand both parts. For both patches.
OK. I will update the change log and resend it.

Thanks.

regards,
Daeseok.
>
> Thanks for the fixes :)
>
> Luis
>
>>
>>>
>>> Also, how did you find this patch. It is useful to mention this in the 
>>> commit
>>> message if it was through some static analysis tool. For people using these 
>>> tools
>>> in the future.
>> There are some static analysis tool for checking linux kernel code.
>> But I didn't use
>> those tools for this patch. sometimes, I usually run "smatch" tool for
>> checking linux kernel
>> code.
>>
>> thanks.
>> regards,
>> Daeseok.
>>
>>>
>>> Thanks for the patch :)
>>> Luis
>


Re: [PATCH 1/2] staging: dgnc: remove redundant NULL check for brd

2016-05-26 Thread DaeSeok Youn
2016-05-26 21:29 GMT+09:00 Luis de Bethencourt :
> On 26/05/16 05:56, DaeSeok Youn wrote:
>> 2016-05-26 6:48 GMT+09:00 Luis de Bethencourt :
>>> On 20/05/16 10:51, Daeseok Youn wrote:
 the "brd" value cannot be NULL in dgnc_finalize_board_init().
 Because "brd" as a parameter of this function was already
 checked for NULL.

 Signed-off-by: Daeseok Youn 
 ---
  drivers/staging/dgnc/dgnc_driver.c | 3 ---
  1 file changed, 3 deletions(-)

 diff --git a/drivers/staging/dgnc/dgnc_driver.c 
 b/drivers/staging/dgnc/dgnc_driver.c
 index af2e835..22257d2 100644
 --- a/drivers/staging/dgnc/dgnc_driver.c
 +++ b/drivers/staging/dgnc/dgnc_driver.c
 @@ -579,9 +579,6 @@ static int dgnc_finalize_board_init(struct dgnc_board 
 *brd)
  {
   int rc = 0;

 - if (!brd || brd->magic != DGNC_BOARD_MAGIC)
 - return -ENODEV;
 -
   if (brd->irq) {
   rc = request_irq(brd->irq, brd->bd_ops->intr,
IRQF_SHARED, "DGNC", brd);

>>>
>>> This is partially correct, the check for brd being NULL is in line 371.
>> Hi Luis,
>>
>> Yes, right. but also brd was assigned the value DGNC_BOARD_MAGIC in line 384.
>> brd->magic = DGNC_BOARD_MAGIC;
>> and also dgnc_finalize_board_init() as a static function is only
>> called in dgnc_found_board(), right?
>>
>>>
>>> But there is a second check for brd->magic != DGNC_BOARD_MAGIC. Do you want
>>> to keep that one?
>> So.. I think it doesn't need to check about DGNC_BOARD_MAGIC.
>
> This is good. I was asking just to make sure it was your intention.
>
> Please add the reason to drop that second check in the commit message as 
> well. So people
> reading the git log can understand both parts. For both patches.
OK. I will update the change log and resend it.

Thanks.

regards,
Daeseok.
>
> Thanks for the fixes :)
>
> Luis
>
>>
>>>
>>> Also, how did you find this patch. It is useful to mention this in the 
>>> commit
>>> message if it was through some static analysis tool. For people using these 
>>> tools
>>> in the future.
>> There are some static analysis tool for checking linux kernel code.
>> But I didn't use
>> those tools for this patch. sometimes, I usually run "smatch" tool for
>> checking linux kernel
>> code.
>>
>> thanks.
>> regards,
>> Daeseok.
>>
>>>
>>> Thanks for the patch :)
>>> Luis
>


RE: [Intel-wired-lan] [RFC PATCH net] e1000e: keep vlan interfaces functional after rxvlan off

2016-05-26 Thread Brown, Aaron F
> From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On
> Behalf Of Jeff Kirsher
> Sent: Wednesday, May 18, 2016 2:40 PM
> To: Jarod Wilson ; linux-kernel@vger.kernel.org;
> Avargil, Raanan 
> Cc: net...@vger.kernel.org; intel-wired-...@lists.osuosl.org
> Subject: Re: [Intel-wired-lan] [RFC PATCH net] e1000e: keep vlan interfaces
> functional after rxvlan off
> 
> On Tue, 2016-05-17 at 15:03 -0400, Jarod Wilson wrote:
> > I've got a bug report about an e1000e interface, where a vlan interface
> > is
> > set up on top of it:
> >
> > $ ip link add link ens1f0 name ens1f0.99 type vlan id 99
> > $ ip link set ens1f0 up
> > $ ip link set ens1f0.99 up
> > $ ip addr add 192.168.99.92 dev ens1f0.99
> >
> > At this point, I can ping another host on vlan 99, ip 192.168.99.91.
> > However, if I do the following:
> >
> > $ ethtool -K ens1f0 rxvlan off
> >
> > Then no traffic passes on ens1f0.99. It comes back if I toggle rxvlan on
> > again. I'm not sure if this is actually intended behavior, or if there's
> > a
> > lack of software vlan stripping fallback, or what, but things continue to
> > work if I simply don't call e1000e_vlan_strip_disable() if there are
> > active vlans (plagiarizing a function from the e1000 driver here) on the
> > interface.
> >
> > Also slipped a related-ish fix to the kerneldoc text for
> > e1000e_vlan_strip_disable here...
> >
> > CC: Jeff Kirsher 
> > CC: intel-wired-...@lists.osuosl.org
> > CC: net...@vger.kernel.org
> > Signed-off-by: Jarod Wilson 
> > ---
> >  drivers/net/ethernet/intel/e1000e/netdev.c | 15 +--
> >  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> Raanan, please review this patch.  Even though it is an RFC I will be
> adding it to my queue for testing.
> http://patchwork.ozlabs.org/patch/623238/

Yup, without this patch disabling rxvlan offload does indeed break vlan 
connectivity and with the patch I can disable and re-enable rxvlan offloads as 
much as I care to.  It also makes it through my regression tests without 
problems.

Tested-by: Aaron Brown 

This is from functional - does it work - testing perspective so review is 
probably still in order.


RE: [Intel-wired-lan] [RFC PATCH net] e1000e: keep vlan interfaces functional after rxvlan off

2016-05-26 Thread Brown, Aaron F
> From: Intel-wired-lan [mailto:intel-wired-lan-boun...@lists.osuosl.org] On
> Behalf Of Jeff Kirsher
> Sent: Wednesday, May 18, 2016 2:40 PM
> To: Jarod Wilson ; linux-kernel@vger.kernel.org;
> Avargil, Raanan 
> Cc: net...@vger.kernel.org; intel-wired-...@lists.osuosl.org
> Subject: Re: [Intel-wired-lan] [RFC PATCH net] e1000e: keep vlan interfaces
> functional after rxvlan off
> 
> On Tue, 2016-05-17 at 15:03 -0400, Jarod Wilson wrote:
> > I've got a bug report about an e1000e interface, where a vlan interface
> > is
> > set up on top of it:
> >
> > $ ip link add link ens1f0 name ens1f0.99 type vlan id 99
> > $ ip link set ens1f0 up
> > $ ip link set ens1f0.99 up
> > $ ip addr add 192.168.99.92 dev ens1f0.99
> >
> > At this point, I can ping another host on vlan 99, ip 192.168.99.91.
> > However, if I do the following:
> >
> > $ ethtool -K ens1f0 rxvlan off
> >
> > Then no traffic passes on ens1f0.99. It comes back if I toggle rxvlan on
> > again. I'm not sure if this is actually intended behavior, or if there's
> > a
> > lack of software vlan stripping fallback, or what, but things continue to
> > work if I simply don't call e1000e_vlan_strip_disable() if there are
> > active vlans (plagiarizing a function from the e1000 driver here) on the
> > interface.
> >
> > Also slipped a related-ish fix to the kerneldoc text for
> > e1000e_vlan_strip_disable here...
> >
> > CC: Jeff Kirsher 
> > CC: intel-wired-...@lists.osuosl.org
> > CC: net...@vger.kernel.org
> > Signed-off-by: Jarod Wilson 
> > ---
> >  drivers/net/ethernet/intel/e1000e/netdev.c | 15 +--
> >  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> Raanan, please review this patch.  Even though it is an RFC I will be
> adding it to my queue for testing.
> http://patchwork.ozlabs.org/patch/623238/

Yup, without this patch disabling rxvlan offload does indeed break vlan 
connectivity and with the patch I can disable and re-enable rxvlan offloads as 
much as I care to.  It also makes it through my regression tests without 
problems.

Tested-by: Aaron Brown 

This is from functional - does it work - testing perspective so review is 
probably still in order.


[RFT v3] drm: use late_initcall() for amdkfd and radeon

2016-05-26 Thread Luis R. Rodriguez
To get KFD support in radeon we need the following
initialization to happen in this order, their
respective driver file that has its init routine
listed next to it:

0. AMD IOMMUv1:arch/x86/kernel/pci-dma.c
1. AMD IOMMUv2:drivers/iommu/amd_iommu_v2.c
2. AMD KFD:drivers/gpu/drm/amd/amdkfd/kfd_module.c
3. AMD Radeon: drivers/gpu/drm/radeon/radeon_drv.c

Order is rather implicit, but these drivers can currently
only do so much given the amount of leg room available.
Below are the respective init routines and how they are
initialized:

arch/x86/kernel/pci-dma.c   rootfs_initcall(pci_iommu_init);
drivers/iommu/amd_iommu_v2.cmodule_init(amd_iommu_v2_init);
drivers/gpu/drm/amd/amdkfd/kfd_module.c module_init(kfd_module_init);
drivers/gpu/drm/radeon/radeon_drv.c module_init(radeon_init);

When a driver is built-in module_init() folds to use
device_initcall(), and we have the following possible
orders:

#define pure_initcall(fn)__define_initcall(fn, 0)
#define core_initcall(fn)__define_initcall(fn, 1)
#define postcore_initcall(fn)__define_initcall(fn, 2)
#define arch_initcall(fn)__define_initcall(fn, 3)
#define subsys_initcall(fn)  __define_initcall(fn, 4)
#define fs_initcall(fn)  __define_initcall(fn, 5)
-
#define rootfs_initcall(fn)  __define_initcall(fn, rootfs)
#define device_initcall(fn)  __define_initcall(fn, 6)
#define late_initcall(fn)__define_initcall(fn, 7)

Since we start off from rootfs_initcall(), it gives us 3 more
levels of leg room to play with for order semantics, this isn't
enough to address all required levels of dependencies, this
is specially true given that AMD-KFD needs to be loaded before
the radeon driver -- -but this it not enforced by symbols.
If the AMD-KFD driver is not loaded prior to the radeon driver
because otherwise the radeon driver will not initialize the
AMD-KFD driver and you get no KFD functionality in userspace.

Commit 1bacc894c227fad8a7 ("drivers: Move iommu/ before gpu/ in
Makefile") works around some of the possibe races between
the AMD IOMMU v2 and GPU drivers by changing the link order.
This is fragile, however its the bets we can do, given that
making the GPU drivers use late_initcall() would also implicate
a similar race between them. That possible race is fortunatley
addressed given that the drm Makefile currently has amdkfd
linked prior to radeon:

drivers/gpu/drm/Makefile
...
obj-$(CONFIG_HSA_AMD) += amd/amdkfd/
obj-$(CONFIG_DRM_RADEON)+= radeon/
...

Changing amdkfd and radeon to late_initcall() however is
still the right call in orde to annotate explicitly a
delayed dependency requirement between the GPU drivers
and the IOMMUs.

We can't address the fragile nature of the link order
right now, but in the future that might be possible.

Signed-off-by: Luis R. Rodriguez 
---

Please note, the changes to drivers/Makefile are just
for the sake of forcing the possible race to occur,
if this works well the actual [PATCH] submission will
skip those changes as its pointless to remove those
work arounds as it stands, due to the limited nature
of the levels available for addressing requirements.

Also, if you are aware of further dependency hell
things like these -- please do let me know as I am
interested in looking at addressing them.

 drivers/Makefile| 6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 +-
 drivers/gpu/drm/radeon/radeon_drv.c | 2 +-
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/Makefile b/drivers/Makefile
index 0b6f3d60193d..0fbe3982041f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -50,10 +50,7 @@ obj-$(CONFIG_RESET_CONTROLLER)   += reset/
 obj-y  += tty/
 obj-y  += char/
 
-# iommu/ comes before gpu as gpu are using iommu controllers
-obj-$(CONFIG_IOMMU_SUPPORT)+= iommu/
-
-# gpu/ comes after char for AGP vs DRM startup and after iommu
+# gpu/ comes after char for AGP vs DRM startup
 obj-y  += gpu/
 
 obj-$(CONFIG_CONNECTOR)+= connector/
@@ -147,6 +144,7 @@ obj-y   += clk/
 
 obj-$(CONFIG_MAILBOX)  += mailbox/
 obj-$(CONFIG_HWSPINLOCK)   += hwspinlock/
+obj-$(CONFIG_IOMMU_SUPPORT)+= iommu/
 obj-$(CONFIG_REMOTEPROC)   += remoteproc/
 obj-$(CONFIG_RPMSG)+= rpmsg/
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 850a5623661f..3d1dab8a31c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -141,7 +141,7 @@ static void __exit kfd_module_exit(void)
dev_info(kfd_device, "Removed module\n");
 }
 
-module_init(kfd_module_init);
+late_initcall(kfd_module_init);
 module_exit(kfd_module_exit);
 
 

[RFT v3] drm: use late_initcall() for amdkfd and radeon

2016-05-26 Thread Luis R. Rodriguez
To get KFD support in radeon we need the following
initialization to happen in this order, their
respective driver file that has its init routine
listed next to it:

0. AMD IOMMUv1:arch/x86/kernel/pci-dma.c
1. AMD IOMMUv2:drivers/iommu/amd_iommu_v2.c
2. AMD KFD:drivers/gpu/drm/amd/amdkfd/kfd_module.c
3. AMD Radeon: drivers/gpu/drm/radeon/radeon_drv.c

Order is rather implicit, but these drivers can currently
only do so much given the amount of leg room available.
Below are the respective init routines and how they are
initialized:

arch/x86/kernel/pci-dma.c   rootfs_initcall(pci_iommu_init);
drivers/iommu/amd_iommu_v2.cmodule_init(amd_iommu_v2_init);
drivers/gpu/drm/amd/amdkfd/kfd_module.c module_init(kfd_module_init);
drivers/gpu/drm/radeon/radeon_drv.c module_init(radeon_init);

When a driver is built-in module_init() folds to use
device_initcall(), and we have the following possible
orders:

#define pure_initcall(fn)__define_initcall(fn, 0)
#define core_initcall(fn)__define_initcall(fn, 1)
#define postcore_initcall(fn)__define_initcall(fn, 2)
#define arch_initcall(fn)__define_initcall(fn, 3)
#define subsys_initcall(fn)  __define_initcall(fn, 4)
#define fs_initcall(fn)  __define_initcall(fn, 5)
-
#define rootfs_initcall(fn)  __define_initcall(fn, rootfs)
#define device_initcall(fn)  __define_initcall(fn, 6)
#define late_initcall(fn)__define_initcall(fn, 7)

Since we start off from rootfs_initcall(), it gives us 3 more
levels of leg room to play with for order semantics, this isn't
enough to address all required levels of dependencies, this
is specially true given that AMD-KFD needs to be loaded before
the radeon driver -- -but this it not enforced by symbols.
If the AMD-KFD driver is not loaded prior to the radeon driver
because otherwise the radeon driver will not initialize the
AMD-KFD driver and you get no KFD functionality in userspace.

Commit 1bacc894c227fad8a7 ("drivers: Move iommu/ before gpu/ in
Makefile") works around some of the possibe races between
the AMD IOMMU v2 and GPU drivers by changing the link order.
This is fragile, however its the bets we can do, given that
making the GPU drivers use late_initcall() would also implicate
a similar race between them. That possible race is fortunatley
addressed given that the drm Makefile currently has amdkfd
linked prior to radeon:

drivers/gpu/drm/Makefile
...
obj-$(CONFIG_HSA_AMD) += amd/amdkfd/
obj-$(CONFIG_DRM_RADEON)+= radeon/
...

Changing amdkfd and radeon to late_initcall() however is
still the right call in orde to annotate explicitly a
delayed dependency requirement between the GPU drivers
and the IOMMUs.

We can't address the fragile nature of the link order
right now, but in the future that might be possible.

Signed-off-by: Luis R. Rodriguez 
---

Please note, the changes to drivers/Makefile are just
for the sake of forcing the possible race to occur,
if this works well the actual [PATCH] submission will
skip those changes as its pointless to remove those
work arounds as it stands, due to the limited nature
of the levels available for addressing requirements.

Also, if you are aware of further dependency hell
things like these -- please do let me know as I am
interested in looking at addressing them.

 drivers/Makefile| 6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_module.c | 2 +-
 drivers/gpu/drm/radeon/radeon_drv.c | 2 +-
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/Makefile b/drivers/Makefile
index 0b6f3d60193d..0fbe3982041f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -50,10 +50,7 @@ obj-$(CONFIG_RESET_CONTROLLER)   += reset/
 obj-y  += tty/
 obj-y  += char/
 
-# iommu/ comes before gpu as gpu are using iommu controllers
-obj-$(CONFIG_IOMMU_SUPPORT)+= iommu/
-
-# gpu/ comes after char for AGP vs DRM startup and after iommu
+# gpu/ comes after char for AGP vs DRM startup
 obj-y  += gpu/
 
 obj-$(CONFIG_CONNECTOR)+= connector/
@@ -147,6 +144,7 @@ obj-y   += clk/
 
 obj-$(CONFIG_MAILBOX)  += mailbox/
 obj-$(CONFIG_HWSPINLOCK)   += hwspinlock/
+obj-$(CONFIG_IOMMU_SUPPORT)+= iommu/
 obj-$(CONFIG_REMOTEPROC)   += remoteproc/
 obj-$(CONFIG_RPMSG)+= rpmsg/
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 850a5623661f..3d1dab8a31c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -141,7 +141,7 @@ static void __exit kfd_module_exit(void)
dev_info(kfd_device, "Removed module\n");
 }
 
-module_init(kfd_module_init);
+late_initcall(kfd_module_init);
 module_exit(kfd_module_exit);
 
 MODULE_AUTHOR(KFD_DRIVER_AUTHOR);

Re: [PATCH v3 10/12] spi: add driver for J-Core SPI controller

2016-05-26 Thread Rich Felker
On Wed, May 25, 2016 at 11:17:08AM +0100, Mark Brown wrote:
> On Wed, May 25, 2016 at 05:43:03AM +, Rich Felker wrote:
> 
> > changes based on ml discussion of the v2 patch. The chipselect change
> > has not been made yet, except for rewriting the current logic to be
> > more clear. If the decision is that it's needed, I can do it easily,
> > but it needs testing still.
> 
> You need to fix that, it's not good to just decide to ignore review
> comments.  It is astonishing that you are doing this while expecting to
> bypass the normal process for merging things.  I'm not going to look at
> this version, please ensure you have addressed all the feedback from the
> previous review before any new version you submit.

I've made and tested a change that looks like what you want, with the
hw->csReg's chipselect bits set initially high in the probe function,
and only the requested bit being modified by the chipselect function.
Would you prefer I continue to submit this driver with new versions of
the patch series, or separate it out for further review on its own?

Rich


Re: [PATCH v3 10/12] spi: add driver for J-Core SPI controller

2016-05-26 Thread Rich Felker
On Wed, May 25, 2016 at 11:17:08AM +0100, Mark Brown wrote:
> On Wed, May 25, 2016 at 05:43:03AM +, Rich Felker wrote:
> 
> > changes based on ml discussion of the v2 patch. The chipselect change
> > has not been made yet, except for rewriting the current logic to be
> > more clear. If the decision is that it's needed, I can do it easily,
> > but it needs testing still.
> 
> You need to fix that, it's not good to just decide to ignore review
> comments.  It is astonishing that you are doing this while expecting to
> bypass the normal process for merging things.  I'm not going to look at
> this version, please ensure you have addressed all the feedback from the
> previous review before any new version you submit.

I've made and tested a change that looks like what you want, with the
hw->csReg's chipselect bits set initially high in the probe function,
and only the requested bit being modified by the chipselect function.
Would you prefer I continue to submit this driver with new versions of
the patch series, or separate it out for further review on its own?

Rich


Re: [RFC PATCH 1/2] mmc: dw_mmc: remove redundant of set_bit and clear_bit

2016-05-26 Thread Jaehoon Chung
Hi Shawn,

On 05/26/2016 12:07 PM, Shawn Lin wrote:
> dw_mci_get_cd have already dealed with these for
> both of internal card-detect and gpio card-detect.

s/dealed/dealt

This patch looks good to me. Could you resend the patch? not RFC.

Best Regards,
Jaehoon Chung

> 
> Signed-off-by: Shawn Lin 
> 
> ---
> 
>  drivers/mmc/host/dw_mmc.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index 829a6ee..cb30e91 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -2616,10 +2616,7 @@ static int dw_mci_init_slot(struct dw_mci *host, 
> unsigned int id)
>   mmc->max_seg_size = mmc->max_req_size;
>   }
>  
> - if (dw_mci_get_cd(mmc))
> - set_bit(DW_MMC_CARD_PRESENT, >flags);
> - else
> - clear_bit(DW_MMC_CARD_PRESENT, >flags);
> + dw_mci_get_cd(mmc);
>  
>   ret = mmc_add_host(mmc);
>   if (ret)
> 



Re: [RFC PATCH 2/2] mmc: dw_mmc: check card present before starting request

2016-05-26 Thread Jaehoon Chung
Hi Shawn,

On 05/26/2016 12:08 PM, Shawn Lin wrote:
> The main reason to add this check is to avoid unnecessary
> mmc_request if the card is removed. Although we have already
> check this in dw_mci_handle_cd for runtime usage of sd card and
> dw_mci_init_slot for noremovable devices, but there is a timing
> gap before it really calls dw_mci_get_cd as mmc_detect_change needs
> some delay here.

What's unnecessary mmc_request?

> 
> Another gain here is that we could save some checkings of card status
> after sd card been removed.
> 
> Signed-off-by: Shawn Lin 
> ---
> 
>  drivers/mmc/host/dw_mmc.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index cb30e91..2940d24 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -105,6 +105,7 @@ struct idmac_desc {
>  static bool dw_mci_reset(struct dw_mci *host);
>  static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset);
>  static int dw_mci_card_busy(struct mmc_host *mmc);
> +static int dw_mci_get_cd(struct mmc_host *mmc);
>  
>  #if defined(CONFIG_DEBUG_FS)
>  static int dw_mci_req_show(struct seq_file *s, void *v)
> @@ -1248,6 +1249,8 @@ static void dw_mci_request(struct mmc_host *mmc, struct 
> mmc_request *mrq)
>  
>   WARN_ON(slot->mrq);
>  
> + dw_mci_get_cd(mmc);
> +

If my understanding is right, it can be replaced 

if (!test_bit(DW_MMC_CARD_PRESENT, >flags))

to

if (!dw_mci_get_cd(mmc))


Best Regards,
Jaehoon Chung



>   /*
>* The check for card presence and queueing of the request must be
>* atomic, otherwise the card could be removed in between and the
> 



Re: [RFC PATCH 1/2] mmc: dw_mmc: remove redundant of set_bit and clear_bit

2016-05-26 Thread Jaehoon Chung
Hi Shawn,

On 05/26/2016 12:07 PM, Shawn Lin wrote:
> dw_mci_get_cd have already dealed with these for
> both of internal card-detect and gpio card-detect.

s/dealed/dealt

This patch looks good to me. Could you resend the patch? not RFC.

Best Regards,
Jaehoon Chung

> 
> Signed-off-by: Shawn Lin 
> 
> ---
> 
>  drivers/mmc/host/dw_mmc.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index 829a6ee..cb30e91 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -2616,10 +2616,7 @@ static int dw_mci_init_slot(struct dw_mci *host, 
> unsigned int id)
>   mmc->max_seg_size = mmc->max_req_size;
>   }
>  
> - if (dw_mci_get_cd(mmc))
> - set_bit(DW_MMC_CARD_PRESENT, >flags);
> - else
> - clear_bit(DW_MMC_CARD_PRESENT, >flags);
> + dw_mci_get_cd(mmc);
>  
>   ret = mmc_add_host(mmc);
>   if (ret)
> 



Re: [RFC PATCH 2/2] mmc: dw_mmc: check card present before starting request

2016-05-26 Thread Jaehoon Chung
Hi Shawn,

On 05/26/2016 12:08 PM, Shawn Lin wrote:
> The main reason to add this check is to avoid unnecessary
> mmc_request if the card is removed. Although we have already
> check this in dw_mci_handle_cd for runtime usage of sd card and
> dw_mci_init_slot for noremovable devices, but there is a timing
> gap before it really calls dw_mci_get_cd as mmc_detect_change needs
> some delay here.

What's unnecessary mmc_request?

> 
> Another gain here is that we could save some checkings of card status
> after sd card been removed.
> 
> Signed-off-by: Shawn Lin 
> ---
> 
>  drivers/mmc/host/dw_mmc.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
> index cb30e91..2940d24 100644
> --- a/drivers/mmc/host/dw_mmc.c
> +++ b/drivers/mmc/host/dw_mmc.c
> @@ -105,6 +105,7 @@ struct idmac_desc {
>  static bool dw_mci_reset(struct dw_mci *host);
>  static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset);
>  static int dw_mci_card_busy(struct mmc_host *mmc);
> +static int dw_mci_get_cd(struct mmc_host *mmc);
>  
>  #if defined(CONFIG_DEBUG_FS)
>  static int dw_mci_req_show(struct seq_file *s, void *v)
> @@ -1248,6 +1249,8 @@ static void dw_mci_request(struct mmc_host *mmc, struct 
> mmc_request *mrq)
>  
>   WARN_ON(slot->mrq);
>  
> + dw_mci_get_cd(mmc);
> +

If my understanding is right, it can be replaced 

if (!test_bit(DW_MMC_CARD_PRESENT, >flags))

to

if (!dw_mci_get_cd(mmc))


Best Regards,
Jaehoon Chung



>   /*
>* The check for card presence and queueing of the request must be
>* atomic, otherwise the card could be removed in between and the
> 



Re: [RFT v2] iommu/amd: use subsys_initcall() on amdv2 iommu

2016-05-26 Thread Luis R. Rodriguez
On Mon, Apr 25, 2016 at 12:23:51PM +0200, Joerg Roedel wrote:
> On Mon, Apr 18, 2016 at 02:03:50PM +0200, Luis R. Rodriguez wrote:
> > You said that with my patch you saw AMD IOMMUv2 kick off first,
> > that was intentional as I thought that's what you needed. Can
> > someone please describe the requirements?
> > 
> > Also what does drm use that you say has a conflict already? What
> > drm code are we talking about exactly ?
> 
> The required order is:
> 
>   1. AMD IOMMUv1 (in-kernel only, initialized at boot time after PCI)
>   2. AMD IOMMUv2 (in-kernel or as module, provides demand paging
>   and uses v1 interfaces to talk to the IOMMU)
>   3. AMD-KFD (Implements compute offloading to the GPU and
>   uses AMD IOMMUv2 functionality, also provides a
>   symbol for the radeon driver)
>   4. DRM with(Checks if the symbol provided by AMD-KFD is
>  Radeon   available at init time and does the KFD
>   initialization from there, because the GPU needs
>   to be up first)
> 
> So AMD IOMMUv2 does not initialize (but it does load to have the symbols
> available for drivers that optionally use its functionality) without the
> AMD IOMMUv1 driver, as it can't access the IOMMU hardware then.
> 
> AMD-KFD does not load without AMD IOMMUv2 being loaded, as it depends on
> its symbols. AMD-KFD on the other hand needs to be loaded before the
> radeon driver (but this it not enforced by symbols), because otherwise
> the radeon driver will not initialize the AMD-KFD driver.
> 
> When AMD-KFD is loaded and you load radeon then, you get the KFD
> functionality in the kernel. Then you can move on to the fun getting the
> userspace running and actually execute anything on the GPU. But thats
> another story.
> 
> I know what you think, and I agree: It's a big mess :)

Its no concern for me that its a mess, frankly most drivers are. This however
illustrates a semantic gap in the driver core when you have more than 3
dependencies after a rootfs_initcall(), given that the buck stops at
late_initcall(), only 3 levels above. With 4 dependencies you run out
of semantics to specify explicit requirements.

Ties can be disputed through link order though, but this is obviously fragile,
and the dependencies are then left implicit. Nothing vets for correctness,
either in software or through static analysers.

To summarize the specific code in question is (and order required):

AMD IOMMUv1:arch/x86/kernel/pci-dma.c   
rootfs_initcall(pci_iommu_init);
AMD IOMMUv2:drivers/iommu/amd_iommu_v2.c
module_init(amd_iommu_v2_init); (device_initcall())
AMD KFD:drivers/gpu/drm/amd/amdkfd/kfd_module.c 
module_init(kfd_module_init); (device_initcall())
AMD Radeon: drivers/gpu/drm/radeon/radeon_drv.c 
module_init(radeon_init); (device_initcall())

Commit 1bacc894c227fad8a7 ("drivers: Move iommu/ before gpu/ in Makefile") 
resolved
the race between IOMMU and GPU drivers this way, however an alternative is to 
make
the dependencies in levels explicit by making the amdkfd and radeon drivers both
use late_initcall(), this however is technically still racy specially since you
note that the amdkfd driver is not loaded prior to radeon through symbol
dependencies, if happens to be loaded it then you get KFD functionality,
otherwise you don't.

Making both amdkfd and radeon use late_initcall() would actually work now though
but that's only because the link order also happens to match the dependency
requirements:

drivers/gpu/drm/Makefile
...
obj-$(CONFIG_HSA_AMD) += amd/amdkfd/
obj-$(CONFIG_DRM_RADEON)+= radeon/  
...

Since we currently lack proper semantics to define clear dependencies for all
this reverting 1bacc894c227fad8a7 after using late_initcall() on both amdkfd
and radeon would not be useful other than to just test the race fix, given that
such work around would still be present also on drivers/gpu/drm/Makefile to
account for the race between amdkfd and radeon. Reverting 1bacc894c227fad8a7
after using late_initcall() on both amdkfd and radeon would just bump the
race work around another level.

Modifying both amdkfd and radeon to use late_initcall() however seems well
justified for now, and given the current state of affairs with link order
one should be able to then correctly build all these things as built-in
and it should work correctly.

I'm still not satisfied with the issues on semantics here though. A fix
for that, if desired, should be possible using linker-tables, currently
in RFCv2 [0] stage. Linker tables offer practically infinite number (as
long as a valid ELF section name, as the order level is part of the
section name) of order levels. It also would offer the opportunity
to build dependencies outside of the driver core layer, so you can
customize the dependency map per subsystem as you see fit.

So -- for now I'll 

Re: [RFT v2] iommu/amd: use subsys_initcall() on amdv2 iommu

2016-05-26 Thread Luis R. Rodriguez
On Mon, Apr 25, 2016 at 12:23:51PM +0200, Joerg Roedel wrote:
> On Mon, Apr 18, 2016 at 02:03:50PM +0200, Luis R. Rodriguez wrote:
> > You said that with my patch you saw AMD IOMMUv2 kick off first,
> > that was intentional as I thought that's what you needed. Can
> > someone please describe the requirements?
> > 
> > Also what does drm use that you say has a conflict already? What
> > drm code are we talking about exactly ?
> 
> The required order is:
> 
>   1. AMD IOMMUv1 (in-kernel only, initialized at boot time after PCI)
>   2. AMD IOMMUv2 (in-kernel or as module, provides demand paging
>   and uses v1 interfaces to talk to the IOMMU)
>   3. AMD-KFD (Implements compute offloading to the GPU and
>   uses AMD IOMMUv2 functionality, also provides a
>   symbol for the radeon driver)
>   4. DRM with(Checks if the symbol provided by AMD-KFD is
>  Radeon   available at init time and does the KFD
>   initialization from there, because the GPU needs
>   to be up first)
> 
> So AMD IOMMUv2 does not initialize (but it does load to have the symbols
> available for drivers that optionally use its functionality) without the
> AMD IOMMUv1 driver, as it can't access the IOMMU hardware then.
> 
> AMD-KFD does not load without AMD IOMMUv2 being loaded, as it depends on
> its symbols. AMD-KFD on the other hand needs to be loaded before the
> radeon driver (but this it not enforced by symbols), because otherwise
> the radeon driver will not initialize the AMD-KFD driver.
> 
> When AMD-KFD is loaded and you load radeon then, you get the KFD
> functionality in the kernel. Then you can move on to the fun getting the
> userspace running and actually execute anything on the GPU. But thats
> another story.
> 
> I know what you think, and I agree: It's a big mess :)

Its no concern for me that its a mess, frankly most drivers are. This however
illustrates a semantic gap in the driver core when you have more than 3
dependencies after a rootfs_initcall(), given that the buck stops at
late_initcall(), only 3 levels above. With 4 dependencies you run out
of semantics to specify explicit requirements.

Ties can be disputed through link order though, but this is obviously fragile,
and the dependencies are then left implicit. Nothing vets for correctness,
either in software or through static analysers.

To summarize the specific code in question is (and order required):

AMD IOMMUv1:arch/x86/kernel/pci-dma.c   
rootfs_initcall(pci_iommu_init);
AMD IOMMUv2:drivers/iommu/amd_iommu_v2.c
module_init(amd_iommu_v2_init); (device_initcall())
AMD KFD:drivers/gpu/drm/amd/amdkfd/kfd_module.c 
module_init(kfd_module_init); (device_initcall())
AMD Radeon: drivers/gpu/drm/radeon/radeon_drv.c 
module_init(radeon_init); (device_initcall())

Commit 1bacc894c227fad8a7 ("drivers: Move iommu/ before gpu/ in Makefile") 
resolved
the race between IOMMU and GPU drivers this way, however an alternative is to 
make
the dependencies in levels explicit by making the amdkfd and radeon drivers both
use late_initcall(), this however is technically still racy specially since you
note that the amdkfd driver is not loaded prior to radeon through symbol
dependencies, if happens to be loaded it then you get KFD functionality,
otherwise you don't.

Making both amdkfd and radeon use late_initcall() would actually work now though
but that's only because the link order also happens to match the dependency
requirements:

drivers/gpu/drm/Makefile
...
obj-$(CONFIG_HSA_AMD) += amd/amdkfd/
obj-$(CONFIG_DRM_RADEON)+= radeon/  
...

Since we currently lack proper semantics to define clear dependencies for all
this reverting 1bacc894c227fad8a7 after using late_initcall() on both amdkfd
and radeon would not be useful other than to just test the race fix, given that
such work around would still be present also on drivers/gpu/drm/Makefile to
account for the race between amdkfd and radeon. Reverting 1bacc894c227fad8a7
after using late_initcall() on both amdkfd and radeon would just bump the
race work around another level.

Modifying both amdkfd and radeon to use late_initcall() however seems well
justified for now, and given the current state of affairs with link order
one should be able to then correctly build all these things as built-in
and it should work correctly.

I'm still not satisfied with the issues on semantics here though. A fix
for that, if desired, should be possible using linker-tables, currently
in RFCv2 [0] stage. Linker tables offer practically infinite number (as
long as a valid ELF section name, as the order level is part of the
section name) of order levels. It also would offer the opportunity
to build dependencies outside of the driver core layer, so you can
customize the dependency map per subsystem as you see fit.

So -- for now I'll 

Re: [PATCH] ACPI / Thermal / video: fix max_level incorrect value

2016-05-26 Thread Zhang, Rui
On Thu, 2016-05-26 at 13:15 +0800, Aaron Lu wrote:
> On 05/26/2016 09:49 AM, valdis.kletni...@vt.edu wrote:
> > On Wed, 25 May 2016 13:15:26 +0800, Aaron Lu said:
> >> Valdis, can you please give the patch a try? Thanks.
> > 
> > Sorry, had a few days where actual work commitments and other
> > things got in the way... I tested this patch against next-20160524,
> > and can report that the problem is fixed, so feel free to
> > stick a "Tested-By:" on it
> 
> Thanks a lot for the confirm, here is the updated patch with your
> tested-by tag added.
> 
> From: Aaron Lu 
> Date: Sat, 21 May 2016 15:30:46 +0800
> Subject: [PATCH] ACPI / Thermal / video: fix max_level incorrect value
> 
> commit 059500940def("ACPI/video: export acpi_video_get_levels")
> mistakenly dropped the correct value of max_level and that caused the
> set_level function following failed and the acpi_video backlight interface
> didn't get created. Fix this by passing back the correct max_level value.
> 
> While at it, also fix the param used in acpi_video_device_lcd_query_levels
> where acpi_handle is expected but acpi_video_device is passed.
> 
> Reported-and-tested-by: Valdis Kletnieks 
> Signed-off-by: Aaron Lu 

Acked-by: Zhang Rui 
> ---
>  drivers/acpi/acpi_video.c | 9 ++---
>  drivers/thermal/int340x_thermal/int3406_thermal.c | 2 +-
>  include/acpi/video.h  | 6 --
>  3 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
> index 3d5b8a099351..c1d138e128cb 100644
> --- a/drivers/acpi/acpi_video.c
> +++ b/drivers/acpi/acpi_video.c
> @@ -754,7 +754,8 @@ static int acpi_video_bqc_quirk(struct acpi_video_device 
> *device,
>  }
>  
>  int acpi_video_get_levels(struct acpi_device *device,
> -   struct acpi_video_device_brightness **dev_br)
> +   struct acpi_video_device_brightness **dev_br,
> +   int *pmax_level)
>  {
>   union acpi_object *obj = NULL;
>   int i, max_level = 0, count = 0, level_ac_battery = 0;
> @@ -841,6 +842,8 @@ int acpi_video_get_levels(struct acpi_device *device,
>  
>   br->count = count;
>   *dev_br = br;
> + if (pmax_level)
> + *pmax_level = max_level;
>  
>  out:
>   kfree(obj);
> @@ -869,7 +872,7 @@ acpi_video_init_brightness(struct acpi_video_device 
> *device)
>   struct acpi_video_device_brightness *br = NULL;
>   int result = -EINVAL;
>  
> - result = acpi_video_get_levels(device->dev, );
> + result = acpi_video_get_levels(device->dev, , _level);
>   if (result)
>   return result;
>   device->brightness = br;
> @@ -1737,7 +1740,7 @@ static void acpi_video_run_bcl_for_osi(struct 
> acpi_video_bus *video)
>  
>   mutex_lock(>device_list_lock);
>   list_for_each_entry(dev, >video_device_list, entry) {
> - if (!acpi_video_device_lcd_query_levels(dev, ))
> + if (!acpi_video_device_lcd_query_levels(dev->dev->handle, 
> ))
>   kfree(levels);
>   }
>   mutex_unlock(>device_list_lock);
> diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c 
> b/drivers/thermal/int340x_thermal/int3406_thermal.c
> index 13d431cbd29e..a578cd257db4 100644
> --- a/drivers/thermal/int340x_thermal/int3406_thermal.c
> +++ b/drivers/thermal/int340x_thermal/int3406_thermal.c
> @@ -177,7 +177,7 @@ static int int3406_thermal_probe(struct platform_device 
> *pdev)
>   return -ENODEV;
>   d->raw_bd = bd;
>  
> - ret = acpi_video_get_levels(ACPI_COMPANION(>dev), >br);
> + ret = acpi_video_get_levels(ACPI_COMPANION(>dev), >br, NULL);
>   if (ret)
>   return ret;
>  
> diff --git a/include/acpi/video.h b/include/acpi/video.h
> index 70a41f742037..5731ccb42585 100644
> --- a/include/acpi/video.h
> +++ b/include/acpi/video.h
> @@ -51,7 +51,8 @@ extern void acpi_video_set_dmi_backlight_type(enum 
> acpi_backlight_type type);
>   */
>  extern bool acpi_video_handles_brightness_key_presses(void);
>  extern int acpi_video_get_levels(struct acpi_device *device,
> -  struct acpi_video_device_brightness **dev_br);
> +  struct acpi_video_device_brightness **dev_br,
> +  int *pmax_level);
>  #else
>  static inline int acpi_video_register(void) { return 0; }
>  static inline void acpi_video_unregister(void) { return; }
> @@ -72,7 +73,8 @@ static inline bool 
> acpi_video_handles_brightness_key_presses(void)
>   return false;
>  }
>  static inline int acpi_video_get_levels(struct acpi_device *device,
> - struct acpi_video_device_brightness **dev_br)
> + struct acpi_video_device_brightness **dev_br,
> + int *pmax_level)
>  {
>   return -ENODEV;
>  }



Re: [PATCH] ACPI / Thermal / video: fix max_level incorrect value

2016-05-26 Thread Zhang, Rui
On Thu, 2016-05-26 at 13:15 +0800, Aaron Lu wrote:
> On 05/26/2016 09:49 AM, valdis.kletni...@vt.edu wrote:
> > On Wed, 25 May 2016 13:15:26 +0800, Aaron Lu said:
> >> Valdis, can you please give the patch a try? Thanks.
> > 
> > Sorry, had a few days where actual work commitments and other
> > things got in the way... I tested this patch against next-20160524,
> > and can report that the problem is fixed, so feel free to
> > stick a "Tested-By:" on it
> 
> Thanks a lot for the confirm, here is the updated patch with your
> tested-by tag added.
> 
> From: Aaron Lu 
> Date: Sat, 21 May 2016 15:30:46 +0800
> Subject: [PATCH] ACPI / Thermal / video: fix max_level incorrect value
> 
> commit 059500940def("ACPI/video: export acpi_video_get_levels")
> mistakenly dropped the correct value of max_level and that caused the
> set_level function following failed and the acpi_video backlight interface
> didn't get created. Fix this by passing back the correct max_level value.
> 
> While at it, also fix the param used in acpi_video_device_lcd_query_levels
> where acpi_handle is expected but acpi_video_device is passed.
> 
> Reported-and-tested-by: Valdis Kletnieks 
> Signed-off-by: Aaron Lu 

Acked-by: Zhang Rui 
> ---
>  drivers/acpi/acpi_video.c | 9 ++---
>  drivers/thermal/int340x_thermal/int3406_thermal.c | 2 +-
>  include/acpi/video.h  | 6 --
>  3 files changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
> index 3d5b8a099351..c1d138e128cb 100644
> --- a/drivers/acpi/acpi_video.c
> +++ b/drivers/acpi/acpi_video.c
> @@ -754,7 +754,8 @@ static int acpi_video_bqc_quirk(struct acpi_video_device 
> *device,
>  }
>  
>  int acpi_video_get_levels(struct acpi_device *device,
> -   struct acpi_video_device_brightness **dev_br)
> +   struct acpi_video_device_brightness **dev_br,
> +   int *pmax_level)
>  {
>   union acpi_object *obj = NULL;
>   int i, max_level = 0, count = 0, level_ac_battery = 0;
> @@ -841,6 +842,8 @@ int acpi_video_get_levels(struct acpi_device *device,
>  
>   br->count = count;
>   *dev_br = br;
> + if (pmax_level)
> + *pmax_level = max_level;
>  
>  out:
>   kfree(obj);
> @@ -869,7 +872,7 @@ acpi_video_init_brightness(struct acpi_video_device 
> *device)
>   struct acpi_video_device_brightness *br = NULL;
>   int result = -EINVAL;
>  
> - result = acpi_video_get_levels(device->dev, );
> + result = acpi_video_get_levels(device->dev, , _level);
>   if (result)
>   return result;
>   device->brightness = br;
> @@ -1737,7 +1740,7 @@ static void acpi_video_run_bcl_for_osi(struct 
> acpi_video_bus *video)
>  
>   mutex_lock(>device_list_lock);
>   list_for_each_entry(dev, >video_device_list, entry) {
> - if (!acpi_video_device_lcd_query_levels(dev, ))
> + if (!acpi_video_device_lcd_query_levels(dev->dev->handle, 
> ))
>   kfree(levels);
>   }
>   mutex_unlock(>device_list_lock);
> diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c 
> b/drivers/thermal/int340x_thermal/int3406_thermal.c
> index 13d431cbd29e..a578cd257db4 100644
> --- a/drivers/thermal/int340x_thermal/int3406_thermal.c
> +++ b/drivers/thermal/int340x_thermal/int3406_thermal.c
> @@ -177,7 +177,7 @@ static int int3406_thermal_probe(struct platform_device 
> *pdev)
>   return -ENODEV;
>   d->raw_bd = bd;
>  
> - ret = acpi_video_get_levels(ACPI_COMPANION(>dev), >br);
> + ret = acpi_video_get_levels(ACPI_COMPANION(>dev), >br, NULL);
>   if (ret)
>   return ret;
>  
> diff --git a/include/acpi/video.h b/include/acpi/video.h
> index 70a41f742037..5731ccb42585 100644
> --- a/include/acpi/video.h
> +++ b/include/acpi/video.h
> @@ -51,7 +51,8 @@ extern void acpi_video_set_dmi_backlight_type(enum 
> acpi_backlight_type type);
>   */
>  extern bool acpi_video_handles_brightness_key_presses(void);
>  extern int acpi_video_get_levels(struct acpi_device *device,
> -  struct acpi_video_device_brightness **dev_br);
> +  struct acpi_video_device_brightness **dev_br,
> +  int *pmax_level);
>  #else
>  static inline int acpi_video_register(void) { return 0; }
>  static inline void acpi_video_unregister(void) { return; }
> @@ -72,7 +73,8 @@ static inline bool 
> acpi_video_handles_brightness_key_presses(void)
>   return false;
>  }
>  static inline int acpi_video_get_levels(struct acpi_device *device,
> - struct acpi_video_device_brightness **dev_br)
> + struct acpi_video_device_brightness **dev_br,
> + int *pmax_level)
>  {
>   return -ENODEV;
>  }



Re: [PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Fam Zheng
On Thu, 05/26 17:08, Long Li wrote:
> The block sector size should be in unit of 512 bytes, not in bytes.
> 
> Signed-off-by: Long Li 
> 
> ---
>  drivers/scsi/sd.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 428c03e..4bce17e 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
>   if (sdkp->opt_xfer_blocks &&
>   sdkp->opt_xfer_blocks <= dev_max &&
>   sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
> - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
> - rw_max = q->limits.io_opt =
> + sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
> + q->limits.io_opt =
>   sdkp->opt_xfer_blocks * sdp->sector_size;
> + rw_max = (q->limits.io_opt >> 9);

Other than the superfluous parenthesis, looks good to me.

Fam

> + }
>   else
>   rw_max = BLK_DEF_MAX_SECTORS;
>  
> -- 
> 1.8.5.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Fam Zheng
On Thu, 05/26 17:08, Long Li wrote:
> The block sector size should be in unit of 512 bytes, not in bytes.
> 
> Signed-off-by: Long Li 
> 
> ---
>  drivers/scsi/sd.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 428c03e..4bce17e 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
>   if (sdkp->opt_xfer_blocks &&
>   sdkp->opt_xfer_blocks <= dev_max &&
>   sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
> - sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
> - rw_max = q->limits.io_opt =
> + sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
> + q->limits.io_opt =
>   sdkp->opt_xfer_blocks * sdp->sector_size;
> + rw_max = (q->limits.io_opt >> 9);

Other than the superfluous parenthesis, looks good to me.

Fam

> + }
>   else
>   rw_max = BLK_DEF_MAX_SECTORS;
>  
> -- 
> 1.8.5.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/23] all: syscall wrappers: add documentation

2016-05-26 Thread Yury Norov
On Thu, May 26, 2016 at 11:29:45PM +0100, Catalin Marinas wrote:
> On Thu, May 26, 2016 at 11:48:19PM +0300, Yury Norov wrote:
> > On Wed, May 25, 2016 at 02:28:21PM -0700, David Miller wrote:
> > > From: Arnd Bergmann 
> > > Date: Wed, 25 May 2016 23:01:06 +0200
> > > 
> > > > On Wednesday, May 25, 2016 1:50:39 PM CEST David Miller wrote:
> > > >> From: Arnd Bergmann 
> > > >> Date: Wed, 25 May 2016 22:47:33 +0200
> > > >> 
> > > >> > If we use the normal calling conventions, we could remove these 
> > > >> > overrides
> > > >> > along with the respective special-case handling in glibc. None of 
> > > >> > them
> > > >> > look particularly performance-sensitive, but I could be wrong there.
> > > >> 
> > > >> You could set the lowest bit in the system call entry pointer to 
> > > >> indicate
> > > >> the upper-half clears should be elided.
> > > > 
> > > > Right, but that would introduce an extra conditional branch in the 
> > > > syscall
> > > > hotpath, and likely eliminate the gains from passing the loff_t 
> > > > arguments
> > > > in a single register instead of a pair.
> > > 
> > > Ok, then, how much are you really gaining from avoiding a 'shift' and
> > > an 'or' to build the full 64-bit value?  3 cycles?  Maybe 4?
> > 
> > 4 cycles in kernel and ~same cost in glibc to create a pair.
> 
> It would take a single instruction per argument in the kernel to do
> shift+or and maybe 1-2 more instructions to move the remaining arguments
> in place (we do this for a few wrappers in arch/arm64/kernel/entry32.S).
> And the glibc counterpart.
> 
> > And 8 'mov's that exist for every syscall, even yield().
> > 
> > > And the executing the wrappers, those have a non-trivial cost too.
> > 
> > The cost is pretty trivial though. See kernel/compat_wrapper.o:
> > COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
> > 0:   a9bf7bfdstp x29, x30, [sp,#-16]!
> > 4:   910003fdmov x29, sp
> > 8:   2a0003e0mov w0, w0
> > c:   9400bl  0 
> > 10:  a8c17bfdldp x29, x30, [sp],#16
> > 14:  d65f03c0ret
> 
> I would say the above could be more expensive than 8 movs (16 bytes to
> write, read, a branch and a ret). You can also add the I-cache locality,
> having wrappers for each syscalls instead of a single place for zeroing
> the upper half (where no other wrapper is necessary).
> 
> Can we trick the compiler into doing a tail call optimisation. This
> could have simply been:
> 
> COMPAT_SYSCALL_WRAP2(creat, ...):
>   mov w0, w0
>   b   

What you talk about was in my initial version. But Heiko insisted on having all
wrappers together.
http://www.spinics.net/lists/linux-s390/msg11593.html

Grep your email for discussion.

> 
> > > Cost wise, this seems like it all cancels out in the end, but what
> > > do I know?
> > 
> > I think you know something, and I also think Heiko and other s390 guys
> > know something as well. So I'd like to listen their arguments here.
> > 
> > For me spark64 way is looking reasonable only because it's really simple
> > and takes less coding. I'll try it on some branch and share here what 
> > happened.
> 
> The kernel code will definitely look simpler ;). It would be good to see
> if there actually is any performance impact. Even with 16 more cycles on
> syscall entry, would they be lost in the noise? You don't need a full
> implementation, just some dummy mov x0, x0 on the entry path.
> 
> -- 
> Catalin


Re: [PATCH 01/23] all: syscall wrappers: add documentation

2016-05-26 Thread Yury Norov
On Thu, May 26, 2016 at 11:29:45PM +0100, Catalin Marinas wrote:
> On Thu, May 26, 2016 at 11:48:19PM +0300, Yury Norov wrote:
> > On Wed, May 25, 2016 at 02:28:21PM -0700, David Miller wrote:
> > > From: Arnd Bergmann 
> > > Date: Wed, 25 May 2016 23:01:06 +0200
> > > 
> > > > On Wednesday, May 25, 2016 1:50:39 PM CEST David Miller wrote:
> > > >> From: Arnd Bergmann 
> > > >> Date: Wed, 25 May 2016 22:47:33 +0200
> > > >> 
> > > >> > If we use the normal calling conventions, we could remove these 
> > > >> > overrides
> > > >> > along with the respective special-case handling in glibc. None of 
> > > >> > them
> > > >> > look particularly performance-sensitive, but I could be wrong there.
> > > >> 
> > > >> You could set the lowest bit in the system call entry pointer to 
> > > >> indicate
> > > >> the upper-half clears should be elided.
> > > > 
> > > > Right, but that would introduce an extra conditional branch in the 
> > > > syscall
> > > > hotpath, and likely eliminate the gains from passing the loff_t 
> > > > arguments
> > > > in a single register instead of a pair.
> > > 
> > > Ok, then, how much are you really gaining from avoiding a 'shift' and
> > > an 'or' to build the full 64-bit value?  3 cycles?  Maybe 4?
> > 
> > 4 cycles in kernel and ~same cost in glibc to create a pair.
> 
> It would take a single instruction per argument in the kernel to do
> shift+or and maybe 1-2 more instructions to move the remaining arguments
> in place (we do this for a few wrappers in arch/arm64/kernel/entry32.S).
> And the glibc counterpart.
> 
> > And 8 'mov's that exist for every syscall, even yield().
> > 
> > > And the executing the wrappers, those have a non-trivial cost too.
> > 
> > The cost is pretty trivial though. See kernel/compat_wrapper.o:
> > COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
> > 0:   a9bf7bfdstp x29, x30, [sp,#-16]!
> > 4:   910003fdmov x29, sp
> > 8:   2a0003e0mov w0, w0
> > c:   9400bl  0 
> > 10:  a8c17bfdldp x29, x30, [sp],#16
> > 14:  d65f03c0ret
> 
> I would say the above could be more expensive than 8 movs (16 bytes to
> write, read, a branch and a ret). You can also add the I-cache locality,
> having wrappers for each syscalls instead of a single place for zeroing
> the upper half (where no other wrapper is necessary).
> 
> Can we trick the compiler into doing a tail call optimisation. This
> could have simply been:
> 
> COMPAT_SYSCALL_WRAP2(creat, ...):
>   mov w0, w0
>   b   

What you talk about was in my initial version. But Heiko insisted on having all
wrappers together.
http://www.spinics.net/lists/linux-s390/msg11593.html

Grep your email for discussion.

> 
> > > Cost wise, this seems like it all cancels out in the end, but what
> > > do I know?
> > 
> > I think you know something, and I also think Heiko and other s390 guys
> > know something as well. So I'd like to listen their arguments here.
> > 
> > For me spark64 way is looking reasonable only because it's really simple
> > and takes less coding. I'll try it on some branch and share here what 
> > happened.
> 
> The kernel code will definitely look simpler ;). It would be good to see
> if there actually is any performance impact. Even with 16 more cycles on
> syscall entry, would they be lost in the noise? You don't need a full
> implementation, just some dummy mov x0, x0 on the entry path.
> 
> -- 
> Catalin


[PATCH 2/4] f2fs: inject to produce some orphan inodes

2016-05-26 Thread Jaegeuk Kim
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/f2fs.h  | 3 +++
 fs/f2fs/inode.c | 5 +
 fs/f2fs/super.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5daba19..c4697b7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -45,6 +45,7 @@ enum {
FAULT_ORPHAN,
FAULT_BLOCK,
FAULT_DIR_DEPTH,
+   FAULT_EVICT_INODE,
FAULT_MAX,
 };
 
@@ -74,6 +75,8 @@ static inline bool time_to_inject(int type)
return false;
else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type))
return false;
+   else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type))
+   return false;
 
atomic_inc(_fault.inject_ops);
if (atomic_read(_fault.inject_ops) >= f2fs_fault.inject_rate) {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index bdd814d..11cb60a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -345,6 +345,11 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
 
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+   if (time_to_inject(FAULT_EVICT_INODE))
+   goto no_delete;
+#endif
+
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a5b5739..27f76819e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = {
[FAULT_ORPHAN]  = "orphan",
[FAULT_BLOCK]   = "no more block",
[FAULT_DIR_DEPTH]   = "too big dir depth",
+   [FAULT_EVICT_INODE] = "evict_inode fail",
 };
 
 static void f2fs_build_fault_attr(unsigned int rate)
-- 
2.6.3



[PATCH 2/4] f2fs: inject to produce some orphan inodes

2016-05-26 Thread Jaegeuk Kim
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/f2fs.h  | 3 +++
 fs/f2fs/inode.c | 5 +
 fs/f2fs/super.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5daba19..c4697b7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -45,6 +45,7 @@ enum {
FAULT_ORPHAN,
FAULT_BLOCK,
FAULT_DIR_DEPTH,
+   FAULT_EVICT_INODE,
FAULT_MAX,
 };
 
@@ -74,6 +75,8 @@ static inline bool time_to_inject(int type)
return false;
else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type))
return false;
+   else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type))
+   return false;
 
atomic_inc(_fault.inject_ops);
if (atomic_read(_fault.inject_ops) >= f2fs_fault.inject_rate) {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index bdd814d..11cb60a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -345,6 +345,11 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
 
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+   if (time_to_inject(FAULT_EVICT_INODE))
+   goto no_delete;
+#endif
+
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a5b5739..27f76819e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = {
[FAULT_ORPHAN]  = "orphan",
[FAULT_BLOCK]   = "no more block",
[FAULT_DIR_DEPTH]   = "too big dir depth",
+   [FAULT_EVICT_INODE] = "evict_inode fail",
 };
 
 static void f2fs_build_fault_attr(unsigned int rate)
-- 
2.6.3



[PATCH 4/4] f2fs: remove two steps to flush dirty data pages

2016-05-26 Thread Jaegeuk Kim
If there is no cold page, we don't need to do a loop to flush dirty
data pages.

On /dev/pmem0,

1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync
 Before : 1.1 GB/s
 After  : 1.2 GB/s

2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048
 Before : 2.2 GB/s
 After  : 2.3 GB/s

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 85ceb2b..5dcd8db 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1326,10 +1326,9 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
int cycled;
int range_whole = 0;
int tag;
-   int step = 0;
 
pagevec_init(, 0);
-next:
+
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -1384,9 +1383,6 @@ continue_unlock:
goto continue_unlock;
}
 
-   if (step == is_cold_data(page))
-   goto continue_unlock;
-
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
@@ -1421,11 +1417,6 @@ continue_unlock:
cond_resched();
}
 
-   if (step < 1) {
-   step++;
-   goto next;
-   }
-
if (!cycled && !done) {
cycled = 1;
index = 0;
-- 
2.6.3



[PATCH 4/4] f2fs: remove two steps to flush dirty data pages

2016-05-26 Thread Jaegeuk Kim
If there is no cold page, we don't need to do a loop to flush dirty
data pages.

On /dev/pmem0,

1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync
 Before : 1.1 GB/s
 After  : 1.2 GB/s

2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048
 Before : 2.2 GB/s
 After  : 2.3 GB/s

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 85ceb2b..5dcd8db 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1326,10 +1326,9 @@ static int f2fs_write_cache_pages(struct address_space 
*mapping,
int cycled;
int range_whole = 0;
int tag;
-   int step = 0;
 
pagevec_init(, 0);
-next:
+
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
@@ -1384,9 +1383,6 @@ continue_unlock:
goto continue_unlock;
}
 
-   if (step == is_cold_data(page))
-   goto continue_unlock;
-
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
@@ -1421,11 +1417,6 @@ continue_unlock:
cond_resched();
}
 
-   if (step < 1) {
-   step++;
-   goto next;
-   }
-
if (!cycled && !done) {
cycled = 1;
index = 0;
-- 
2.6.3



[PATCH 3/4] f2fs: do not skip writing data pages

2016-05-26 Thread Jaegeuk Kim
For data pages, let's try to flush as much as possible in background.

On /dev/pmem0,

1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync
 Before : 800 MB/s
 After  : 1.1 GB/s

2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048
 Before : 1.3 GB/s
 After  : 2.2 GB/s

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c| 11 +--
 fs/f2fs/segment.h |  4 +---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7132b02..85ceb2b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1444,7 +1444,6 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
-   long diff;
 
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -1469,14 +1468,14 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
 
trace_f2fs_writepages(mapping->host, wbc, DATA);
 
-   diff = nr_pages_to_write(sbi, DATA, wbc);
-
ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
-   f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
+   /*
+* if some pages were truncated, we cannot guarantee its mapping->host
+* to detect pending bios.
+*/
+   f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
remove_dirty_inode(inode);
-
-   wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
 
 skip_write:
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5d016a1..890bb28d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -728,9 +728,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info 
*sbi, int type,
 
nr_to_write = wbc->nr_to_write;
 
-   if (type == DATA)
-   desired = 4096;
-   else if (type == NODE)
+   if (type == NODE)
desired = 3 * max_hw_blocks(sbi);
else
desired = MAX_BIO_BLOCKS(sbi);
-- 
2.6.3



[PATCH 1/4] f2fs: propagate error given by f2fs_find_entry

2016-05-26 Thread Jaegeuk Kim
If we get ENOMEM or EIO in f2fs_find_entry, we should stop right away.
Otherwise, for example, we can get duplicate directory entry by ->chash and
->clevel.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/dir.c| 23 ---
 fs/f2fs/inline.c |  4 +++-
 fs/f2fs/namei.c  |  5 +
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 24d1308..ae37543 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode 
*dir,
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
-   room = true;
-   continue;
+   if (PTR_ERR(dentry_page) == -ENOENT) {
+   room = true;
+   continue;
+   } else {
+   *res_page = dentry_page;
+   break;
+   }
}
 
de = find_in_block(dentry_page, fname, namehash, _slots,
@@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct fscrypt_name fname;
int err;
 
-   *res_page = NULL;
-
err = fscrypt_setup_filename(dir, child, 1, );
-   if (err)
+   if (err) {
+   *res_page = ERR_PTR(-ENOMEM);
return NULL;
+   }
 
if (f2fs_has_inline_dentry(dir)) {
+   *res_page = NULL;
de = find_in_inline_dir(dir, , res_page);
goto out;
}
 
-   if (npages == 0)
+   if (npages == 0) {
+   *res_page = NULL;
goto out;
+   }
 
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -247,8 +255,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
}
 
for (level = 0; level < max_depth; level++) {
+   *res_page = NULL;
de = find_in_level(dir, level, , res_page);
-   if (de)
+   if (de || IS_ERR(*res_page))
break;
}
 out:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 77c9c24..1eb3043 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -286,8 +286,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode 
*dir,
f2fs_hash_t namehash;
 
ipage = get_node_page(sbi, dir->i_ino);
-   if (IS_ERR(ipage))
+   if (IS_ERR(ipage)) {
+   *res_page = ipage;
return NULL;
+   }
 
namehash = f2fs_dentry_hash();
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 496f4e3..3f6119e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -232,6 +232,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
+   goto out;
} else {
err = __f2fs_add_link(dir, , NULL, dir->i_ino, S_IFDIR);
if (err)
@@ -242,6 +245,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
} else {
err = __f2fs_add_link(dir, , NULL, pino, S_IFDIR);
}
-- 
2.6.3



[PATCH 3/4] f2fs: do not skip writing data pages

2016-05-26 Thread Jaegeuk Kim
For data pages, let's try to flush as much as possible in background.

On /dev/pmem0,

1. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048 conv=fsync
 Before : 800 MB/s
 After  : 1.1 GB/s

2. dd if=/dev/zero of=/mnt/test/testfile bs=1M count=2048
 Before : 1.3 GB/s
 After  : 2.2 GB/s

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c| 11 +--
 fs/f2fs/segment.h |  4 +---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7132b02..85ceb2b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1444,7 +1444,6 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret;
-   long diff;
 
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -1469,14 +1468,14 @@ static int f2fs_write_data_pages(struct address_space 
*mapping,
 
trace_f2fs_writepages(mapping->host, wbc, DATA);
 
-   diff = nr_pages_to_write(sbi, DATA, wbc);
-
ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
-   f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
+   /*
+* if some pages were truncated, we cannot guarantee its mapping->host
+* to detect pending bios.
+*/
+   f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
remove_dirty_inode(inode);
-
-   wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
 
 skip_write:
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5d016a1..890bb28d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -728,9 +728,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info 
*sbi, int type,
 
nr_to_write = wbc->nr_to_write;
 
-   if (type == DATA)
-   desired = 4096;
-   else if (type == NODE)
+   if (type == NODE)
desired = 3 * max_hw_blocks(sbi);
else
desired = MAX_BIO_BLOCKS(sbi);
-- 
2.6.3



[PATCH 1/4] f2fs: propagate error given by f2fs_find_entry

2016-05-26 Thread Jaegeuk Kim
If we get ENOMEM or EIO in f2fs_find_entry, we should stop right away.
Otherwise, for example, we can get duplicate directory entry by ->chash and
->clevel.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/dir.c| 23 ---
 fs/f2fs/inline.c |  4 +++-
 fs/f2fs/namei.c  |  5 +
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 24d1308..ae37543 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode 
*dir,
/* no need to allocate new dentry pages to all the indices */
dentry_page = find_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
-   room = true;
-   continue;
+   if (PTR_ERR(dentry_page) == -ENOENT) {
+   room = true;
+   continue;
+   } else {
+   *res_page = dentry_page;
+   break;
+   }
}
 
de = find_in_block(dentry_page, fname, namehash, _slots,
@@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct fscrypt_name fname;
int err;
 
-   *res_page = NULL;
-
err = fscrypt_setup_filename(dir, child, 1, );
-   if (err)
+   if (err) {
+   *res_page = ERR_PTR(-ENOMEM);
return NULL;
+   }
 
if (f2fs_has_inline_dentry(dir)) {
+   *res_page = NULL;
de = find_in_inline_dir(dir, , res_page);
goto out;
}
 
-   if (npages == 0)
+   if (npages == 0) {
+   *res_page = NULL;
goto out;
+   }
 
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -247,8 +255,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
}
 
for (level = 0; level < max_depth; level++) {
+   *res_page = NULL;
de = find_in_level(dir, level, , res_page);
-   if (de)
+   if (de || IS_ERR(*res_page))
break;
}
 out:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 77c9c24..1eb3043 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -286,8 +286,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode 
*dir,
f2fs_hash_t namehash;
 
ipage = get_node_page(sbi, dir->i_ino);
-   if (IS_ERR(ipage))
+   if (IS_ERR(ipage)) {
+   *res_page = ipage;
return NULL;
+   }
 
namehash = f2fs_dentry_hash();
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 496f4e3..3f6119e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -232,6 +232,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
+   goto out;
} else {
err = __f2fs_add_link(dir, , NULL, dir->i_ino, S_IFDIR);
if (err)
@@ -242,6 +245,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t 
pino)
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
+   } else if (IS_ERR(page)) {
+   err = PTR_ERR(page);
} else {
err = __f2fs_add_link(dir, , NULL, pino, S_IFDIR);
}
-- 
2.6.3



Re: [GIT PULL] xfs: updates for 4.7-rc1

2016-05-26 Thread Dave Chinner
On Thu, May 26, 2016 at 10:19:13AM -0700, Linus Torvalds wrote:
> On Wed, May 25, 2016 at 11:13 PM, Dave Chinner  wrote:
> >
> > Just yell if this is not OK and I'll drop those branches for this
> > merge and resend the pull request
> 
> i'm ok with the late branches, it's not like xfs has been a problem spot.

Still, I'll try to avoid them because it reduces testing time.

> However:
> 
> > are available in the git repository at:
> >
> >   git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git for-next
> 
> Your pull request mentions the 'for-next' branch, but I think you
> *meant* to send me the "xfs-for-linus-4.7-rc1" tag which points to the
> same commit and has your summary in it.



Mea culpa. I ran this command after creating the tag:

$ git request-pull v4.6 
git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git 
tags/xfs-for-linus-4.7-rc1 > ~/tmp/t.txt

And I didn't check the output closely enough. I forgot to push the
tag to the upstream repo before running request-pull.

Git often makes it very easy to make mistakes whilst simultaneously
making it hard to notice you've made a mistake. :(

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


Re: [GIT PULL] xfs: updates for 4.7-rc1

2016-05-26 Thread Dave Chinner
On Thu, May 26, 2016 at 10:19:13AM -0700, Linus Torvalds wrote:
> On Wed, May 25, 2016 at 11:13 PM, Dave Chinner  wrote:
> >
> > Just yell if this is not OK and I'll drop those branches for this
> > merge and resend the pull request
> 
> i'm ok with the late branches, it's not like xfs has been a problem spot.

Still, I'll try to avoid them because it reduces testing time.

> However:
> 
> > are available in the git repository at:
> >
> >   git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git for-next
> 
> Your pull request mentions the 'for-next' branch, but I think you
> *meant* to send me the "xfs-for-linus-4.7-rc1" tag which points to the
> same commit and has your summary in it.



Mea culpa. I ran this command after creating the tag:

$ git request-pull v4.6 
git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git 
tags/xfs-for-linus-4.7-rc1 > ~/tmp/t.txt

And I didn't check the output closely enough. I forgot to push the
tag to the upstream repo before running request-pull.

Git often makes it very easy to make mistakes whilst simultaneously
making it hard to notice you've made a mistake. :(

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com


[PATCH 1/2] timer: Export destroy_hrtimer_on_stack()

2016-05-26 Thread Guenter Roeck
hrtimer_init_on_stack() needs a matching call to
destroy_hrtimer_on_stack(), so both need to be exported.

Signed-off-by: Guenter Roeck 
---
 kernel/time/hrtimer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8c7392c4fdbd..e99df0ff1d42 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -425,6 +425,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
debug_object_free(timer, _debug_descr);
 }
+EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
 
 #else
 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-- 
2.5.0



[PATCH 2/2] net: pktgen: Call destroy_hrtimer_on_stack()

2016-05-26 Thread Guenter Roeck
If CONFIG_DEBUG_OBJECTS_TIMERS=y, hrtimer_init_on_stack() requires
a matching call to destroy_hrtimer_on_stack() to clean up timer
debug objects.

Signed-off-by: Guenter Roeck 
---
 net/core/pktgen.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8604ae245960..8b02df0d354d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2245,10 +2245,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t 
spin_until)
hrtimer_set_expires(, spin_until);
 
remaining = ktime_to_ns(hrtimer_expires_remaining());
-   if (remaining <= 0) {
-   pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
-   return;
-   }
+   if (remaining <= 0)
+   goto out;
 
start_time = ktime_get();
if (remaining < 10) {
@@ -2273,7 +2271,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t 
spin_until)
}
 
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+out:
pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+   destroy_hrtimer_on_stack();
 }
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
-- 
2.5.0



[PATCH 1/2] timer: Export destroy_hrtimer_on_stack()

2016-05-26 Thread Guenter Roeck
hrtimer_init_on_stack() needs a matching call to
destroy_hrtimer_on_stack(), so both need to be exported.

Signed-off-by: Guenter Roeck 
---
 kernel/time/hrtimer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8c7392c4fdbd..e99df0ff1d42 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -425,6 +425,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
debug_object_free(timer, _debug_descr);
 }
+EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
 
 #else
 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
-- 
2.5.0



[PATCH 2/2] net: pktgen: Call destroy_hrtimer_on_stack()

2016-05-26 Thread Guenter Roeck
If CONFIG_DEBUG_OBJECTS_TIMERS=y, hrtimer_init_on_stack() requires
a matching call to destroy_hrtimer_on_stack() to clean up timer
debug objects.

Signed-off-by: Guenter Roeck 
---
 net/core/pktgen.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8604ae245960..8b02df0d354d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2245,10 +2245,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t 
spin_until)
hrtimer_set_expires(, spin_until);
 
remaining = ktime_to_ns(hrtimer_expires_remaining());
-   if (remaining <= 0) {
-   pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
-   return;
-   }
+   if (remaining <= 0)
+   goto out;
 
start_time = ktime_get();
if (remaining < 10) {
@@ -2273,7 +2271,9 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t 
spin_until)
}
 
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
+out:
pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
+   destroy_hrtimer_on_stack();
 }
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
-- 
2.5.0



Re: How should I use kernel-defined i2c structs in this driver

2016-05-26 Thread Antti Palosaari

On 05/26/2016 04:59 PM, Andrey Utkin wrote:

Could anybody please give a hint - which kernel-defined i2c objects, and how
many of them, I need to define and use to substitute these driver-defined
functions i2c_read(), i2c_write() ?
https://github.com/bluecherrydvr/linux/blob/release/tw5864/1.16/drivers/media/pci/tw5864/tw5864-config.c
In a word, there's 4 chips with different addresses, to which this code
communicates via main chip's dedicated registers.
Do i need a single i2c_adapter or several?
Do i need i2c_client entities?
where should I put what is named "devid" here?

Thanks in advance.


It depends how those are connected at hardware level. Quickly looking I 
think "devid" is here to select proper I2C adapter. So I think there is 
4 I2C adapters and each of those adapter has 1 slave device. Is that 
correct? If yes, then register 4 I2C adapters and register single client 
for each of those adapters.


regards
Antti



--
http://palosaari.fi/


Re: How should I use kernel-defined i2c structs in this driver

2016-05-26 Thread Antti Palosaari

On 05/26/2016 04:59 PM, Andrey Utkin wrote:

Could anybody please give a hint - which kernel-defined i2c objects, and how
many of them, I need to define and use to substitute these driver-defined
functions i2c_read(), i2c_write() ?
https://github.com/bluecherrydvr/linux/blob/release/tw5864/1.16/drivers/media/pci/tw5864/tw5864-config.c
In a word, there's 4 chips with different addresses, to which this code
communicates via main chip's dedicated registers.
Do i need a single i2c_adapter or several?
Do i need i2c_client entities?
where should I put what is named "devid" here?

Thanks in advance.


It depends how those are connected at hardware level. Quickly looking I 
think "devid" is here to select proper I2C adapter. So I think there is 
4 I2C adapters and each of those adapter has 1 slave device. Is that 
correct? If yes, then register 4 I2C adapters and register single client 
for each of those adapters.


regards
Antti



--
http://palosaari.fi/


Re: [RFT v2] iommu/amd: use subsys_initcall() on amdv2 iommu

2016-05-26 Thread Luis R. Rodriguez
On Tue, Apr 19, 2016 at 10:02:52AM +0800, Wan Zongshun wrote:
> 
> You have to take carefully to arrange the calling sequence for
> iommuv1, iommuv2, kfd module, and drm like the following sequence :
> v1 ->v2->kfd, drm.
> 
> iommuv1 -- rootfs_initcall(fn)
> IOMMUV2 -- device_initcall(fn)
> kfd module -- late_initcall(fn)
> drm -- late_initcall(fn)

Thanks, it turns out this is not exactly enough, given as 
Joerg notes:

--
AMD-KFD on the other hand needs to be loaded before the 
 
radeon driver (but this it not enforced by symbols), because otherwise  
  
the radeon driver will not initialize the AMD-KFD driver. 
---

We have a theoretical race still possible between the kfd module
and the drm driver. I'll reply to Joerg's e-mail with more feedback.

  Luis


Re: [RFT v2] iommu/amd: use subsys_initcall() on amdv2 iommu

2016-05-26 Thread Luis R. Rodriguez
On Tue, Apr 19, 2016 at 10:02:52AM +0800, Wan Zongshun wrote:
> 
> You have to take carefully to arrange the calling sequence for
> iommuv1, iommuv2, kfd module, and drm like the following sequence :
> v1 ->v2->kfd, drm.
> 
> iommuv1 -- rootfs_initcall(fn)
> IOMMUV2 -- device_initcall(fn)
> kfd module -- late_initcall(fn)
> drm -- late_initcall(fn)

Thanks, it turns out this is not exactly enough, given as 
Joerg notes:

--
AMD-KFD on the other hand needs to be loaded before the 
 
radeon driver (but this it not enforced by symbols), because otherwise  
  
the radeon driver will not initialize the AMD-KFD driver. 
---

We have a theoretical race still possible between the kfd module
and the drm driver. I'll reply to Joerg's e-mail with more feedback.

  Luis


[PATCH][RT] netpoll: Always take poll_lock when doing polling

2016-05-26 Thread Steven Rostedt
[ Alison, can you try this patch ]

This uses netpoll_poll_lock()/unlock() to synchronize netpoll and napi
poll operations. Without this method, the synchronization is done by
looping on NAPI_STATE_SCHED 'bitset'. This method works fine on a non-rt
kernel because a softirq can not be preempted, and the thread poll is
called with local_bh_disable() which prevents softirqs from running and
preempting it. But on rt, this code can be preempted.  Thus, the code may
be preempted out while holding the NAPI_STATE_SCHED 'bitset', opening a
window for a livelock.

For example:

   

   napi_schedule_prep()
test_and_set_bit(NAPI_STATE_SCHED, >state)

   

   sk_busy_loop()

  do {
   rc = busy_poll()
   ret = napi_schedule_prep()
return !test_and_set_bit(NAPI_STATE_SCHED, >state)
 
   if (!ret) return 0
   
  } while (...) /* for ever */

This isn't a problem in non PREEMPT_RT because the napi_schedule_prep()
can not be preempted. But because it can in PREEMPT_RT, we need to add
some extra locking. The netpoll_poll_lock() works well here, but they need
to be added around any call to busy_poll().

Using IS_ENABLED(CONFIG_PREEMPT_RT_FULL) will allow gcc to optimize out
the extra calls to poll_lock.

Tested-by: "Luis Claudio R. Goncalves" 
Reviewed-by: Daniel Bristot de Oliveira 
Signed-off-by: Steven Rostedt 
---
 include/linux/netpoll.h |2 +-
 include/net/busy_poll.h |   14 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

Index: linux-rt.git/include/linux/netpoll.h
===
--- linux-rt.git.orig/include/linux/netpoll.h   2016-05-26 18:31:09.183150389 
-0400
+++ linux-rt.git/include/linux/netpoll.h2016-05-26 18:52:02.657014280 
-0400
@@ -77,7 +77,7 @@ static inline void *netpoll_poll_lock(st
 {
struct net_device *dev = napi->dev;
 
-   if (dev && dev->npinfo) {
+   if (dev && (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) || dev->npinfo)) {
spin_lock(>poll_lock);
napi->poll_owner = smp_processor_id();
return napi;
Index: linux-rt.git/include/net/busy_poll.h
===
--- linux-rt.git.orig/include/net/busy_poll.h   2016-05-26 18:31:09.183150389 
-0400
+++ linux-rt.git/include/net/busy_poll.h2016-05-26 19:10:12.134266713 
-0400
@@ -25,6 +25,7 @@
 #define _LINUX_NET_BUSY_POLL_H
 
 #include 
+#include 
 #include 
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -97,7 +98,18 @@ static inline bool sk_busy_loop(struct s
goto out;
 
do {
-   rc = ops->ndo_busy_poll(napi);
+   /* When RT is enabled, napi_schedule_prep() can be preempted
+* with NAPI_STATE_SCHED set, causing the busy_poll() function
+* to always return zero, and this loop may never exit.
+* In that case, we must always take the netpoll_poll_lock.
+*/
+   if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
+   void *have = netpoll_poll_lock(napi);
+   rc = ops->ndo_busy_poll(napi);
+   netpoll_poll_unlock(have);
+   } else {
+   rc = ops->ndo_busy_poll(napi);
+   }
 
if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */


[PATCH][RT] netpoll: Always take poll_lock when doing polling

2016-05-26 Thread Steven Rostedt
[ Alison, can you try this patch ]

This uses netpoll_poll_lock()/unlock() to synchronize netpoll and napi
poll operations. Without this method, the synchronization is done by
looping on NAPI_STATE_SCHED 'bitset'. This method works fine on a non-rt
kernel because a softirq can not be preempted, and the thread poll is
called with local_bh_disable() which prevents softirqs from running and
preempting it. But on rt, this code can be preempted.  Thus, the code may
be preempted out while holding the NAPI_STATE_SCHED 'bitset', opening a
window for a livelock.

For example:

   

   napi_schedule_prep()
test_and_set_bit(NAPI_STATE_SCHED, >state)

   

   sk_busy_loop()

  do {
   rc = busy_poll()
   ret = napi_schedule_prep()
return !test_and_set_bit(NAPI_STATE_SCHED, >state)
 
   if (!ret) return 0
   
  } while (...) /* for ever */

This isn't a problem in non PREEMPT_RT because the napi_schedule_prep()
can not be preempted. But because it can in PREEMPT_RT, we need to add
some extra locking. The netpoll_poll_lock() works well here, but they need
to be added around any call to busy_poll().

Using IS_ENABLED(CONFIG_PREEMPT_RT_FULL) will allow gcc to optimize out
the extra calls to poll_lock.

Tested-by: "Luis Claudio R. Goncalves" 
Reviewed-by: Daniel Bristot de Oliveira 
Signed-off-by: Steven Rostedt 
---
 include/linux/netpoll.h |2 +-
 include/net/busy_poll.h |   14 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

Index: linux-rt.git/include/linux/netpoll.h
===
--- linux-rt.git.orig/include/linux/netpoll.h   2016-05-26 18:31:09.183150389 
-0400
+++ linux-rt.git/include/linux/netpoll.h2016-05-26 18:52:02.657014280 
-0400
@@ -77,7 +77,7 @@ static inline void *netpoll_poll_lock(st
 {
struct net_device *dev = napi->dev;
 
-   if (dev && dev->npinfo) {
+   if (dev && (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) || dev->npinfo)) {
spin_lock(>poll_lock);
napi->poll_owner = smp_processor_id();
return napi;
Index: linux-rt.git/include/net/busy_poll.h
===
--- linux-rt.git.orig/include/net/busy_poll.h   2016-05-26 18:31:09.183150389 
-0400
+++ linux-rt.git/include/net/busy_poll.h2016-05-26 19:10:12.134266713 
-0400
@@ -25,6 +25,7 @@
 #define _LINUX_NET_BUSY_POLL_H
 
 #include 
+#include 
 #include 
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -97,7 +98,18 @@ static inline bool sk_busy_loop(struct s
goto out;
 
do {
-   rc = ops->ndo_busy_poll(napi);
+   /* When RT is enabled, napi_schedule_prep() can be preempted
+* with NAPI_STATE_SCHED set, causing the busy_poll() function
+* to always return zero, and this loop may never exit.
+* In that case, we must always take the netpoll_poll_lock.
+*/
+   if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
+   void *have = netpoll_poll_lock(napi);
+   rc = ops->ndo_busy_poll(napi);
+   netpoll_poll_unlock(have);
+   } else {
+   rc = ops->ndo_busy_poll(napi);
+   }
 
if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */


[PATCH] Staging: comedi: ni_atmio: fixed spacing and comment style issues

2016-05-26 Thread Steven Laabs
Fixed coding style issues.

Signed-off-by: Steven Laabs 
---
 drivers/staging/comedi/drivers/ni_atmio.c | 180 +++---
 1 file changed, 91 insertions(+), 89 deletions(-)

diff --git a/drivers/staging/comedi/drivers/ni_atmio.c 
b/drivers/staging/comedi/drivers/ni_atmio.c
index 95435b8..f957790 100644
--- a/drivers/staging/comedi/drivers/ni_atmio.c
+++ b/drivers/staging/comedi/drivers/ni_atmio.c
@@ -1,93 +1,95 @@
 /*
-comedi/drivers/ni_atmio.c
-Hardware driver for NI AT-MIO E series cards
-
-COMEDI - Linux Control and Measurement Device Interface
-Copyright (C) 1997-2001 David A. Schleef 
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-*/
-/*
-Driver: ni_atmio
-Description: National Instruments AT-MIO-E series
-Author: ds
-Devices: [National Instruments] AT-MIO-16E-1 (ni_atmio),
-  AT-MIO-16E-2, AT-MIO-16E-10, AT-MIO-16DE-10, AT-MIO-64E-3,
-  AT-MIO-16XE-50, AT-MIO-16XE-10, AT-AI-16XE-10
-Status: works
-Updated: Thu May  1 20:03:02 CDT 2003
-
-The driver has 2.6 kernel isapnp support, and
-will automatically probe for a supported board if the
-I/O base is left unspecified with comedi_config.
-However, many of
-the isapnp id numbers are unknown.  If your board is not
-recognized, please send the output of 'cat /proc/isapnp'
-(you may need to modprobe the isa-pnp module for
-/proc/isapnp to exist) so the
-id numbers for your board can be added to the driver.
-
-Otherwise, you can use the isapnptools package to configure
-your board.  Use isapnp to
-configure the I/O base and IRQ for the board, and then pass
-the same values as
-parameters in comedi_config.  A sample isapnp.conf file is included
-in the etc/ directory of Comedilib.
-
-Comedilib includes a utility to autocalibrate these boards.  The
-boards seem to boot into a state where the all calibration DACs
-are at one extreme of their range, thus the default calibration
-is terrible.  Calibration at boot is strongly encouraged.
-
-To use the extended digital I/O on some of the boards, enable the
-8255 driver when configuring the Comedi source tree.
-
-External triggering is supported for some events.  The channel index
-(scan_begin_arg, etc.) maps to PFI0 - PFI9.
-
-Some of the more esoteric triggering possibilities of these boards
-are not supported.
-*/
-/*
-   The real guts of the driver is in ni_mio_common.c, which is included
-   both here and in ni_pcimio.c
-
-   Interrupt support added by Truxton Fulton 
-
-   References for specifications:
-
-  340747b.pdf  Register Level Programmer Manual (obsolete)
-  340747c.pdf  Register Level Programmer Manual (new)
-  DAQ-STC reference manual
-
-   Other possibly relevant info:
-
-  320517c.pdf  User manual (obsolete)
-  320517f.pdf  User manual (new)
-  320889a.pdf  delete
-  320906c.pdf  maximum signal ratings
-  321066a.pdf  about 16x
-  321791a.pdf  discontinuation of at-mio-16e-10 rev. c
-  321808a.pdf  about at-mio-16e-10 rev P
-  321837a.pdf  discontinuation of at-mio-16de-10 rev d
-  321838a.pdf  about at-mio-16de-10 rev N
-
-   ISSUES:
-
-   need to deal with external reference for DAC, and other DAC
-   properties in board properties
+ *comedi/drivers/ni_atmio.c
+ *Hardware driver for NI AT-MIO E series cards
+ *
+ *COMEDI - Linux Control and Measurement Device Interface
+ *Copyright (C) 1997-2001 David A. Schleef 
+ *
+ *This program is free software; you can redistribute it and/or modify
+ *it under the terms of the GNU General Public License as published by
+ *the Free Software Foundation; either version 2 of the License, or
+ *(at your option) any later version.
+ *
+ *This program is distributed in the hope that it will be useful,
+ *but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *GNU General Public License for more details.
+ */
 
-   deal with at-mio-16de-10 revision D to N changes, etc.
+/*
+ * Driver: ni_atmio
+ * Description: National Instruments AT-MIO-E series
+ * Author: ds
+ * Devices: [National Instruments] AT-MIO-16E-1 (ni_atmio),
+ *   AT-MIO-16E-2, AT-MIO-16E-10, AT-MIO-16DE-10, AT-MIO-64E-3,
+ *   AT-MIO-16XE-50, AT-MIO-16XE-10, AT-AI-16XE-10
+ * Status: works
+ * Updated: Thu May  1 20:03:02 CDT 2003
+ *
+ * The driver has 2.6 kernel isapnp support, 

[PATCH] Staging: comedi: ni_atmio: fixed spacing and comment style issues

2016-05-26 Thread Steven Laabs
Fixed coding style issues.

Signed-off-by: Steven Laabs 
---
 drivers/staging/comedi/drivers/ni_atmio.c | 180 +++---
 1 file changed, 91 insertions(+), 89 deletions(-)

diff --git a/drivers/staging/comedi/drivers/ni_atmio.c 
b/drivers/staging/comedi/drivers/ni_atmio.c
index 95435b8..f957790 100644
--- a/drivers/staging/comedi/drivers/ni_atmio.c
+++ b/drivers/staging/comedi/drivers/ni_atmio.c
@@ -1,93 +1,95 @@
 /*
-comedi/drivers/ni_atmio.c
-Hardware driver for NI AT-MIO E series cards
-
-COMEDI - Linux Control and Measurement Device Interface
-Copyright (C) 1997-2001 David A. Schleef 
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-*/
-/*
-Driver: ni_atmio
-Description: National Instruments AT-MIO-E series
-Author: ds
-Devices: [National Instruments] AT-MIO-16E-1 (ni_atmio),
-  AT-MIO-16E-2, AT-MIO-16E-10, AT-MIO-16DE-10, AT-MIO-64E-3,
-  AT-MIO-16XE-50, AT-MIO-16XE-10, AT-AI-16XE-10
-Status: works
-Updated: Thu May  1 20:03:02 CDT 2003
-
-The driver has 2.6 kernel isapnp support, and
-will automatically probe for a supported board if the
-I/O base is left unspecified with comedi_config.
-However, many of
-the isapnp id numbers are unknown.  If your board is not
-recognized, please send the output of 'cat /proc/isapnp'
-(you may need to modprobe the isa-pnp module for
-/proc/isapnp to exist) so the
-id numbers for your board can be added to the driver.
-
-Otherwise, you can use the isapnptools package to configure
-your board.  Use isapnp to
-configure the I/O base and IRQ for the board, and then pass
-the same values as
-parameters in comedi_config.  A sample isapnp.conf file is included
-in the etc/ directory of Comedilib.
-
-Comedilib includes a utility to autocalibrate these boards.  The
-boards seem to boot into a state where the all calibration DACs
-are at one extreme of their range, thus the default calibration
-is terrible.  Calibration at boot is strongly encouraged.
-
-To use the extended digital I/O on some of the boards, enable the
-8255 driver when configuring the Comedi source tree.
-
-External triggering is supported for some events.  The channel index
-(scan_begin_arg, etc.) maps to PFI0 - PFI9.
-
-Some of the more esoteric triggering possibilities of these boards
-are not supported.
-*/
-/*
-   The real guts of the driver is in ni_mio_common.c, which is included
-   both here and in ni_pcimio.c
-
-   Interrupt support added by Truxton Fulton 
-
-   References for specifications:
-
-  340747b.pdf  Register Level Programmer Manual (obsolete)
-  340747c.pdf  Register Level Programmer Manual (new)
-  DAQ-STC reference manual
-
-   Other possibly relevant info:
-
-  320517c.pdf  User manual (obsolete)
-  320517f.pdf  User manual (new)
-  320889a.pdf  delete
-  320906c.pdf  maximum signal ratings
-  321066a.pdf  about 16x
-  321791a.pdf  discontinuation of at-mio-16e-10 rev. c
-  321808a.pdf  about at-mio-16e-10 rev P
-  321837a.pdf  discontinuation of at-mio-16de-10 rev d
-  321838a.pdf  about at-mio-16de-10 rev N
-
-   ISSUES:
-
-   need to deal with external reference for DAC, and other DAC
-   properties in board properties
+ *comedi/drivers/ni_atmio.c
+ *Hardware driver for NI AT-MIO E series cards
+ *
+ *COMEDI - Linux Control and Measurement Device Interface
+ *Copyright (C) 1997-2001 David A. Schleef 
+ *
+ *This program is free software; you can redistribute it and/or modify
+ *it under the terms of the GNU General Public License as published by
+ *the Free Software Foundation; either version 2 of the License, or
+ *(at your option) any later version.
+ *
+ *This program is distributed in the hope that it will be useful,
+ *but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *GNU General Public License for more details.
+ */
 
-   deal with at-mio-16de-10 revision D to N changes, etc.
+/*
+ * Driver: ni_atmio
+ * Description: National Instruments AT-MIO-E series
+ * Author: ds
+ * Devices: [National Instruments] AT-MIO-16E-1 (ni_atmio),
+ *   AT-MIO-16E-2, AT-MIO-16E-10, AT-MIO-16DE-10, AT-MIO-64E-3,
+ *   AT-MIO-16XE-50, AT-MIO-16XE-10, AT-AI-16XE-10
+ * Status: works
+ * Updated: Thu May  1 20:03:02 CDT 2003
+ *
+ * The driver has 2.6 kernel isapnp support, and
+ * will automatically probe for a supported board if the
+ * I/O 

Re: [PATCH] clk: rockchip: add a dummy clock for the watchdog pclk on rk3399

2016-05-26 Thread Heiko Stuebner
Am Mittwoch, 25. Mai 2016, 16:51:56 schrieb Xing Zheng:
> Like rk3288, the pclk supplying the watchdog is controlled via the
> SGRF register area. Additionally the SGRF isn't even writable in
> every boot mode.
> 
> But still the clock control is available and in the future someone
> might want to use it. Therefore define a simple clock for the time
> being so that the watchdog driver can read its rate.
> 
> Signed-off-by: Xing Zheng 

applied for 4.8

Thanks
Heiko


Re: [PATCH] clk: rockchip: add a dummy clock for the watchdog pclk on rk3399

2016-05-26 Thread Heiko Stuebner
Am Mittwoch, 25. Mai 2016, 16:51:56 schrieb Xing Zheng:
> Like rk3288, the pclk supplying the watchdog is controlled via the
> SGRF register area. Additionally the SGRF isn't even writable in
> every boot mode.
> 
> But still the clock control is available and in the future someone
> might want to use it. Therefore define a simple clock for the time
> being so that the watchdog driver can read its rate.
> 
> Signed-off-by: Xing Zheng 

applied for 4.8

Thanks
Heiko


Re: [PATCH] mm: check the return value of lookup_page_ext for all call sites

2016-05-26 Thread Shi, Yang

On 5/25/2016 5:37 PM, Minchan Kim wrote:

On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:

On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:

Per the discussion with Joonsoo Kim [1], we need check the return value of
lookup_page_ext() for all call sites since it might return NULL in some cases,
although it is unlikely, i.e. memory hotplug.

Tested with ltp with "page_owner=0".

[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE

Signed-off-by: Yang Shi 


I didn't read code code in detail to see how page_ext memory space
allocated in boot code and memory hotplug but to me, it's not good
to check NULL whenever we calls lookup_page_ext.

More dangerous thing is now page_ext is used by optionable feature(ie, not
critical for system stability) but if we want to use page_ext as
another important tool for the system in future,
it could be a serious problem.

Can we put some hooks of page_ext into memory-hotplug so guarantee
that page_ext memory space is allocated with memmap space at the
same time? IOW, once every PFN wakers find a page is valid, page_ext
is valid, too so lookup_page_ext never returns NULL on valid page
by design.

I hope we consider this direction, too.


Yang, Could you think about this?


Thanks a lot for the suggestion. Sorry for the late reply, I was busy on 
preparing patches. I do agree this is a direction we should look into, 
but I haven't got time to think about it deeper. I hope Joonsoo could 
chime in too since he is the original author for page extension.




Even, your patch was broken, I think.
It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because
lookup_page_ext doesn't return NULL in that case.


Actually, I think the #ifdef should be removed if lookup_page_ext() is 
possible to return NULL. It sounds not make sense returning NULL only 
when DEBUG_VM is enabled. It should return NULL no matter what debug 
config is selected. If Joonsoo agrees with me I'm going to come up with 
a patch to fix it.


Regards,
Yang





Thanks.


---
 include/linux/page_idle.h | 43 ---
 mm/page_alloc.c   |  6 ++
 mm/page_owner.c   | 27 +++
 mm/page_poison.c  |  8 +++-
 mm/vmstat.c   |  2 ++
 5 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index bf268fa..8f5d4ad 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops;

 static inline bool page_is_young(struct page *page)
 {
-   return test_bit(PAGE_EXT_YOUNG, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline void set_page_young(struct page *page)
 {
-   set_bit(PAGE_EXT_YOUNG, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   set_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline bool test_and_clear_page_young(struct page *page)
 {
-   return test_and_clear_bit(PAGE_EXT_YOUNG,
- _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_and_clear_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline bool page_is_idle(struct page *page)
 {
-   return test_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_bit(PAGE_EXT_IDLE, _ext->flags);
 }

 static inline void set_page_idle(struct page *page)
 {
-   set_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   set_bit(PAGE_EXT_IDLE, _ext->flags);
 }

 static inline void clear_page_idle(struct page *page)
 {
-   clear_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   clear_bit(PAGE_EXT_IDLE, _ext->flags);
 }
 #endif /* CONFIG_64BIT */

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f8f3bfc..d27e8b9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct 
page *page,
return;

page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext))
+   return;
+
__set_bit(PAGE_EXT_DEBUG_GUARD, _ext->flags);

INIT_LIST_HEAD(>lru);
@@ -673,6 +676,9 @@ 

Re: [PATCH] mm: check the return value of lookup_page_ext for all call sites

2016-05-26 Thread Shi, Yang

On 5/25/2016 5:37 PM, Minchan Kim wrote:

On Tue, May 24, 2016 at 11:58:11AM +0900, Minchan Kim wrote:

On Mon, May 23, 2016 at 10:16:08AM -0700, Yang Shi wrote:

Per the discussion with Joonsoo Kim [1], we need check the return value of
lookup_page_ext() for all call sites since it might return NULL in some cases,
although it is unlikely, i.e. memory hotplug.

Tested with ltp with "page_owner=0".

[1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE

Signed-off-by: Yang Shi 


I didn't read code code in detail to see how page_ext memory space
allocated in boot code and memory hotplug but to me, it's not good
to check NULL whenever we calls lookup_page_ext.

More dangerous thing is now page_ext is used by optionable feature(ie, not
critical for system stability) but if we want to use page_ext as
another important tool for the system in future,
it could be a serious problem.

Can we put some hooks of page_ext into memory-hotplug so guarantee
that page_ext memory space is allocated with memmap space at the
same time? IOW, once every PFN wakers find a page is valid, page_ext
is valid, too so lookup_page_ext never returns NULL on valid page
by design.

I hope we consider this direction, too.


Yang, Could you think about this?


Thanks a lot for the suggestion. Sorry for the late reply, I was busy on 
preparing patches. I do agree this is a direction we should look into, 
but I haven't got time to think about it deeper. I hope Joonsoo could 
chime in too since he is the original author for page extension.




Even, your patch was broken, I think.
It doesn't work with !CONFIG_DEBUG_VM && !CONFIG_PAGE_POISONING because
lookup_page_ext doesn't return NULL in that case.


Actually, I think the #ifdef should be removed if lookup_page_ext() is 
possible to return NULL. It sounds not make sense returning NULL only 
when DEBUG_VM is enabled. It should return NULL no matter what debug 
config is selected. If Joonsoo agrees with me I'm going to come up with 
a patch to fix it.


Regards,
Yang





Thanks.


---
 include/linux/page_idle.h | 43 ---
 mm/page_alloc.c   |  6 ++
 mm/page_owner.c   | 27 +++
 mm/page_poison.c  |  8 +++-
 mm/vmstat.c   |  2 ++
 5 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index bf268fa..8f5d4ad 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops;

 static inline bool page_is_young(struct page *page)
 {
-   return test_bit(PAGE_EXT_YOUNG, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline void set_page_young(struct page *page)
 {
-   set_bit(PAGE_EXT_YOUNG, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   set_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline bool test_and_clear_page_young(struct page *page)
 {
-   return test_and_clear_bit(PAGE_EXT_YOUNG,
- _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_and_clear_bit(PAGE_EXT_YOUNG, _ext->flags);
 }

 static inline bool page_is_idle(struct page *page)
 {
-   return test_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return false;
+
+   return test_bit(PAGE_EXT_IDLE, _ext->flags);
 }

 static inline void set_page_idle(struct page *page)
 {
-   set_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   set_bit(PAGE_EXT_IDLE, _ext->flags);
 }

 static inline void clear_page_idle(struct page *page)
 {
-   clear_bit(PAGE_EXT_IDLE, _page_ext(page)->flags);
+   struct page_ext *page_ext;
+   page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext)
+   return;
+
+   clear_bit(PAGE_EXT_IDLE, _ext->flags);
 }
 #endif /* CONFIG_64BIT */

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f8f3bfc..d27e8b9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct 
page *page,
return;

page_ext = lookup_page_ext(page);
+   if (unlikely(!page_ext))
+   return;
+
__set_bit(PAGE_EXT_DEBUG_GUARD, _ext->flags);

INIT_LIST_HEAD(>lru);
@@ -673,6 +676,9 @@ static inline void 

[PATCH V4] brcmfmac: print errors if creating interface fails

2016-05-26 Thread Rafał Miłecki
This is helpful for debugging. Without this all I was getting from "iw"
command on failed creating of P2P interface was:
> command failed: Too many open files in system (-23)

Signed-off-by: Rafał Miłecki 
---
V2: s/in/if/ in commit message
V3: Add one more error message as suggested by Arend
V4: Also update brcmf_cfg80211_add_iface & print error for AP
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 8 ++--
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c  | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 3d09d23..e7975a3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -669,14 +669,18 @@ static struct wireless_dev 
*brcmf_cfg80211_add_iface(struct wiphy *wiphy,
return ERR_PTR(-EOPNOTSUPP);
case NL80211_IFTYPE_AP:
wdev = brcmf_ap_add_vif(wiphy, name, flags, params);
-   if (!IS_ERR(wdev))
+   if (IS_ERR(wdev))
+   brcmf_err("Failed to create AP interface %s: %d\n", 
name, PTR_ERR(wdev));
+   else
brcmf_cfg80211_update_proto_addr_mode(wdev);
return wdev;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_P2P_DEVICE:
wdev = brcmf_p2p_add_vif(wiphy, name, name_assign_type, type, 
flags, params);
-   if (!IS_ERR(wdev))
+   if (IS_ERR(wdev))
+   brcmf_err("Failed to create P2P interface %s: %d\n", 
name, PTR_ERR(wdev));
+   else
brcmf_cfg80211_update_proto_addr_mode(wdev);
return wdev;
case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 1652a48..bc26aec 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2031,7 +2031,7 @@ static int brcmf_p2p_request_p2p_if(struct brcmf_p2p_info 
*p2p,
err = brcmf_fil_iovar_data_set(ifp, "p2p_ifadd", _request,
   sizeof(if_request));
if (err)
-   return err;
+   brcmf_err("p2p_ifadd failed %d\n", err);
 
return err;
 }
@@ -2185,6 +2185,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy 
*wiphy, const char *name,
err = brcmf_p2p_request_p2p_if(>p2p, ifp, cfg->p2p.int_addr,
   iftype);
if (err) {
+   brcmf_err("Failed to request P2P virtual interface %s\n", name);
brcmf_cfg80211_arm_vif_event(cfg, NULL);
goto fail;
}
-- 
1.8.4.5



[PATCH V4] brcmfmac: print errors if creating interface fails

2016-05-26 Thread Rafał Miłecki
This is helpful for debugging. Without this all I was getting from "iw"
command on failed creating of P2P interface was:
> command failed: Too many open files in system (-23)

Signed-off-by: Rafał Miłecki 
---
V2: s/in/if/ in commit message
V3: Add one more error message as suggested by Arend
V4: Also update brcmf_cfg80211_add_iface & print error for AP
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 8 ++--
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c  | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 3d09d23..e7975a3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -669,14 +669,18 @@ static struct wireless_dev 
*brcmf_cfg80211_add_iface(struct wiphy *wiphy,
return ERR_PTR(-EOPNOTSUPP);
case NL80211_IFTYPE_AP:
wdev = brcmf_ap_add_vif(wiphy, name, flags, params);
-   if (!IS_ERR(wdev))
+   if (IS_ERR(wdev))
+   brcmf_err("Failed to create AP interface %s: %d\n", 
name, PTR_ERR(wdev));
+   else
brcmf_cfg80211_update_proto_addr_mode(wdev);
return wdev;
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_P2P_DEVICE:
wdev = brcmf_p2p_add_vif(wiphy, name, name_assign_type, type, 
flags, params);
-   if (!IS_ERR(wdev))
+   if (IS_ERR(wdev))
+   brcmf_err("Failed to create P2P interface %s: %d\n", 
name, PTR_ERR(wdev));
+   else
brcmf_cfg80211_update_proto_addr_mode(wdev);
return wdev;
case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 1652a48..bc26aec 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2031,7 +2031,7 @@ static int brcmf_p2p_request_p2p_if(struct brcmf_p2p_info 
*p2p,
err = brcmf_fil_iovar_data_set(ifp, "p2p_ifadd", _request,
   sizeof(if_request));
if (err)
-   return err;
+   brcmf_err("p2p_ifadd failed %d\n", err);
 
return err;
 }
@@ -2185,6 +2185,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy 
*wiphy, const char *name,
err = brcmf_p2p_request_p2p_if(>p2p, ifp, cfg->p2p.int_addr,
   iftype);
if (err) {
+   brcmf_err("Failed to request P2P virtual interface %s\n", name);
brcmf_cfg80211_arm_vif_event(cfg, NULL);
goto fail;
}
-- 
1.8.4.5



Re: [PATCH] seccomp: plug syscall-dodging ptrace hole

2016-05-26 Thread Jann Horn
On Thu, May 26, 2016 at 02:04:50PM -0700, Kees Cook wrote:
> One problem with seccomp was that ptrace could be used to change a
> syscall after seccomp filtering had completed. This was a well documented
> limitation, and it was recommended to block ptrace when defining a filter
> to avoid this problem. This can be quite a limitation for containers or
> other places where ptrace is desired even under seccomp filters.
> 
> Since seccomp filtering has been split into pre-trace and trace phases
> (phase1 and phase2 respectively), it's possible to re-run phase1 seccomp
> after ptrace. This makes that change, and updates the test suite for
> both SECCOMP_RET_TRACE and PTRACE_SYSCALL manipulation.

Looks good to me. As far as I can tell, there are no codepaths that allow
manipulation of syscall arguments via ptrace register modification without
going through tracehook_report_syscall_entry() or seccomp_phase2(), and
the checks look good, too.


> Signed-off-by: Kees Cook 
> ---
>  include/linux/seccomp.h   |   6 +
>  include/linux/tracehook.h |   8 +-
>  kernel/seccomp.c  |  42 ++
>  tools/testing/selftests/seccomp/seccomp_bpf.c | 176 
> --
>  4 files changed, 220 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 2296e6b2f690..e2b72394c200 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -85,6 +85,7 @@ static inline int seccomp_mode(struct seccomp *s)
>  #ifdef CONFIG_SECCOMP_FILTER
>  extern void put_seccomp_filter(struct task_struct *tsk);
>  extern void get_seccomp_filter(struct task_struct *tsk);
> +extern int seccomp_phase1_recheck(void);
>  #else  /* CONFIG_SECCOMP_FILTER */
>  static inline void put_seccomp_filter(struct task_struct *tsk)
>  {
> @@ -94,6 +95,11 @@ static inline void get_seccomp_filter(struct task_struct 
> *tsk)
>  {
>   return;
>  }
> +
> +static inline int seccomp_phase1_recheck(void)
> +{
> + return 0;
> +}
>  #endif /* CONFIG_SECCOMP_FILTER */
>  
>  #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
> diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
> index 26c152122a42..69b584d88508 100644
> --- a/include/linux/tracehook.h
> +++ b/include/linux/tracehook.h
> @@ -48,6 +48,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -100,7 +101,12 @@ static inline int ptrace_report_syscall(struct pt_regs 
> *regs)
>  static inline __must_check int tracehook_report_syscall_entry(
>   struct pt_regs *regs)
>  {
> - return ptrace_report_syscall(regs);
> + int skip;
> +
> + skip = ptrace_report_syscall(regs);
> + if (skip)
> + return skip;
> + return seccomp_phase1_recheck();
>  }
>  
>  /**
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 7002796f14a4..6eaa3a1c5edb 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -665,6 +665,46 @@ u32 seccomp_phase1(struct seccomp_data *sd)
>  }
>  
>  /**
> + * seccomp_phase1_recheck() - recheck phase1 in the context of ptrace
> + *
> + * This re-runs phase 1 seccomp checks in the case where ptrace may have
> + * just changed things out from under us.
> + *
> + * Returns 0 if the syscall should be processed or -1 to skip the syscall.
> + */
> +int seccomp_phase1_recheck(void)
> +{
> + u32 action;
> +
> + /* If we're not under seccomp, continue normally. */
> + if (!test_thread_flag(TIF_SECCOMP))
> + return 0;
> +
> + /* Pass NULL struct seccomp_data to force reload after ptrace. */
> + action = seccomp_phase1(NULL);
> + switch (action) {
> + case SECCOMP_PHASE1_OK:
> + /* Passes seccomp, continue normally. */
> + break;
> + case SECCOMP_PHASE1_SKIP:
> + /* Skip the syscall. */
> + return -1;
> + default:
> + if ((action & SECCOMP_RET_ACTION) != SECCOMP_RET_TRACE) {
> + /* Impossible return value: kill the process. */
> + do_exit(SIGSYS);
> + }
> + /*
> +  * We've hit a trace request, but ptrace already put us
> +  * into this state, so just continue.
> +  */
> + break;
> + }
> +
> + return 0;
> +}
> +
> +/**
>   * seccomp_phase2() - finish slow path seccomp work for the current syscall
>   * @phase1_result: The return value from seccomp_phase1()
>   *
> @@ -701,6 +741,8 @@ int seccomp_phase2(u32 phase1_result)
>   do_exit(SIGSYS);
>   if (syscall_get_nr(current, regs) < 0)
>   return -1;  /* Explicit request to skip. */
> + if (seccomp_phase1_recheck() < 0)
> + return -1;
>  
>   return 0;
>  }
[...]


signature.asc
Description: Digital signature


Re: [PATCH] seccomp: plug syscall-dodging ptrace hole

2016-05-26 Thread Jann Horn
On Thu, May 26, 2016 at 02:04:50PM -0700, Kees Cook wrote:
> One problem with seccomp was that ptrace could be used to change a
> syscall after seccomp filtering had completed. This was a well documented
> limitation, and it was recommended to block ptrace when defining a filter
> to avoid this problem. This can be quite a limitation for containers or
> other places where ptrace is desired even under seccomp filters.
> 
> Since seccomp filtering has been split into pre-trace and trace phases
> (phase1 and phase2 respectively), it's possible to re-run phase1 seccomp
> after ptrace. This makes that change, and updates the test suite for
> both SECCOMP_RET_TRACE and PTRACE_SYSCALL manipulation.

Looks good to me. As far as I can tell, there are no codepaths that allow
manipulation of syscall arguments via ptrace register modification without
going through tracehook_report_syscall_entry() or seccomp_phase2(), and
the checks look good, too.


> Signed-off-by: Kees Cook 
> ---
>  include/linux/seccomp.h   |   6 +
>  include/linux/tracehook.h |   8 +-
>  kernel/seccomp.c  |  42 ++
>  tools/testing/selftests/seccomp/seccomp_bpf.c | 176 
> --
>  4 files changed, 220 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index 2296e6b2f690..e2b72394c200 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -85,6 +85,7 @@ static inline int seccomp_mode(struct seccomp *s)
>  #ifdef CONFIG_SECCOMP_FILTER
>  extern void put_seccomp_filter(struct task_struct *tsk);
>  extern void get_seccomp_filter(struct task_struct *tsk);
> +extern int seccomp_phase1_recheck(void);
>  #else  /* CONFIG_SECCOMP_FILTER */
>  static inline void put_seccomp_filter(struct task_struct *tsk)
>  {
> @@ -94,6 +95,11 @@ static inline void get_seccomp_filter(struct task_struct 
> *tsk)
>  {
>   return;
>  }
> +
> +static inline int seccomp_phase1_recheck(void)
> +{
> + return 0;
> +}
>  #endif /* CONFIG_SECCOMP_FILTER */
>  
>  #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
> diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
> index 26c152122a42..69b584d88508 100644
> --- a/include/linux/tracehook.h
> +++ b/include/linux/tracehook.h
> @@ -48,6 +48,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -100,7 +101,12 @@ static inline int ptrace_report_syscall(struct pt_regs 
> *regs)
>  static inline __must_check int tracehook_report_syscall_entry(
>   struct pt_regs *regs)
>  {
> - return ptrace_report_syscall(regs);
> + int skip;
> +
> + skip = ptrace_report_syscall(regs);
> + if (skip)
> + return skip;
> + return seccomp_phase1_recheck();
>  }
>  
>  /**
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 7002796f14a4..6eaa3a1c5edb 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -665,6 +665,46 @@ u32 seccomp_phase1(struct seccomp_data *sd)
>  }
>  
>  /**
> + * seccomp_phase1_recheck() - recheck phase1 in the context of ptrace
> + *
> + * This re-runs phase 1 seccomp checks in the case where ptrace may have
> + * just changed things out from under us.
> + *
> + * Returns 0 if the syscall should be processed or -1 to skip the syscall.
> + */
> +int seccomp_phase1_recheck(void)
> +{
> + u32 action;
> +
> + /* If we're not under seccomp, continue normally. */
> + if (!test_thread_flag(TIF_SECCOMP))
> + return 0;
> +
> + /* Pass NULL struct seccomp_data to force reload after ptrace. */
> + action = seccomp_phase1(NULL);
> + switch (action) {
> + case SECCOMP_PHASE1_OK:
> + /* Passes seccomp, continue normally. */
> + break;
> + case SECCOMP_PHASE1_SKIP:
> + /* Skip the syscall. */
> + return -1;
> + default:
> + if ((action & SECCOMP_RET_ACTION) != SECCOMP_RET_TRACE) {
> + /* Impossible return value: kill the process. */
> + do_exit(SIGSYS);
> + }
> + /*
> +  * We've hit a trace request, but ptrace already put us
> +  * into this state, so just continue.
> +  */
> + break;
> + }
> +
> + return 0;
> +}
> +
> +/**
>   * seccomp_phase2() - finish slow path seccomp work for the current syscall
>   * @phase1_result: The return value from seccomp_phase1()
>   *
> @@ -701,6 +741,8 @@ int seccomp_phase2(u32 phase1_result)
>   do_exit(SIGSYS);
>   if (syscall_get_nr(current, regs) < 0)
>   return -1;  /* Explicit request to skip. */
> + if (seccomp_phase1_recheck() < 0)
> + return -1;
>  
>   return 0;
>  }
[...]


signature.asc
Description: Digital signature


mmotm 2016-05-26-15-51 uploaded

2016-05-26 Thread akpm
The mm-of-the-moment snapshot 2016-05-26-15-51 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE--mm-dd-hh-mm-ss.  Both contain the string -mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master  topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.6:
(patches marked "*" will be included in linux-next)

  origin.patch
* seqlock-fix-raw_read_seqcount_latch.patch
* mm-make-config_deferred_struct_page_init-depends-on-flatmem-explicitly.patch
* mm-kasan-remove-unused-reserved-field-from-struct-kasan_alloc_meta.patch
* mm-slub-remove-unused-virt_to_obj.patch
* ocfs2-fix-improper-handling-of-return-errno.patch
* memcg-fix-mem_cgroup_out_of_memory-return-value.patch
* mm-oom_reaper-do-not-mmput-synchronously-from-the-oom-reaper-context-fix.patch
* dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch
* update-mm-zsmalloc-dont-fail-if-cant-create-debugfs-info.patch
* drivers-pinctrl-intel-pinctrl-baytrailc-fix-build-with-gcc-44.patch
  i-need-old-gcc.patch
  arch-alpha-kernel-systblss-remove-debug-check.patch
* 
direct-io-fix-direct-write-stale-data-exposure-from-concurrent-buffered-read.patch
* mm-oom-do-not-reap-task-if-there-are-live-threads-in-threadgroup.patch
* maintainers-add-kexec_corec-and-kexec_filec.patch
* maintainers-kdump-maintainers-update.patch
* mm-use-early_pfn_to_nid-in-page_ext_init.patch
* mm-use-early_pfn_to_nid-in-register_page_bootmem_info_node.patch
* oom_reaper-close-race-with-exiting-task.patch
* oom_reaper-close-race-with-exiting-task-checkpatch-fixes.patch
* mm-thp-avoid-false-positive-vm_bug_on_page-in-page_move_anon_rmap.patch
* mm-cma-silence-warnings-due-to-max-usage.patch
* mm-cma-silence-warnings-due-to-max-usage-checkpatch-fixes.patch
* mm-memcontrol-fix-the-margin-computation-in-mem_cgroup_margin.patch
* mm-memcontrol-move-comments-for-get_mctgt_type-to-proper-position.patch
* mm-disable-deferred_struct_page_init-on-no_bootmem.patch
* mm-fix-overflow-in-vm_map_ram.patch
* kdump-fix-dmesg-gdbmacro-to-work-with-record-based-printk.patch
* 
memcg-add-rcu-locking-around-css_for_each_descendant_pre-in-memcg_offline_kmem.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* fs-ext4-fsyncc-generic_file_fsync-call-based-on-barrier-flag.patch
* ocfs2-fix-a-redundant-re-initialization.patch
* ocfs2-o2hb-add-negotiate-timer.patch
* ocfs2-o2hb-add-nego_timeout-message.patch
* ocfs2-o2hb-add-negotiate_approve-message.patch
* ocfs2-o2hb-add-some-user-debug-log.patch
* ocfs2-o2hb-dont-negotiate-if-last-hb-fail.patch
* ocfs2-o2hb-fix-hb-hung-time.patch
* 
block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
  mm.patch
* mm-memcontrol-remove-the-useless-parameter-for-mc_handle_swap_pte.patch
* mm-init-fix-zone-boundary-creation.patch
* mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites.patch
* 

mmotm 2016-05-26-15-51 uploaded

2016-05-26 Thread akpm
The mm-of-the-moment snapshot 2016-05-26-15-51 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE--mm-dd-hh-mm-ss.  Both contain the string -mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master  topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.6:
(patches marked "*" will be included in linux-next)

  origin.patch
* seqlock-fix-raw_read_seqcount_latch.patch
* mm-make-config_deferred_struct_page_init-depends-on-flatmem-explicitly.patch
* mm-kasan-remove-unused-reserved-field-from-struct-kasan_alloc_meta.patch
* mm-slub-remove-unused-virt_to_obj.patch
* ocfs2-fix-improper-handling-of-return-errno.patch
* memcg-fix-mem_cgroup_out_of_memory-return-value.patch
* mm-oom_reaper-do-not-mmput-synchronously-from-the-oom-reaper-context-fix.patch
* dma-debug-avoid-spinlock-recursion-when-disabling-dma-debug.patch
* update-mm-zsmalloc-dont-fail-if-cant-create-debugfs-info.patch
* drivers-pinctrl-intel-pinctrl-baytrailc-fix-build-with-gcc-44.patch
  i-need-old-gcc.patch
  arch-alpha-kernel-systblss-remove-debug-check.patch
* 
direct-io-fix-direct-write-stale-data-exposure-from-concurrent-buffered-read.patch
* mm-oom-do-not-reap-task-if-there-are-live-threads-in-threadgroup.patch
* maintainers-add-kexec_corec-and-kexec_filec.patch
* maintainers-kdump-maintainers-update.patch
* mm-use-early_pfn_to_nid-in-page_ext_init.patch
* mm-use-early_pfn_to_nid-in-register_page_bootmem_info_node.patch
* oom_reaper-close-race-with-exiting-task.patch
* oom_reaper-close-race-with-exiting-task-checkpatch-fixes.patch
* mm-thp-avoid-false-positive-vm_bug_on_page-in-page_move_anon_rmap.patch
* mm-cma-silence-warnings-due-to-max-usage.patch
* mm-cma-silence-warnings-due-to-max-usage-checkpatch-fixes.patch
* mm-memcontrol-fix-the-margin-computation-in-mem_cgroup_margin.patch
* mm-memcontrol-move-comments-for-get_mctgt_type-to-proper-position.patch
* mm-disable-deferred_struct_page_init-on-no_bootmem.patch
* mm-fix-overflow-in-vm_map_ram.patch
* kdump-fix-dmesg-gdbmacro-to-work-with-record-based-printk.patch
* 
memcg-add-rcu-locking-around-css_for_each_descendant_pre-in-memcg_offline_kmem.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* fs-ext4-fsyncc-generic_file_fsync-call-based-on-barrier-flag.patch
* ocfs2-fix-a-redundant-re-initialization.patch
* ocfs2-o2hb-add-negotiate-timer.patch
* ocfs2-o2hb-add-nego_timeout-message.patch
* ocfs2-o2hb-add-negotiate_approve-message.patch
* ocfs2-o2hb-add-some-user-debug-log.patch
* ocfs2-o2hb-dont-negotiate-if-last-hb-fail.patch
* ocfs2-o2hb-fix-hb-hung-time.patch
* 
block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
  mm.patch
* mm-memcontrol-remove-the-useless-parameter-for-mc_handle_swap_pte.patch
* mm-init-fix-zone-boundary-creation.patch
* mm-check-the-return-value-of-lookup_page_ext-for-all-call-sites.patch
* 

Re: can't boot with reiserfs on linux-4.6.0+

2016-05-26 Thread Jeff Chua
On Thu, May 26, 2016 at 3:46 PM, Hillf Danton  wrote:
>> > See if this fixes your reproducer.
>> >
>> > diff --git a/fs/xattr.c b/fs/xattr.c
>> > index b11945e..49b8eab 100644
>> > --- a/fs/xattr.c
>> > +++ b/fs/xattr.c
>> > @@ -667,6 +667,9 @@ xattr_resolve_name(const struct xattr_handler 
>> > **handlers, const char **name)
>> >  {
>> > const struct xattr_handler *handler;
>> >
>> > +   if (!handlers)
>> > +   return NULL;
>> > +
>> > if (!*name)
>> > return NULL;
>> >
>>
>> Tried, but doesn't work.
>>
> See if this fixes your reproducer.
>
> --- linux-4.6/fs/xattr.cMon May 16 06:43:13 2016
> +++ b/fs/xattr.cThu May 26 15:36:14 2016
> @@ -667,8 +667,8 @@ xattr_resolve_name(const struct xattr_ha
>  {
> const struct xattr_handler *handler;
>
> -   if (!*name)
> -   return NULL;
> +   if (!handlers || !*name)
> +   return ERR_PTR(-EINVAL);
>
> for_each_xattr_handler(handlers, handler) {
> const char *n;
> --

Hillf,

That worked and it's already in Linus's latest tree.

Thanks for sharing.

Jeff


Re: can't boot with reiserfs on linux-4.6.0+

2016-05-26 Thread Jeff Chua
On Thu, May 26, 2016 at 3:46 PM, Hillf Danton  wrote:
>> > See if this fixes your reproducer.
>> >
>> > diff --git a/fs/xattr.c b/fs/xattr.c
>> > index b11945e..49b8eab 100644
>> > --- a/fs/xattr.c
>> > +++ b/fs/xattr.c
>> > @@ -667,6 +667,9 @@ xattr_resolve_name(const struct xattr_handler 
>> > **handlers, const char **name)
>> >  {
>> > const struct xattr_handler *handler;
>> >
>> > +   if (!handlers)
>> > +   return NULL;
>> > +
>> > if (!*name)
>> > return NULL;
>> >
>>
>> Tried, but doesn't work.
>>
> See if this fixes your reproducer.
>
> --- linux-4.6/fs/xattr.cMon May 16 06:43:13 2016
> +++ b/fs/xattr.cThu May 26 15:36:14 2016
> @@ -667,8 +667,8 @@ xattr_resolve_name(const struct xattr_ha
>  {
> const struct xattr_handler *handler;
>
> -   if (!*name)
> -   return NULL;
> +   if (!handlers || !*name)
> +   return ERR_PTR(-EINVAL);
>
> for_each_xattr_handler(handlers, handler) {
> const char *n;
> --

Hillf,

That worked and it's already in Linus's latest tree.

Thanks for sharing.

Jeff


Re: [PATCH v5 0/1] ARM64: ACPI: Update documentation for latest specification version

2016-05-26 Thread Al Stone
On 05/17/2016 10:30 AM, Al Stone wrote:
> On 05/16/2016 05:44 PM, Alexey Klimov wrote:
>> On Mon, May 2, 2016 at 09:19 PM, Al Stone wrote:
>>> On 04/25/2016 03:21 PM, Al Stone wrote:
 The ACPI 6.1 specification was recently released at the end of January
 2016, but the arm64 kernel documentation for the use of ACPI was written
 for the 5.1 version of the spec.  There were significant additions to the
 spec that had not yet been mentioned -- for example, the 6.0 mechanisms
 added to make it easier to define processors and low power idle states,
 as well as the 6.1 addition allowing regular interrupts (not just from
 GPIO) be used to signal ACPI general purpose events.

 This patch reflects going back through and examining the specs in detail
 and updating content appropriately.  Whilst there, a few odds and ends of
 typos were caught as well.  This brings the documentation up to date with
 ACPI 6.1 for arm64.

 Changes for v5:
-- Miscellaneous typos and corrections (Lorenzo Pieralisi)
-- Add linux-acpi@ ML to the distribution list (Alexey Klimov)
-- Corrections to CPPC information (Alexey Klimov)
-- ACK from Lorenzo Pieralisi
-- Updated bibliographic info (Al Stone)

 Changes for v4:
-- Clarify that IORT can sometimes be optional (Jon Masters).
-- Remove "Use as needed" descriptions of ACPI objects; they provide
   no substantive information and doing so simplifies maintenance of
   this document over time.  These have been replaced with a simpler
   notice that states that unless otherwise noted, do what the ACPI
   specification says is needed.
-- Corrected the _OSI object usage recommendation; it described kernel
   behavior that does not exist (Al Stone).

 Changes for v3:
-- Clarify use of _LPI/_RDI (Vikas Sajjan)
-- Whitespace cleanup as pointed out by checkpatch

 Changes for v2:
-- Clean up white space (Harb Abdulhahmid)
-- Clarification on _CCA usage (Harb Abdulhamid)
-- IORT moved to required from recommended (Hanjun Guo)
-- Clarify IORT description (Hanjun Guo)


 Al Stone (1):
   ARM64: ACPI: Update documentation for latest specification version

  Documentation/arm64/acpi_object_usage.txt | 343 
 --
  Documentation/arm64/arm-acpi.txt  |  40 ++--
  2 files changed, 213 insertions(+), 170 deletions(-)

>>>
>>> Ping?  If there are no further comments, can this be pulled in through
>>> either the documentation or arm64 tree?
>>>
>>> Thanks.
>>
>> Hi Al,
>> sorry for delay.
>>
>> CPPC and PCC corrections look fine. Thanks.
>>
>>
>> This comment is not to block your patch (maybe some to-do):
>> I greped sources and your patch and I don't see description of _PSD object.
>> This P-state dependancy object is optional but it's presense and correct data
>> are extremely useful for CPPC and can potentially descrease number of 
>> performance
>> changing requests.
>>
>> ACPI spec in section about CPPC tells that it may use _PSD (page 503 if I 
>> remember
>> correctly) to specify domain belongings of CPUs.
>>
>> You may consider to add description of _PSD object later.
>>
>> Best regards,
>> Alexey.
>>
> 
> Hrm.  Thanks, Alexey.  I'll take a look.  _PSD may be in one of
> the gray areas where we expect people to read the spec and follow
> it properly, but it may make sense to be very explicit about what
> they need to do to use it properly.  Perhaps this would make a good
> FWTS test, too.
> 

Yet another ping...

Just in case it is not clear, Alexey's comment and my reply here are
things that *might* need to be done in the future.  This version of the
patch I believe is sufficient for now, based on the comments received.

Lorenzo has ACKd; Hanjun has reviewed.  Do I need Will and/or Catalin
to ACK?  Any others?

-- 
ciao,
al
---
Al Stone
Software Engineer
Red Hat, Inc.
a...@redhat.com
---


Re: [PATCH v5 0/1] ARM64: ACPI: Update documentation for latest specification version

2016-05-26 Thread Al Stone
On 05/17/2016 10:30 AM, Al Stone wrote:
> On 05/16/2016 05:44 PM, Alexey Klimov wrote:
>> On Mon, May 2, 2016 at 09:19 PM, Al Stone wrote:
>>> On 04/25/2016 03:21 PM, Al Stone wrote:
 The ACPI 6.1 specification was recently released at the end of January
 2016, but the arm64 kernel documentation for the use of ACPI was written
 for the 5.1 version of the spec.  There were significant additions to the
 spec that had not yet been mentioned -- for example, the 6.0 mechanisms
 added to make it easier to define processors and low power idle states,
 as well as the 6.1 addition allowing regular interrupts (not just from
 GPIO) be used to signal ACPI general purpose events.

 This patch reflects going back through and examining the specs in detail
 and updating content appropriately.  Whilst there, a few odds and ends of
 typos were caught as well.  This brings the documentation up to date with
 ACPI 6.1 for arm64.

 Changes for v5:
-- Miscellaneous typos and corrections (Lorenzo Pieralisi)
-- Add linux-acpi@ ML to the distribution list (Alexey Klimov)
-- Corrections to CPPC information (Alexey Klimov)
-- ACK from Lorenzo Pieralisi
-- Updated bibliographic info (Al Stone)

 Changes for v4:
-- Clarify that IORT can sometimes be optional (Jon Masters).
-- Remove "Use as needed" descriptions of ACPI objects; they provide
   no substantive information and doing so simplifies maintenance of
   this document over time.  These have been replaced with a simpler
   notice that states that unless otherwise noted, do what the ACPI
   specification says is needed.
-- Corrected the _OSI object usage recommendation; it described kernel
   behavior that does not exist (Al Stone).

 Changes for v3:
-- Clarify use of _LPI/_RDI (Vikas Sajjan)
-- Whitespace cleanup as pointed out by checkpatch

 Changes for v2:
-- Clean up white space (Harb Abdulhahmid)
-- Clarification on _CCA usage (Harb Abdulhamid)
-- IORT moved to required from recommended (Hanjun Guo)
-- Clarify IORT description (Hanjun Guo)


 Al Stone (1):
   ARM64: ACPI: Update documentation for latest specification version

  Documentation/arm64/acpi_object_usage.txt | 343 
 --
  Documentation/arm64/arm-acpi.txt  |  40 ++--
  2 files changed, 213 insertions(+), 170 deletions(-)

>>>
>>> Ping?  If there are no further comments, can this be pulled in through
>>> either the documentation or arm64 tree?
>>>
>>> Thanks.
>>
>> Hi Al,
>> sorry for delay.
>>
>> CPPC and PCC corrections look fine. Thanks.
>>
>>
>> This comment is not to block your patch (maybe some to-do):
>> I greped sources and your patch and I don't see description of _PSD object.
>> This P-state dependancy object is optional but it's presense and correct data
>> are extremely useful for CPPC and can potentially descrease number of 
>> performance
>> changing requests.
>>
>> ACPI spec in section about CPPC tells that it may use _PSD (page 503 if I 
>> remember
>> correctly) to specify domain belongings of CPUs.
>>
>> You may consider to add description of _PSD object later.
>>
>> Best regards,
>> Alexey.
>>
> 
> Hrm.  Thanks, Alexey.  I'll take a look.  _PSD may be in one of
> the gray areas where we expect people to read the spec and follow
> it properly, but it may make sense to be very explicit about what
> they need to do to use it properly.  Perhaps this would make a good
> FWTS test, too.
> 

Yet another ping...

Just in case it is not clear, Alexey's comment and my reply here are
things that *might* need to be done in the future.  This version of the
patch I believe is sufficient for now, based on the comments received.

Lorenzo has ACKd; Hanjun has reviewed.  Do I need Will and/or Catalin
to ACK?  Any others?

-- 
ciao,
al
---
Al Stone
Software Engineer
Red Hat, Inc.
a...@redhat.com
---


Re: [PATCH] iommu/vt-d: reduce extra first level entry in iommu->domains

2016-05-26 Thread Wei Yang
On Thu, May 26, 2016 at 11:11:51AM +0100, Robin Murphy wrote:
>On 25/05/16 22:43, Wei Yang wrote:
>>On Wed, May 25, 2016 at 11:17:49AM +0100, Robin Murphy wrote:
>>>On 25/05/16 00:06, Wei Yang wrote:
Hi, Joerg

Not sure whether you think this calculation is correct.

If I missed something for this " + 1" in your formula, I am glad to hear 
your
explanation. So that I could learn something from you :-)
>>>
>>>I'm not familiar enough with this aspect of the driver to confirm whether the
>>>change is appropriate or not, but it does seem worth noting that using
>>>DIV_ROUND_UP would be an even neater approach.
>>>
>>
>>Hi, Robin,
>>
>>Thanks for your comment.
>>
>>Yes, I agree DIV_ROUND_UP would make the code more easy to read.
>>
>>I have thought about using DIV_ROUND_UP, while from the definition
>>DIV_ROUND_UP use operation "/", and ALIGN use bit operation. So the change in
>>my patch chooses the second one and tries to keep the efficiency.
>
>The efficiency of what, though?
>
>It's an unsigned division by a constant power of two, which GCC implements
>with a shift instruction regardless of optimisation - and at -O1 and above
>the machine code generated for either form of expression is completely
>identical (try it and see!).
>

Thanks.

Looks my knowledge of the compiler is an ancient one :-)

I haven't thought about to compare the generated code. This is really a good
test before making the decision. Next time I would try this before choosing
one.

>On the other hand, the small amount of time and cognitive effort it took to
>parse "ALIGN(x, 256) >> 8" as "divide by 256, rounding up" compared to simply
>seeing "DIV_ROUND_UP(x, 256)" and knowing instantly what's intended,
>certainly makes it less efficient to _maintain_; thus it's exactly the kind
>of thing to which Dijkstra's famous quotation applies.
>
>Does that count towards learning something? ;)
>

Really~

I am really happy to see your comments which help me to be more mature on the
solution. I owe you a favor :-) If you would come to Shanghai, I would like to
take you around~

>Robin.
>
Have a good day~

On Sat, May 21, 2016 at 02:41:51AM +, Wei Yang wrote:
>In commit <8bf478163e69> ("iommu/vt-d: Split up iommu->domains array"), it
>it splits iommu->domains in two levels. Each first level contains 256
>entries of second level. In case of the ndomains is exact a multiple of
>256, it would have one more extra first level entry for current
>implementation.
>
>This patch refines this calculation to reduce the extra first level entry.
>
>Signed-off-by: Wei Yang 
>---
>drivers/iommu/intel-iommu.c |4 ++--
>1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>index e3061d3..2204ca4 100644
>--- a/drivers/iommu/intel-iommu.c
>+++ b/drivers/iommu/intel-iommu.c
>@@ -1634,7 +1634,7 @@ static int iommu_init_domains(struct intel_iommu 
>*iommu)
>   return -ENOMEM;
>   }
>
>-  size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
>+  size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
>   iommu->domains = kzalloc(size, GFP_KERNEL);
>
>   if (iommu->domains) {
>@@ -1699,7 +1699,7 @@ static void disable_dmar_iommu(struct intel_iommu 
>*iommu)
>static void free_dmar_iommu(struct intel_iommu *iommu)
>{
>   if ((iommu->domains) && (iommu->domain_ids)) {
>-  int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
>+  int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
>   int i;
>
>   for (i = 0; i < elems; i++)
>--
>1.7.9.5

>>

-- 
Wei Yang
Help you, Help me


Re: [PATCH] iommu/vt-d: reduce extra first level entry in iommu->domains

2016-05-26 Thread Wei Yang
On Thu, May 26, 2016 at 11:11:51AM +0100, Robin Murphy wrote:
>On 25/05/16 22:43, Wei Yang wrote:
>>On Wed, May 25, 2016 at 11:17:49AM +0100, Robin Murphy wrote:
>>>On 25/05/16 00:06, Wei Yang wrote:
Hi, Joerg

Not sure whether you think this calculation is correct.

If I missed something for this " + 1" in your formula, I am glad to hear 
your
explanation. So that I could learn something from you :-)
>>>
>>>I'm not familiar enough with this aspect of the driver to confirm whether the
>>>change is appropriate or not, but it does seem worth noting that using
>>>DIV_ROUND_UP would be an even neater approach.
>>>
>>
>>Hi, Robin,
>>
>>Thanks for your comment.
>>
>>Yes, I agree DIV_ROUND_UP would make the code more easy to read.
>>
>>I have thought about using DIV_ROUND_UP, while from the definition
>>DIV_ROUND_UP use operation "/", and ALIGN use bit operation. So the change in
>>my patch chooses the second one and tries to keep the efficiency.
>
>The efficiency of what, though?
>
>It's an unsigned division by a constant power of two, which GCC implements
>with a shift instruction regardless of optimisation - and at -O1 and above
>the machine code generated for either form of expression is completely
>identical (try it and see!).
>

Thanks.

Looks my knowledge of the compiler is an ancient one :-)

I haven't thought about to compare the generated code. This is really a good
test before making the decision. Next time I would try this before choosing
one.

>On the other hand, the small amount of time and cognitive effort it took to
>parse "ALIGN(x, 256) >> 8" as "divide by 256, rounding up" compared to simply
>seeing "DIV_ROUND_UP(x, 256)" and knowing instantly what's intended,
>certainly makes it less efficient to _maintain_; thus it's exactly the kind
>of thing to which Dijkstra's famous quotation applies.
>
>Does that count towards learning something? ;)
>

Really~

I am really happy to see your comments which help me to be more mature on the
solution. I owe you a favor :-) If you would come to Shanghai, I would like to
take you around~

>Robin.
>
Have a good day~

On Sat, May 21, 2016 at 02:41:51AM +, Wei Yang wrote:
>In commit <8bf478163e69> ("iommu/vt-d: Split up iommu->domains array"), it
>it splits iommu->domains in two levels. Each first level contains 256
>entries of second level. In case of the ndomains is exact a multiple of
>256, it would have one more extra first level entry for current
>implementation.
>
>This patch refines this calculation to reduce the extra first level entry.
>
>Signed-off-by: Wei Yang 
>---
>drivers/iommu/intel-iommu.c |4 ++--
>1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>index e3061d3..2204ca4 100644
>--- a/drivers/iommu/intel-iommu.c
>+++ b/drivers/iommu/intel-iommu.c
>@@ -1634,7 +1634,7 @@ static int iommu_init_domains(struct intel_iommu 
>*iommu)
>   return -ENOMEM;
>   }
>
>-  size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
>+  size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
>   iommu->domains = kzalloc(size, GFP_KERNEL);
>
>   if (iommu->domains) {
>@@ -1699,7 +1699,7 @@ static void disable_dmar_iommu(struct intel_iommu 
>*iommu)
>static void free_dmar_iommu(struct intel_iommu *iommu)
>{
>   if ((iommu->domains) && (iommu->domain_ids)) {
>-  int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
>+  int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
>   int i;
>
>   for (i = 0; i < elems; i++)
>--
>1.7.9.5

>>

-- 
Wei Yang
Help you, Help me


Re: [PATCH 01/23] all: syscall wrappers: add documentation

2016-05-26 Thread Catalin Marinas
On Thu, May 26, 2016 at 11:48:19PM +0300, Yury Norov wrote:
> On Wed, May 25, 2016 at 02:28:21PM -0700, David Miller wrote:
> > From: Arnd Bergmann 
> > Date: Wed, 25 May 2016 23:01:06 +0200
> > 
> > > On Wednesday, May 25, 2016 1:50:39 PM CEST David Miller wrote:
> > >> From: Arnd Bergmann 
> > >> Date: Wed, 25 May 2016 22:47:33 +0200
> > >> 
> > >> > If we use the normal calling conventions, we could remove these 
> > >> > overrides
> > >> > along with the respective special-case handling in glibc. None of them
> > >> > look particularly performance-sensitive, but I could be wrong there.
> > >> 
> > >> You could set the lowest bit in the system call entry pointer to indicate
> > >> the upper-half clears should be elided.
> > > 
> > > Right, but that would introduce an extra conditional branch in the syscall
> > > hotpath, and likely eliminate the gains from passing the loff_t arguments
> > > in a single register instead of a pair.
> > 
> > Ok, then, how much are you really gaining from avoiding a 'shift' and
> > an 'or' to build the full 64-bit value?  3 cycles?  Maybe 4?
> 
> 4 cycles in kernel and ~same cost in glibc to create a pair.

It would take a single instruction per argument in the kernel to do
shift+or and maybe 1-2 more instructions to move the remaining arguments
in place (we do this for a few wrappers in arch/arm64/kernel/entry32.S).
And the glibc counterpart.

> And 8 'mov's that exist for every syscall, even yield().
> 
> > And the executing the wrappers, those have a non-trivial cost too.
> 
> The cost is pretty trivial though. See kernel/compat_wrapper.o:
> COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
> 0:   a9bf7bfdstp x29, x30, [sp,#-16]!
> 4:   910003fdmov x29, sp
> 8:   2a0003e0mov w0, w0
> c:   9400bl  0 
> 10:  a8c17bfdldp x29, x30, [sp],#16
> 14:  d65f03c0ret

I would say the above could be more expensive than 8 movs (16 bytes to
write, read, a branch and a ret). You can also add the I-cache locality,
having wrappers for each syscalls instead of a single place for zeroing
the upper half (where no other wrapper is necessary).

Can we trick the compiler into doing a tail call optimisation. This
could have simply been:

COMPAT_SYSCALL_WRAP2(creat, ...):
mov w0, w0
b   

> > Cost wise, this seems like it all cancels out in the end, but what
> > do I know?
> 
> I think you know something, and I also think Heiko and other s390 guys
> know something as well. So I'd like to listen their arguments here.
> 
> For me spark64 way is looking reasonable only because it's really simple
> and takes less coding. I'll try it on some branch and share here what 
> happened.

The kernel code will definitely look simpler ;). It would be good to see
if there actually is any performance impact. Even with 16 more cycles on
syscall entry, would they be lost in the noise? You don't need a full
implementation, just some dummy mov x0, x0 on the entry path.

-- 
Catalin


Re: [PATCH 01/23] all: syscall wrappers: add documentation

2016-05-26 Thread Catalin Marinas
On Thu, May 26, 2016 at 11:48:19PM +0300, Yury Norov wrote:
> On Wed, May 25, 2016 at 02:28:21PM -0700, David Miller wrote:
> > From: Arnd Bergmann 
> > Date: Wed, 25 May 2016 23:01:06 +0200
> > 
> > > On Wednesday, May 25, 2016 1:50:39 PM CEST David Miller wrote:
> > >> From: Arnd Bergmann 
> > >> Date: Wed, 25 May 2016 22:47:33 +0200
> > >> 
> > >> > If we use the normal calling conventions, we could remove these 
> > >> > overrides
> > >> > along with the respective special-case handling in glibc. None of them
> > >> > look particularly performance-sensitive, but I could be wrong there.
> > >> 
> > >> You could set the lowest bit in the system call entry pointer to indicate
> > >> the upper-half clears should be elided.
> > > 
> > > Right, but that would introduce an extra conditional branch in the syscall
> > > hotpath, and likely eliminate the gains from passing the loff_t arguments
> > > in a single register instead of a pair.
> > 
> > Ok, then, how much are you really gaining from avoiding a 'shift' and
> > an 'or' to build the full 64-bit value?  3 cycles?  Maybe 4?
> 
> 4 cycles in kernel and ~same cost in glibc to create a pair.

It would take a single instruction per argument in the kernel to do
shift+or and maybe 1-2 more instructions to move the remaining arguments
in place (we do this for a few wrappers in arch/arm64/kernel/entry32.S).
And the glibc counterpart.

> And 8 'mov's that exist for every syscall, even yield().
> 
> > And the executing the wrappers, those have a non-trivial cost too.
> 
> The cost is pretty trivial though. See kernel/compat_wrapper.o:
> COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
> 0:   a9bf7bfdstp x29, x30, [sp,#-16]!
> 4:   910003fdmov x29, sp
> 8:   2a0003e0mov w0, w0
> c:   9400bl  0 
> 10:  a8c17bfdldp x29, x30, [sp],#16
> 14:  d65f03c0ret

I would say the above could be more expensive than 8 movs (16 bytes to
write, read, a branch and a ret). You can also add the I-cache locality,
having wrappers for each syscalls instead of a single place for zeroing
the upper half (where no other wrapper is necessary).

Can we trick the compiler into doing a tail call optimisation. This
could have simply been:

COMPAT_SYSCALL_WRAP2(creat, ...):
mov w0, w0
b   

> > Cost wise, this seems like it all cancels out in the end, but what
> > do I know?
> 
> I think you know something, and I also think Heiko and other s390 guys
> know something as well. So I'd like to listen their arguments here.
> 
> For me spark64 way is looking reasonable only because it's really simple
> and takes less coding. I'll try it on some branch and share here what 
> happened.

The kernel code will definitely look simpler ;). It would be good to see
if there actually is any performance impact. Even with 16 more cycles on
syscall entry, would they be lost in the noise? You don't need a full
implementation, just some dummy mov x0, x0 on the entry path.

-- 
Catalin


[PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Long Li
The block sector size should be in unit of 512 bytes, not in bytes.

Signed-off-by: Long Li 

---
 drivers/scsi/sd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 428c03e..4bce17e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (sdkp->opt_xfer_blocks &&
sdkp->opt_xfer_blocks <= dev_max &&
sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-   sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
-   rw_max = q->limits.io_opt =
+   sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
+   q->limits.io_opt =
sdkp->opt_xfer_blocks * sdp->sector_size;
+   rw_max = (q->limits.io_opt >> 9);
+   }
else
rw_max = BLK_DEF_MAX_SECTORS;
 
-- 
1.8.5.6



[PATCH] Use the correct size to set block max sectors

2016-05-26 Thread Long Li
The block sector size should be in unit of 512 bytes, not in bytes.

Signed-off-by: Long Li 

---
 drivers/scsi/sd.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 428c03e..4bce17e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2862,9 +2862,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (sdkp->opt_xfer_blocks &&
sdkp->opt_xfer_blocks <= dev_max &&
sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-   sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
-   rw_max = q->limits.io_opt =
+   sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE) {
+   q->limits.io_opt =
sdkp->opt_xfer_blocks * sdp->sector_size;
+   rw_max = (q->limits.io_opt >> 9);
+   }
else
rw_max = BLK_DEF_MAX_SECTORS;
 
-- 
1.8.5.6



Regression in i915 in kernel 4.6.0-git - bisected to f21a21983ef13a031

2016-05-26 Thread Larry Finger
The latest mainline kernel (commit 3f59de0) shows a regression. The symptom is 
that as soon as the kernel is started, the display is blanked, and it is never 
turned on again. This problem was bisected to commit 
f21a21983ef13a031250c4c3f6018e29a549d0f1
("drm/i915: Splitting intel_dp_detect"). The adapter in question is the 
integrated graphics controller on a Toshiba Tecra A50 laptop. The output of 
"lspci -nnv" for that controller is as follows:



00:02.0 VGA compatible controller [0300]: Intel Corporation 4th Gen Core 
Processor Integrated Graphics Controller [8086:0416] (rev 06) (prog-if 00 [VGA 
controller])

Subsystem: Toshiba America Info Systems Device [1179:0002]
Flags: bus master, fast devsel, latency 0, IRQ 27
Memory at e000 (64-bit, non-prefetchable) [size=4M]
Memory at d000 (64-bit, prefetchable) [size=256M]
I/O ports at 4000 [size=64]
Expansion ROM at  [disabled]
Capabilities: [90] MSI: Enable+ Count=1/1 Maskable- 64bit-
Capabilities: [d0] Power Management version 2
Capabilities: [a4] PCI Advanced Features
Kernel driver in use: i915
Kernel modules: i915

If the kernel is booted with "nomodeset", the display is normal.

I will be happy to test any patches, or to answer any further questions.

Thanks,

Larry


Regression in i915 in kernel 4.6.0-git - bisected to f21a21983ef13a031

2016-05-26 Thread Larry Finger
The latest mainline kernel (commit 3f59de0) shows a regression. The symptom is 
that as soon as the kernel is started, the display is blanked, and it is never 
turned on again. This problem was bisected to commit 
f21a21983ef13a031250c4c3f6018e29a549d0f1
("drm/i915: Splitting intel_dp_detect"). The adapter in question is the 
integrated graphics controller on a Toshiba Tecra A50 laptop. The output of 
"lspci -nnv" for that controller is as follows:



00:02.0 VGA compatible controller [0300]: Intel Corporation 4th Gen Core 
Processor Integrated Graphics Controller [8086:0416] (rev 06) (prog-if 00 [VGA 
controller])

Subsystem: Toshiba America Info Systems Device [1179:0002]
Flags: bus master, fast devsel, latency 0, IRQ 27
Memory at e000 (64-bit, non-prefetchable) [size=4M]
Memory at d000 (64-bit, prefetchable) [size=256M]
I/O ports at 4000 [size=64]
Expansion ROM at  [disabled]
Capabilities: [90] MSI: Enable+ Count=1/1 Maskable- 64bit-
Capabilities: [d0] Power Management version 2
Capabilities: [a4] PCI Advanced Features
Kernel driver in use: i915
Kernel modules: i915

If the kernel is booted with "nomodeset", the display is normal.

I will be happy to test any patches, or to answer any further questions.

Thanks,

Larry


Re: [PATCH 3/4] dell-wmi: Add information about other WMI event codes

2016-05-26 Thread Gabriele Mazzotta
On 22/05/2016 13:36, Pali Rohár wrote:
> ACPI DSDT tables have defined other WMI codes, but does not contain any
> description when those codes are emitted. Some other codes can be found in
> logs on internet. In this patch are all which I saw, but lot of them are
> not tested properly (e.g. for duplicate events with AT keyboard). Now we
> have all WMI event codes at one place and in future after proper testing
> those codes can be correctly enabled or disabled...
> 
> Signed-off-by: Pali Rohár 
> ---
>  drivers/platform/x86/dell-wmi.c |   32 
>  1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
> index 363d927..7aac1dc 100644
> --- a/drivers/platform/x86/dell-wmi.c
> +++ b/drivers/platform/x86/dell-wmi.c
> @@ -110,6 +110,9 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>   /* BIOS error detected */
>   { KE_IGNORE, 0xe00d, { KEY_RESERVED } },
>  
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */
> +

I'm interested in knowing what's the meaning of this 0xe00e. This
event is sent multiple times when I suspend/resume my laptop and
it's definitely not a keypress.

Anyway, I've been using this patch set and didn't notice any issue, so

Tested-by: Gabriele Mazzotta 

>   /* Wifi Catcher */
>   { KE_KEY,0xe011, { KEY_PROG2 } },
>  
> @@ -118,21 +121,45 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>  
>   { KE_IGNORE, 0xe020, { KEY_MUTE } },
>  
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe023, { KEY_RESERVED } }, */
> +
> + /* Untested, Dell Instant Launch key on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe024, { KEY_RESERVED } }, */
> +
>   /* Dell Instant Launch key */
>   { KE_KEY,0xe025, { KEY_PROG4 } },
>  
>   /* Audio panel key */
>   { KE_IGNORE, 0xe026, { KEY_RESERVED } },
>  
> + /* Untested, Multimedia key on Dell Vostro 3560 */
> + /* { KE_IGNORE, 0xe028, { KEY_RESERVED } }, */
> +
>   /* Dell Instant Launch key */
>   { KE_KEY,0xe029, { KEY_PROG4 } },
>  
> + /* Untested, Windows Mobility Center button on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe02a, { KEY_RESERVED } }, */
> +
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe02b, { KEY_RESERVED } }, */
> +
> + /* Untested, Dell Audio With Preset Switch button on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe02c, { KEY_RESERVED } }, */
> +
>   { KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } },
>   { KE_IGNORE, 0xe030, { KEY_VOLUMEUP } },
>   { KE_IGNORE, 0xe033, { KEY_KBDILLUMUP } },
>   { KE_IGNORE, 0xe034, { KEY_KBDILLUMDOWN } },
>   { KE_IGNORE, 0xe03a, { KEY_CAPSLOCK } },
>  
> + /* NIC Link is Up */
> + { KE_IGNORE, 0xe043, { KEY_RESERVED } },
> +
> + /* NIC Link is Down */
> + { KE_IGNORE, 0xe044, { KEY_RESERVED } },
> +
>   /*
>* This entry is very suspicious!
>* Originally Matthew Garrett created this dell-wmi driver specially for
> @@ -145,7 +172,12 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>*/
>   { KE_IGNORE, 0xe045, { KEY_NUMLOCK } },
>  
> + /* Scroll lock and also going to tablet mode on portable devices */
>   { KE_IGNORE, 0xe046, { KEY_SCROLLLOCK } },
> +
> + /* Untested, going from tablet mode on portable devices */
> + /* { KE_IGNORE, 0xe047, { KEY_RESERVED } }, */
> +
>   { KE_IGNORE, 0xe0f7, { KEY_MUTE } },
>   { KE_IGNORE, 0xe0f8, { KEY_VOLUMEDOWN } },
>   { KE_IGNORE, 0xe0f9, { KEY_VOLUMEUP } },
> 


Re: [PATCH 3/4] dell-wmi: Add information about other WMI event codes

2016-05-26 Thread Gabriele Mazzotta
On 22/05/2016 13:36, Pali Rohár wrote:
> ACPI DSDT tables have defined other WMI codes, but does not contain any
> description when those codes are emitted. Some other codes can be found in
> logs on internet. In this patch are all which I saw, but lot of them are
> not tested properly (e.g. for duplicate events with AT keyboard). Now we
> have all WMI event codes at one place and in future after proper testing
> those codes can be correctly enabled or disabled...
> 
> Signed-off-by: Pali Rohár 
> ---
>  drivers/platform/x86/dell-wmi.c |   32 
>  1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
> index 363d927..7aac1dc 100644
> --- a/drivers/platform/x86/dell-wmi.c
> +++ b/drivers/platform/x86/dell-wmi.c
> @@ -110,6 +110,9 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>   /* BIOS error detected */
>   { KE_IGNORE, 0xe00d, { KEY_RESERVED } },
>  
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */
> +

I'm interested in knowing what's the meaning of this 0xe00e. This
event is sent multiple times when I suspend/resume my laptop and
it's definitely not a keypress.

Anyway, I've been using this patch set and didn't notice any issue, so

Tested-by: Gabriele Mazzotta 

>   /* Wifi Catcher */
>   { KE_KEY,0xe011, { KEY_PROG2 } },
>  
> @@ -118,21 +121,45 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>  
>   { KE_IGNORE, 0xe020, { KEY_MUTE } },
>  
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe023, { KEY_RESERVED } }, */
> +
> + /* Untested, Dell Instant Launch key on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe024, { KEY_RESERVED } }, */
> +
>   /* Dell Instant Launch key */
>   { KE_KEY,0xe025, { KEY_PROG4 } },
>  
>   /* Audio panel key */
>   { KE_IGNORE, 0xe026, { KEY_RESERVED } },
>  
> + /* Untested, Multimedia key on Dell Vostro 3560 */
> + /* { KE_IGNORE, 0xe028, { KEY_RESERVED } }, */
> +
>   /* Dell Instant Launch key */
>   { KE_KEY,0xe029, { KEY_PROG4 } },
>  
> + /* Untested, Windows Mobility Center button on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe02a, { KEY_RESERVED } }, */
> +
> + /* Unknown, defined in ACPI DSDT */
> + /* { KE_IGNORE, 0xe02b, { KEY_RESERVED } }, */
> +
> + /* Untested, Dell Audio With Preset Switch button on Inspiron 7520 */
> + /* { KE_IGNORE, 0xe02c, { KEY_RESERVED } }, */
> +
>   { KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } },
>   { KE_IGNORE, 0xe030, { KEY_VOLUMEUP } },
>   { KE_IGNORE, 0xe033, { KEY_KBDILLUMUP } },
>   { KE_IGNORE, 0xe034, { KEY_KBDILLUMDOWN } },
>   { KE_IGNORE, 0xe03a, { KEY_CAPSLOCK } },
>  
> + /* NIC Link is Up */
> + { KE_IGNORE, 0xe043, { KEY_RESERVED } },
> +
> + /* NIC Link is Down */
> + { KE_IGNORE, 0xe044, { KEY_RESERVED } },
> +
>   /*
>* This entry is very suspicious!
>* Originally Matthew Garrett created this dell-wmi driver specially for
> @@ -145,7 +172,12 @@ static const struct key_entry dell_wmi_legacy_keymap[] 
> __initconst = {
>*/
>   { KE_IGNORE, 0xe045, { KEY_NUMLOCK } },
>  
> + /* Scroll lock and also going to tablet mode on portable devices */
>   { KE_IGNORE, 0xe046, { KEY_SCROLLLOCK } },
> +
> + /* Untested, going from tablet mode on portable devices */
> + /* { KE_IGNORE, 0xe047, { KEY_RESERVED } }, */
> +
>   { KE_IGNORE, 0xe0f7, { KEY_MUTE } },
>   { KE_IGNORE, 0xe0f8, { KEY_VOLUMEDOWN } },
>   { KE_IGNORE, 0xe0f9, { KEY_VOLUMEUP } },
> 


RE: [PATCH V3 1/2] i2c: qup: Fix broken dma when CONFIG_DEBUG_SG is enabled

2016-05-26 Thread Sricharan
Hi,

>> sg_set_buf expects that the buf parameter passed in should be from
>> lowmem and a valid pageframe. This is not true for pages from
>> dma_alloc_coherent which can be carveouts, hence the check fails.
>
>OK, given you mean dma_pool_alloc here, the check fails for the
>pageframe because of the pool? Is my understanding correct?
>
Yes right. Since those are carveouts, there is no valid pageframe,
 so the check fails.

>> Change allocation of sg buffers from dma_coherent memory to kzalloc
>> to fix the issue.
>
>But why can you drop the coherency?
>
 The coherency is not dropped here. dma_map/unmap used
 makes the buffer coherent before passing it to
 dmaengine. Previously it was not required.
 I can add this in description if its not clear.

 >> @@ -1268,6 +1260,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
>>  }
>>  }
>>
>> +idx = 0;
>> +
>
>This looks like an unrelated change.
Ha, wrong. This should have been in a separate patch. This was to fix a
initialization issue in dma mode. Sorry, should not have been here, will
move it to out.

Regards,
 Sricharan




RE: [PATCH V3 1/2] i2c: qup: Fix broken dma when CONFIG_DEBUG_SG is enabled

2016-05-26 Thread Sricharan
Hi,

>> sg_set_buf expects that the buf parameter passed in should be from
>> lowmem and a valid pageframe. This is not true for pages from
>> dma_alloc_coherent which can be carveouts, hence the check fails.
>
>OK, given you mean dma_pool_alloc here, the check fails for the
>pageframe because of the pool? Is my understanding correct?
>
Yes right. Since those are carveouts, there is no valid pageframe,
 so the check fails.

>> Change allocation of sg buffers from dma_coherent memory to kzalloc
>> to fix the issue.
>
>But why can you drop the coherency?
>
 The coherency is not dropped here. dma_map/unmap used
 makes the buffer coherent before passing it to
 dmaengine. Previously it was not required.
 I can add this in description if its not clear.

 >> @@ -1268,6 +1260,8 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
>>  }
>>  }
>>
>> +idx = 0;
>> +
>
>This looks like an unrelated change.
Ha, wrong. This should have been in a separate patch. This was to fix a
initialization issue in dma mode. Sorry, should not have been here, will
move it to out.

Regards,
 Sricharan




Re: [PATCH v2 08/13] ACPICA: Hardware: Add optimized access bit width support

2016-05-26 Thread Boris Ostrovsky
On 05/26/2016 12:55 PM, Boris Ostrovsky wrote:
> On 05/26/2016 12:26 PM, Jan Beulich wrote:
> Boris Ostrovsky  05/25/16 9:17 PM >>>
>>> On 05/05/2016 12:58 AM, Lv Zheng wrote:
 +static u8
 +acpi_hw_get_access_bit_width(struct acpi_generic_address *reg, u8 
 max_bit_width)
 +{
 +u64 address;
 +
 +if (!reg->access_width) {
 +/*
 + * Detect old register descriptors where only the bit_width field
 + * makes senses. The target address is copied to handle possible
 + * alignment issues.
 + */
 +ACPI_MOVE_64_TO_64(, >address);
 +if (!reg->bit_offset && reg->bit_width &&
 +ACPI_IS_POWER_OF_TWO(reg->bit_width) &&
 +ACPI_IS_ALIGNED(reg->bit_width, 8) &&
 +ACPI_IS_ALIGNED(address, reg->bit_width)) {
 +return (reg->bit_width);
 +} else {
 +if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
 +return (32);
>>> This (together with "... Add access_width/bit_offset support in
>>> acpi_hw_write") breaks Xen guests using older QEMU which doesn't support
>>> 4-byte IO accesses.
>>>
>>> Why not return "reg->bit_width?:max_bit_width" ? This will preserve
>>> original behavior.
>> Did you figure out why we get here in the first place, instead of taking the
>> first "return"? I.e. isn't the issue the apparently wrong use of the second
>> ACPI_IS_ALIGNED() above? Afaict it ought to be
>> ACPI_IS_ALIGNED(address, reg->bit_width / 8)...
> We are trying to access address 0x...b004 (PM1a control) so yes, fixing
> alignment check would probably resolve the problem that we are seeing now.
>
> However, for compatibility purposes we may consider not doing any checks
> and simply return bit_width if access_width is not available.


Interestingly enough I bisected what I thought was a completely
different problem to this patch as well.

Something is messed up with 32-bit dom0 booting (so no QEMU) on one
machine in my pool.  First error that I see is
pcieport :00:02.0: can't find IRQ for PCI INT A; probably buggy
MP table

I'll look at this tomorrow, hopefully.

-boris






Re: [PATCH v2 08/13] ACPICA: Hardware: Add optimized access bit width support

2016-05-26 Thread Boris Ostrovsky
On 05/26/2016 12:55 PM, Boris Ostrovsky wrote:
> On 05/26/2016 12:26 PM, Jan Beulich wrote:
> Boris Ostrovsky  05/25/16 9:17 PM >>>
>>> On 05/05/2016 12:58 AM, Lv Zheng wrote:
 +static u8
 +acpi_hw_get_access_bit_width(struct acpi_generic_address *reg, u8 
 max_bit_width)
 +{
 +u64 address;
 +
 +if (!reg->access_width) {
 +/*
 + * Detect old register descriptors where only the bit_width field
 + * makes senses. The target address is copied to handle possible
 + * alignment issues.
 + */
 +ACPI_MOVE_64_TO_64(, >address);
 +if (!reg->bit_offset && reg->bit_width &&
 +ACPI_IS_POWER_OF_TWO(reg->bit_width) &&
 +ACPI_IS_ALIGNED(reg->bit_width, 8) &&
 +ACPI_IS_ALIGNED(address, reg->bit_width)) {
 +return (reg->bit_width);
 +} else {
 +if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
 +return (32);
>>> This (together with "... Add access_width/bit_offset support in
>>> acpi_hw_write") breaks Xen guests using older QEMU which doesn't support
>>> 4-byte IO accesses.
>>>
>>> Why not return "reg->bit_width?:max_bit_width" ? This will preserve
>>> original behavior.
>> Did you figure out why we get here in the first place, instead of taking the
>> first "return"? I.e. isn't the issue the apparently wrong use of the second
>> ACPI_IS_ALIGNED() above? Afaict it ought to be
>> ACPI_IS_ALIGNED(address, reg->bit_width / 8)...
> We are trying to access address 0x...b004 (PM1a control) so yes, fixing
> alignment check would probably resolve the problem that we are seeing now.
>
> However, for compatibility purposes we may consider not doing any checks
> and simply return bit_width if access_width is not available.


Interestingly enough I bisected what I thought was a completely
different problem to this patch as well.

Something is messed up with 32-bit dom0 booting (so no QEMU) on one
machine in my pool.  First error that I see is
pcieport :00:02.0: can't find IRQ for PCI INT A; probably buggy
MP table

I'll look at this tomorrow, hopefully.

-boris






[PATCH v6r2 11/12] zsmalloc: page migration support

2016-05-26 Thread Minchan Kim
Follow up Sergey's review

>From 2deede28c91910a9d3493feae30bed507e72f213 Mon Sep 17 00:00:00 2001
From: Minchan Kim 
Date: Thu, 5 May 2016 00:01:03 +0900
Subject: [PATCH v6r2] zsmalloc: page migration support

This patch introduces run-time migration feature for zspage.

For migration, VM uses page.lru field so it would be better to not use
page.next field which is unified with page.lru for own purpose.
For that, firstly, we can get first object offset of the page via
runtime calculation instead of using page.index so we can use
page.index as link for page chaining instead of page.next.

In case of huge object, it stores handle to page.index instead of
next link of page chaining because huge object doesn't need to next
link for page chaining. So get_next_page need to identify huge
object to return NULL. For it, this patch uses PG_owner_priv_1 flag
of the page flag.

For migration, it supports three functions

* zs_page_isolate

It isolates a zspage which includes a subpage VM want to migrate
from class so anyone cannot allocate new object from the zspage.

We could try to isolate a zspage by the number of subpage so
subsequent isolation trial of other subpage of the zpsage shouldn't
fail. For that, we introduce zspage.isolated count. With that,
zs_page_isolate can know whether zspage is already isolated or not
for migration so if it is isolated for migration, subsequent
isolation trial can be successful without trying further isolation.

* zs_page_migrate

First of all, it holds write-side zspage->lock to prevent migrate other
subpage in zspage. Then, lock all objects in the page VM want to migrate.
The reason we should lock all objects in the page is due to race between
zs_map_object and zs_page_migrate.

zs_map_object   zs_page_migrate

pin_tag(handle)
obj = handle_to_obj(handle)
obj_to_location(obj, , _idx);

write_lock(>lock)
if (!trypin_tag(handle))
goto unpin_object

zspage = get_zspage(page);
read_lock(>lock);

If zs_page_migrate doesn't do trypin_tag, zs_map_object's page can
be stale by migration so it goes crash.

If it locks all of objects successfully, it copies content from
old page to new one, finally, create new zspage chain with new page.
And if it's last isolated subpage in the zspage, put the zspage back
to class.

* zs_page_putback

It returns isolated zspage to right fullness_group list if it fails to
migrate a page. If it find a zspage is ZS_EMPTY, it queues zspage
freeing to workqueue. See below about async zspage freeing.

This patch introduces asynchronous zspage free. The reason to need it
is we need page_lock to clear PG_movable but unfortunately,
zs_free path should be atomic so the apporach is try to grab page_lock.
If it got page_lock of all of pages successfully, it can free zspage
immediately. Otherwise, it queues free request and free zspage via
workqueue in process context.

If zs_free finds the zspage is isolated when it try to free zspage,
it delays the freeing until zs_page_putback finds it so it will free
free the zspage finally.

In this patch, we expand fullness_list from ZS_EMPTY to ZS_FULL.
First of all, it will use ZS_EMPTY list for delay freeing.
And with adding ZS_FULL list, it makes to identify whether zspage is
isolated or not via list_empty(>list) test.

Cc: Sergey Senozhatsky 
Signed-off-by: Minchan Kim 
---
 include/uapi/linux/magic.h |   1 +
 mm/zsmalloc.c  | 793 ++---
 2 files changed, 672 insertions(+), 122 deletions(-)

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index d829ce63529d..e398beac67b8 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -81,5 +81,6 @@
 /* Since UDF 2.01 is ISO 13346 based... */
 #define UDF_SUPER_MAGIC0x15013346
 #define BALLOON_KVM_MAGIC  0x13661366
+#define ZSMALLOC_MAGIC 0x58295829
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c6fb543cfb98..a80100db16d6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -17,14 +17,14 @@
  *
  * Usage of struct page fields:
  * page->private: points to zspage
- * page->index: offset of the first object starting in this page.
- * For the first page, this is always 0, so we use this field
- * to store handle for huge object.
- * page->next: links together all component pages of a zspage
+ * page->freelist(index): links together all component pages of a zspage
+ * For the huge page, this is always 0, so we use this field
+ * to store handle.
  *
  * Usage of struct page flags:
  * PG_private: identifies the first component page
  * PG_private2: identifies the last component page
+ * PG_owner_priv_1: indentifies the huge component page
  *
 

[PATCH v6r2 11/12] zsmalloc: page migration support

2016-05-26 Thread Minchan Kim
Follow up Sergey's review

>From 2deede28c91910a9d3493feae30bed507e72f213 Mon Sep 17 00:00:00 2001
From: Minchan Kim 
Date: Thu, 5 May 2016 00:01:03 +0900
Subject: [PATCH v6r2] zsmalloc: page migration support

This patch introduces run-time migration feature for zspage.

For migration, VM uses page.lru field so it would be better to not use
page.next field which is unified with page.lru for own purpose.
For that, firstly, we can get first object offset of the page via
runtime calculation instead of using page.index so we can use
page.index as link for page chaining instead of page.next.

In case of huge object, it stores handle to page.index instead of
next link of page chaining because huge object doesn't need to next
link for page chaining. So get_next_page need to identify huge
object to return NULL. For it, this patch uses PG_owner_priv_1 flag
of the page flag.

For migration, it supports three functions

* zs_page_isolate

It isolates a zspage which includes a subpage VM want to migrate
from class so anyone cannot allocate new object from the zspage.

We could try to isolate a zspage by the number of subpage so
subsequent isolation trial of other subpage of the zpsage shouldn't
fail. For that, we introduce zspage.isolated count. With that,
zs_page_isolate can know whether zspage is already isolated or not
for migration so if it is isolated for migration, subsequent
isolation trial can be successful without trying further isolation.

* zs_page_migrate

First of all, it holds write-side zspage->lock to prevent migrate other
subpage in zspage. Then, lock all objects in the page VM want to migrate.
The reason we should lock all objects in the page is due to race between
zs_map_object and zs_page_migrate.

zs_map_object   zs_page_migrate

pin_tag(handle)
obj = handle_to_obj(handle)
obj_to_location(obj, , _idx);

write_lock(>lock)
if (!trypin_tag(handle))
goto unpin_object

zspage = get_zspage(page);
read_lock(>lock);

If zs_page_migrate doesn't do trypin_tag, zs_map_object's page can
be stale by migration so it goes crash.

If it locks all of objects successfully, it copies content from
old page to new one, finally, create new zspage chain with new page.
And if it's last isolated subpage in the zspage, put the zspage back
to class.

* zs_page_putback

It returns isolated zspage to right fullness_group list if it fails to
migrate a page. If it find a zspage is ZS_EMPTY, it queues zspage
freeing to workqueue. See below about async zspage freeing.

This patch introduces asynchronous zspage free. The reason to need it
is we need page_lock to clear PG_movable but unfortunately,
zs_free path should be atomic so the apporach is try to grab page_lock.
If it got page_lock of all of pages successfully, it can free zspage
immediately. Otherwise, it queues free request and free zspage via
workqueue in process context.

If zs_free finds the zspage is isolated when it try to free zspage,
it delays the freeing until zs_page_putback finds it so it will free
free the zspage finally.

In this patch, we expand fullness_list from ZS_EMPTY to ZS_FULL.
First of all, it will use ZS_EMPTY list for delay freeing.
And with adding ZS_FULL list, it makes to identify whether zspage is
isolated or not via list_empty(>list) test.

Cc: Sergey Senozhatsky 
Signed-off-by: Minchan Kim 
---
 include/uapi/linux/magic.h |   1 +
 mm/zsmalloc.c  | 793 ++---
 2 files changed, 672 insertions(+), 122 deletions(-)

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index d829ce63529d..e398beac67b8 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -81,5 +81,6 @@
 /* Since UDF 2.01 is ISO 13346 based... */
 #define UDF_SUPER_MAGIC0x15013346
 #define BALLOON_KVM_MAGIC  0x13661366
+#define ZSMALLOC_MAGIC 0x58295829
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c6fb543cfb98..a80100db16d6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -17,14 +17,14 @@
  *
  * Usage of struct page fields:
  * page->private: points to zspage
- * page->index: offset of the first object starting in this page.
- * For the first page, this is always 0, so we use this field
- * to store handle for huge object.
- * page->next: links together all component pages of a zspage
+ * page->freelist(index): links together all component pages of a zspage
+ * For the huge page, this is always 0, so we use this field
+ * to store handle.
  *
  * Usage of struct page flags:
  * PG_private: identifies the first component page
  * PG_private2: identifies the last component page
+ * PG_owner_priv_1: indentifies the huge component page
  *
  */
 
@@ -49,6 +49,11 @@
 #include 
 #include 
 #include 
+#include 

Re: [PATCH] clk: rockchip: fix cclk error handing

2016-05-26 Thread Heiko Stuebner
Am Donnerstag, 26. Mai 2016, 21:49:08 schrieb Xing Zheng:
> It maybe due to a copy-paste error the error handing should be
> cclk not clk.
> 
> Reported-by: Lin Huang 
> Signed-off-by: Xing Zheng 

applied to my clk-fixes branch after adapting the commit message a bit.


Thanks for catching that
Heiko


Re: [PATCH] clk: rockchip: fix cclk error handing

2016-05-26 Thread Heiko Stuebner
Am Donnerstag, 26. Mai 2016, 21:49:08 schrieb Xing Zheng:
> It maybe due to a copy-paste error the error handing should be
> cclk not clk.
> 
> Reported-by: Lin Huang 
> Signed-off-by: Xing Zheng 

applied to my clk-fixes branch after adapting the commit message a bit.


Thanks for catching that
Heiko


Re: [GIT PULL] Ceph updates for 4.7-rc1

2016-05-26 Thread Sage Weil
On Thu, 26 May 2016, Linus Torvalds wrote:
> On Thu, May 26, 2016 at 11:31 AM, Linus Torvalds
>  wrote:
> >
> > Pulled and then immediately unpulled again.
> 
> .. and having thought it over, I ended up re-pulling again, so now
> it's going through my build test.
> 
> Consider this discussion a strong encouragement to *not* do this in
> the future - sending me pull requests at the end of the merge window
> without them having been in linux-next is a no-no, unless those pull
> requests are small and trivial (or have fixes that I'd pull even
> outside the merge window, of course).

Thank you!  We'll be sure we include things in -next well beforehand next 
time around, especially if it's a big diff like this one.

One point of clarification, though: in the past I've squashed down fixes 
discovered during testing if the branch hasn't hit a stable tree yet 
(e.g., your tree).  AIUI this is(was?) standard procedure for things in 
-next.  Do you want us to avoid squashing if we are creeping up on pull 
request time, or are you primarily interested in, say, seeing that what 
has been in -next for a while is substantially the same as what you pull, 
and has perhaps been there unmodified for at least a few days?  Or would 
you rather see fixup patches if we identify issues in the last few days of 
testing?

Thanks-
sage


Re: [GIT PULL] Ceph updates for 4.7-rc1

2016-05-26 Thread Sage Weil
On Thu, 26 May 2016, Linus Torvalds wrote:
> On Thu, May 26, 2016 at 11:31 AM, Linus Torvalds
>  wrote:
> >
> > Pulled and then immediately unpulled again.
> 
> .. and having thought it over, I ended up re-pulling again, so now
> it's going through my build test.
> 
> Consider this discussion a strong encouragement to *not* do this in
> the future - sending me pull requests at the end of the merge window
> without them having been in linux-next is a no-no, unless those pull
> requests are small and trivial (or have fixes that I'd pull even
> outside the merge window, of course).

Thank you!  We'll be sure we include things in -next well beforehand next 
time around, especially if it's a big diff like this one.

One point of clarification, though: in the past I've squashed down fixes 
discovered during testing if the branch hasn't hit a stable tree yet 
(e.g., your tree).  AIUI this is(was?) standard procedure for things in 
-next.  Do you want us to avoid squashing if we are creeping up on pull 
request time, or are you primarily interested in, say, seeing that what 
has been in -next for a while is substantially the same as what you pull, 
and has perhaps been there unmodified for at least a few days?  Or would 
you rather see fixup patches if we identify issues in the last few days of 
testing?

Thanks-
sage


Re: [PATCH v3 02/12] of: add J-Core cpu bindings

2016-05-26 Thread Rob Landley


On 05/25/2016 06:04 PM, Rich Felker wrote:
> On Wed, May 25, 2016 at 11:22:15AM +0100, Mark Rutland wrote:
>> * What state should the CPU be in when it branches to the provided
>>   address?
>>   - Must the MMU be off?
> 
> Current models are nommu.

As far as I know, we're the first nommu SMP implementation in Linux.

>> At some point, you are likely to want CPU hotplug and/or cpuidle.

There are hundreds of todo items. At the moment we're just trying to get
basic board support in for more or less the design we demoed a year ago
at https://lwn.net/Articles/647636/

The roadmap page we've posted is a summary of a summary, it doesn't even
mention the DSP designs (plural) we've talked about at some length
internally. There's a DMA engine we're not using yet. We've got various
other things like ethernet support that the cheap $50 introductory board
we're targeting as an entry point hasn't got connectors for. And we
didn't do anything for the vga or sound connectors on that board because
"boot to a shell prompt on serial prompt" was our first target. (It runs
out of initramfs, but since you need the sdcard to load linux from, and
thus the bootloader had to have an sdcard driver, it seemed a shame NOT
to have an sdcard driver in linux too.)

Other than that? Intentionally minimalist first pass.

>> We
>> didn't provision the arm64 spin-table for either of these and never
>> extended it, but you may want to put in place some discoverability now
>> to allow future OSs to use that new support while allowing current OSs
>> to retain functional (e.g. not requiring a new enable-method string).
>>
>>> +-
>>> +Cache controller node
>>> +-
>>> +
>>> +Required properties:
>>> +
>>> +- compatible: Must be "jcore,cache".
>>> +
>>> +- reg: A memory range for the cache controller registers.
>>
>> There is a well-defined memory map for the cache controller?

The icache and dcache are 8k each, have 32 byte cache lines, and they
cache (addr>>5)&255 so reading from + or - 8192 bytes will evict that
cache line due to aliasing.

The icache and dcache each have an enable and flush bit in the
processor's control register. (So 4 bits total controlling the cache, I
think? I'd have to dig up Niishi-san's slides to check.)

Each processor has its own set of control registers. Back when SMP was
first implemented there was a lot of talk among the engineers about
separating the register stuff out, because right now processor stuff and
SOC I/O devices are kinda mashed together. But "release early, release
often" won out over endlessly polishing the same stuff in-house before
showing it to anybody else. We WANT other people to suggest fixes, but
we also want basic support for what's already been working for a year
and a half to go into the vanilla kernel at some point.

>> If so, please refer to documentation for it here (either in this
>> section, or the top of this document if common with other elements
>> described herein).

During my most recent trip to Japan I sat down with the engineer who
wrote the dcache and dram controller and passed on his explanation of
them to somebody on the j-core mailing list, about halfway through this
message:

  http://lists.j-core.org/pipermail/j-core/2016-April/38.html

I've been meaning to cut that out and put it on a web page on
j-core.org, but have been busy with other things. That post also points
at comments in the VHDL source that implement the features.

That is, alas, the level of documentation we're talking about at the
moment. Better is on the todo list. In the meantime you can RTFS if you
understand vhdl, or ask the engineers on the j-core list or #j-core
freenode channel. The instruction set is based on an existing
architecture and the other SOC features in the initial release are as
minimal as we could get them and still be useful. (We've got a lot more
peripherals implemented than this release includes.)

We thought getting working code into the kernel should be a high
priority, but apparently everything has to be done before anything can
be done?

> The current version "jcore,cache" has a single 32-bit control register
> per cpu that can be used to enable/disable/flush icache and/or dcache.
> There is no finer-grained control. If/when we do larger caches in the
> future where it makes sense, there will be a new binding for it. (For
> example it may make sense to do one that matches the original SH
> memory-mapped cache interface.)

The first dache-only Linux support I did last year worked by reading an
aliased cache line from sram to evict individual cache lines, and it
turned out to have no detectable speed advantage over just blanking the
whole thing. Then doing the same "flush by aliasing" trick with icache
would have required an 8k jump table and we just went "no" and
implemented the flush bits instead, so almost all that code went away.

When we implement L2 cache for j-core, we can start caring about
granularity again, but 

Re: [PATCH v3 02/12] of: add J-Core cpu bindings

2016-05-26 Thread Rob Landley


On 05/25/2016 06:04 PM, Rich Felker wrote:
> On Wed, May 25, 2016 at 11:22:15AM +0100, Mark Rutland wrote:
>> * What state should the CPU be in when it branches to the provided
>>   address?
>>   - Must the MMU be off?
> 
> Current models are nommu.

As far as I know, we're the first nommu SMP implementation in Linux.

>> At some point, you are likely to want CPU hotplug and/or cpuidle.

There are hundreds of todo items. At the moment we're just trying to get
basic board support in for more or less the design we demoed a year ago
at https://lwn.net/Articles/647636/

The roadmap page we've posted is a summary of a summary, it doesn't even
mention the DSP designs (plural) we've talked about at some length
internally. There's a DMA engine we're not using yet. We've got various
other things like ethernet support that the cheap $50 introductory board
we're targeting as an entry point hasn't got connectors for. And we
didn't do anything for the vga or sound connectors on that board because
"boot to a shell prompt on serial prompt" was our first target. (It runs
out of initramfs, but since you need the sdcard to load linux from, and
thus the bootloader had to have an sdcard driver, it seemed a shame NOT
to have an sdcard driver in linux too.)

Other than that? Intentionally minimalist first pass.

>> We
>> didn't provision the arm64 spin-table for either of these and never
>> extended it, but you may want to put in place some discoverability now
>> to allow future OSs to use that new support while allowing current OSs
>> to retain functional (e.g. not requiring a new enable-method string).
>>
>>> +-
>>> +Cache controller node
>>> +-
>>> +
>>> +Required properties:
>>> +
>>> +- compatible: Must be "jcore,cache".
>>> +
>>> +- reg: A memory range for the cache controller registers.
>>
>> There is a well-defined memory map for the cache controller?

The icache and dcache are 8k each, have 32 byte cache lines, and they
cache (addr>>5)&255 so reading from + or - 8192 bytes will evict that
cache line due to aliasing.

The icache and dcache each have an enable and flush bit in the
processor's control register. (So 4 bits total controlling the cache, I
think? I'd have to dig up Niishi-san's slides to check.)

Each processor has its own set of control registers. Back when SMP was
first implemented there was a lot of talk among the engineers about
separating the register stuff out, because right now processor stuff and
SOC I/O devices are kinda mashed together. But "release early, release
often" won out over endlessly polishing the same stuff in-house before
showing it to anybody else. We WANT other people to suggest fixes, but
we also want basic support for what's already been working for a year
and a half to go into the vanilla kernel at some point.

>> If so, please refer to documentation for it here (either in this
>> section, or the top of this document if common with other elements
>> described herein).

During my most recent trip to Japan I sat down with the engineer who
wrote the dcache and dram controller and passed on his explanation of
them to somebody on the j-core mailing list, about halfway through this
message:

  http://lists.j-core.org/pipermail/j-core/2016-April/38.html

I've been meaning to cut that out and put it on a web page on
j-core.org, but have been busy with other things. That post also points
at comments in the VHDL source that implement the features.

That is, alas, the level of documentation we're talking about at the
moment. Better is on the todo list. In the meantime you can RTFS if you
understand vhdl, or ask the engineers on the j-core list or #j-core
freenode channel. The instruction set is based on an existing
architecture and the other SOC features in the initial release are as
minimal as we could get them and still be useful. (We've got a lot more
peripherals implemented than this release includes.)

We thought getting working code into the kernel should be a high
priority, but apparently everything has to be done before anything can
be done?

> The current version "jcore,cache" has a single 32-bit control register
> per cpu that can be used to enable/disable/flush icache and/or dcache.
> There is no finer-grained control. If/when we do larger caches in the
> future where it makes sense, there will be a new binding for it. (For
> example it may make sense to do one that matches the original SH
> memory-mapped cache interface.)

The first dache-only Linux support I did last year worked by reading an
aliased cache line from sram to evict individual cache lines, and it
turned out to have no detectable speed advantage over just blanking the
whole thing. Then doing the same "flush by aliasing" trick with icache
would have required an 8k jump table and we just went "no" and
implemented the flush bits instead, so almost all that code went away.

When we implement L2 cache for j-core, we can start caring about
granularity again, but 

RE: [PATCH 2/7] crypto : async implementation for sha1-mb

2016-05-26 Thread Yu, Fenghua
> From: Dey, Megha
> Sent: Thursday, May 19, 2016 5:43 PM
> To: herb...@gondor.apana.org.au
> Cc: tim.c.c...@linux.intel.com; da...@davemloft.net; linux-
> cry...@vger.kernel.org; linux-kernel@vger.kernel.org; Dey, Megha
> ; Yu, Fenghua ; Megha
> Dey 
> Subject: [PATCH 2/7] crypto : async implementation for sha1-mb
> 
> From: Megha Dey 
> 
> Herbert wants the sha1-mb algorithm to have an async implementation:
> https://lkml.org/lkml/2016/4/5/286.
> Currently, sha1-mb uses an async interface for the outer algorithm and a sync
> interface for the inner algorithm. This patch introduces a async interface for
> even the inner algorithm.
> 
> Signed-off-by: Megha Dey 
> Signed-off-by: Tim Chen 
> ---
>  arch/x86/crypto/sha-mb/sha1_mb.c | 190 ++--
> ---
>  crypto/ahash.c   |   6 --
>  crypto/mcryptd.c | 117 +---
>  include/crypto/hash.h|   6 ++
>  include/crypto/internal/hash.h   |   8 +-
>  include/crypto/mcryptd.h |   8 +-
>  6 files changed, 184 insertions(+), 151 deletions(-)
> 

Hi, Herbert,

Any comment/feedback on this patch set? Is there any plan to push it to 
upstream?

Thanks.

-Fenghua


RE: [PATCH 2/7] crypto : async implementation for sha1-mb

2016-05-26 Thread Yu, Fenghua
> From: Dey, Megha
> Sent: Thursday, May 19, 2016 5:43 PM
> To: herb...@gondor.apana.org.au
> Cc: tim.c.c...@linux.intel.com; da...@davemloft.net; linux-
> cry...@vger.kernel.org; linux-kernel@vger.kernel.org; Dey, Megha
> ; Yu, Fenghua ; Megha
> Dey 
> Subject: [PATCH 2/7] crypto : async implementation for sha1-mb
> 
> From: Megha Dey 
> 
> Herbert wants the sha1-mb algorithm to have an async implementation:
> https://lkml.org/lkml/2016/4/5/286.
> Currently, sha1-mb uses an async interface for the outer algorithm and a sync
> interface for the inner algorithm. This patch introduces a async interface for
> even the inner algorithm.
> 
> Signed-off-by: Megha Dey 
> Signed-off-by: Tim Chen 
> ---
>  arch/x86/crypto/sha-mb/sha1_mb.c | 190 ++--
> ---
>  crypto/ahash.c   |   6 --
>  crypto/mcryptd.c | 117 +---
>  include/crypto/hash.h|   6 ++
>  include/crypto/internal/hash.h   |   8 +-
>  include/crypto/mcryptd.h |   8 +-
>  6 files changed, 184 insertions(+), 151 deletions(-)
> 

Hi, Herbert,

Any comment/feedback on this patch set? Is there any plan to push it to 
upstream?

Thanks.

-Fenghua


Re: [RFC 1/5] usb: chipidea: Add support for Tegra20/30/114/124

2016-05-26 Thread Stephen Warren

On 05/26/2016 03:17 PM, Stephen Warren wrote:

On 05/26/2016 09:40 AM, Thierry Reding wrote:

From: Thierry Reding 

All of these Tegra SoC generations have a ChipIdea UDC IP block that can
be used for device mode communication with a host. Implement rudimentary
support that doesn't allow switching between host and device modes.


Are you sure this is correct for Tegra20? I ask because for the /host/
mode driver, there's a "has_hostpc" flag which is set to false for
Tegra20 and true for all other SoCs. In the U-Boot device mode driver
(if not in the kernel driver; I didn't check), there's a concept of "has
hostpc" too. I might expect that flag to be set the same way for both
drivers. That said, I /think/ the host and device HW are unrelated, so
it's possible has_hostpc might be set differently for them.
Unfortunately, we haven't enabled the device mode driver for any Tegra20
system in U-Boot so I can't tell whether we should enable has_hostpc for
Tegra20's device mode driver.

Still, if this code works then I guess it's likely correct...


On the other hand, it looks like the kernel device mode driver might 
auto-detect this; in core.c:hw_device_init(), I see:


reg = hw_read(ci, CAP_HCCPARAMS, HCCPARAMS_LEN) >>
__ffs(HCCPARAMS_LEN);
ci->hw_bank.lpm  = reg;

... and in host.c:host_start(), I see:

ehci->has_hostpc = ci->hw_bank.lpm;



Re: [RFC 1/5] usb: chipidea: Add support for Tegra20/30/114/124

2016-05-26 Thread Stephen Warren

On 05/26/2016 03:17 PM, Stephen Warren wrote:

On 05/26/2016 09:40 AM, Thierry Reding wrote:

From: Thierry Reding 

All of these Tegra SoC generations have a ChipIdea UDC IP block that can
be used for device mode communication with a host. Implement rudimentary
support that doesn't allow switching between host and device modes.


Are you sure this is correct for Tegra20? I ask because for the /host/
mode driver, there's a "has_hostpc" flag which is set to false for
Tegra20 and true for all other SoCs. In the U-Boot device mode driver
(if not in the kernel driver; I didn't check), there's a concept of "has
hostpc" too. I might expect that flag to be set the same way for both
drivers. That said, I /think/ the host and device HW are unrelated, so
it's possible has_hostpc might be set differently for them.
Unfortunately, we haven't enabled the device mode driver for any Tegra20
system in U-Boot so I can't tell whether we should enable has_hostpc for
Tegra20's device mode driver.

Still, if this code works then I guess it's likely correct...


On the other hand, it looks like the kernel device mode driver might 
auto-detect this; in core.c:hw_device_init(), I see:


reg = hw_read(ci, CAP_HCCPARAMS, HCCPARAMS_LEN) >>
__ffs(HCCPARAMS_LEN);
ci->hw_bank.lpm  = reg;

... and in host.c:host_start(), I see:

ehci->has_hostpc = ci->hw_bank.lpm;



[PATCH 1/5] lib/mpi: mpi_read_from_buffer(): return error code

2016-05-26 Thread Nicolai Stange
mpi_read_from_buffer() reads a MPI from a buffer into a newly allocated
MPI instance. It expects the buffer's leading two bytes to contain the
number of bits, followed by the actual payload.

On failure, it returns NULL and updates the in/out argument ret_nread
somewhat inconsistently:
- If the given buffer is too short to contain the leading two bytes
  encoding the number of bits or their value is unsupported, then
  ret_nread will be cleared.
- If the allocation of the resulting MPI instance fails, ret_nread is left
  as is.

The only user of mpi_read_from_buffer(), digsig_verify_rsa(), simply checks
for a return value of NULL and returns -ENOMEM if that happens.

While this is all of cosmetic nature only, there is another error condition
which currently isn't detectable by the caller of mpi_read_from_buffer():
if the given buffer is too small to hold the number of bits as encoded in
its first two bytes, the return value will be non-NULL and *ret_nread > 0.

In preparation of communicating this condition to the caller, let
mpi_read_from_buffer() return error values by means of the ERR_PTR()
mechanism.

Make the sole caller of mpi_read_from_buffer(), digsig_verify_rsa(),
check the return value for IS_ERR() rather than == NULL. If IS_ERR() is
true, return the associated error value rather than the fixed -ENOMEM.

Signed-off-by: Nicolai Stange 
---
 lib/digsig.c   | 12 
 lib/mpi/mpicoder.c |  6 +++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1..a121cbc 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,16 +104,18 @@ static int digsig_verify_rsa(struct key *key,
datap = pkh->mpi;
endp = ukp->data + ukp->datalen;
 
-   err = -ENOMEM;
-
for (i = 0; i < pkh->nmpi; i++) {
unsigned int remaining = endp - datap;
pkey[i] = mpi_read_from_buffer(datap, );
-   if (!pkey[i])
+   if (IS_ERR(pkey[i])) {
+   err = PTR_ERR(pkey[i]);
goto err;
+   }
datap += remaining;
}
 
+   err = -ENOMEM;
+
mblen = mpi_get_nbits(pkey[0]);
mlen = DIV_ROUND_UP(mblen, 8);
 
@@ -126,8 +128,10 @@ static int digsig_verify_rsa(struct key *key,
 
nret = siglen;
in = mpi_read_from_buffer(sig, );
-   if (!in)
+   if (IS_ERR(in)) {
+   err = PTR_ERR(in);
goto err;
+   }
 
res = mpi_alloc(mpi_get_nlimbs(in) * 2);
if (!res)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f..275c71e 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -88,12 +88,12 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned 
*ret_nread)
MPI val = NULL;
 
if (*ret_nread < 2)
-   goto leave;
+   return ERR_PTR(-EINVAL);
nbits = buffer[0] << 8 | buffer[1];
 
if (nbits > MAX_EXTERN_MPI_BITS) {
pr_info("MPI: mpi too large (%u bits)\n", nbits);
-   goto leave;
+   return ERR_PTR(-EINVAL);
}
buffer += 2;
nread = 2;
@@ -102,7 +102,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned 
*ret_nread)
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
if (!val)
-   return NULL;
+   return ERR_PTR(-ENOMEM);
i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
i %= BYTES_PER_MPI_LIMB;
val->nbits = nbits;
-- 
2.8.2



[PATCH 1/5] lib/mpi: mpi_read_from_buffer(): return error code

2016-05-26 Thread Nicolai Stange
mpi_read_from_buffer() reads a MPI from a buffer into a newly allocated
MPI instance. It expects the buffer's leading two bytes to contain the
number of bits, followed by the actual payload.

On failure, it returns NULL and updates the in/out argument ret_nread
somewhat inconsistently:
- If the given buffer is too short to contain the leading two bytes
  encoding the number of bits or their value is unsupported, then
  ret_nread will be cleared.
- If the allocation of the resulting MPI instance fails, ret_nread is left
  as is.

The only user of mpi_read_from_buffer(), digsig_verify_rsa(), simply checks
for a return value of NULL and returns -ENOMEM if that happens.

While this is all of cosmetic nature only, there is another error condition
which currently isn't detectable by the caller of mpi_read_from_buffer():
if the given buffer is too small to hold the number of bits as encoded in
its first two bytes, the return value will be non-NULL and *ret_nread > 0.

In preparation of communicating this condition to the caller, let
mpi_read_from_buffer() return error values by means of the ERR_PTR()
mechanism.

Make the sole caller of mpi_read_from_buffer(), digsig_verify_rsa(),
check the return value for IS_ERR() rather than == NULL. If IS_ERR() is
true, return the associated error value rather than the fixed -ENOMEM.

Signed-off-by: Nicolai Stange 
---
 lib/digsig.c   | 12 
 lib/mpi/mpicoder.c |  6 +++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1..a121cbc 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,16 +104,18 @@ static int digsig_verify_rsa(struct key *key,
datap = pkh->mpi;
endp = ukp->data + ukp->datalen;
 
-   err = -ENOMEM;
-
for (i = 0; i < pkh->nmpi; i++) {
unsigned int remaining = endp - datap;
pkey[i] = mpi_read_from_buffer(datap, );
-   if (!pkey[i])
+   if (IS_ERR(pkey[i])) {
+   err = PTR_ERR(pkey[i]);
goto err;
+   }
datap += remaining;
}
 
+   err = -ENOMEM;
+
mblen = mpi_get_nbits(pkey[0]);
mlen = DIV_ROUND_UP(mblen, 8);
 
@@ -126,8 +128,10 @@ static int digsig_verify_rsa(struct key *key,
 
nret = siglen;
in = mpi_read_from_buffer(sig, );
-   if (!in)
+   if (IS_ERR(in)) {
+   err = PTR_ERR(in);
goto err;
+   }
 
res = mpi_alloc(mpi_get_nlimbs(in) * 2);
if (!res)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f..275c71e 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -88,12 +88,12 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned 
*ret_nread)
MPI val = NULL;
 
if (*ret_nread < 2)
-   goto leave;
+   return ERR_PTR(-EINVAL);
nbits = buffer[0] << 8 | buffer[1];
 
if (nbits > MAX_EXTERN_MPI_BITS) {
pr_info("MPI: mpi too large (%u bits)\n", nbits);
-   goto leave;
+   return ERR_PTR(-EINVAL);
}
buffer += 2;
nread = 2;
@@ -102,7 +102,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned 
*ret_nread)
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
if (!val)
-   return NULL;
+   return ERR_PTR(-ENOMEM);
i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
i %= BYTES_PER_MPI_LIMB;
val->nbits = nbits;
-- 
2.8.2



<    1   2   3   4   5   6   7   8   9   >