[PATCH] ipmi: Stop timers before cleaning up the module

2017-11-14 Thread Masamitsu Yamazaki
System may crash after unloading ipmi_si.ko module
because a timer may remain and fire after the module cleaned up resources.

cleanup_one_si() contains the following processing.

/*
 * Make sure that interrupts, the timer and the thread are
 * stopped and will not run again.
 */
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
wait_for_timer_and_thread(to_clean);

/*
 * Timeouts are stopped, now make sure the interrupts are off
 * in the BMC.  Note that timers and CPU interrupts are off,
 * so no need for locks.
 */
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
}

si_state changes as follows in the while loop calling poll(to_clean).

  SI_GETTING_MESSAGES
=> SI_CHECKING_ENABLES
 => SI_SETTING_ENABLES
  => SI_GETTING_EVENTS
   => SI_NORMAL

As written in the code comments above,
timers are expected to stop before the polling loop and not to run again.
But the timer is set again in the following process
when si_state becomes SI_SETTING_ENABLES.

  => poll
 => smi_event_handler
   => handle_transaction_done
  // smi_info->si_state == SI_SETTING_ENABLES
 => start_getting_events
   => start_new_msg
=> smi_mod_timer
  => mod_timer

As a result, before the timer set in start_new_msg() expires, 
the polling loop may see si_state becoming SI_NORMAL 
and the module clean-up finishes.

For example, a hard LOCKUP and panic occurred as follows.
smi_timeout was called and, via smi_event_handler and
kcs_event, hung at port_inb()
trying to access the I/O port after it had been released.

#11 [88069fdc5ef0] end_repeat_nmi at 816ac8d3
[exception RIP: port_inb+19]
RIP: c0473053  RSP: 88069fdc3d80  RFLAGS: 0006
RAX: 8806800f8e00  RBX: 880682bd9400  RCX: 
RDX: 0ca3  RSI: 0ca3  RDI: 8806800f8e40
RBP: 88069fdc3d80   R8: 81d86dfc   R9: 81e36426
R10: 000509f0  R11: 0010  R12: 00]:00
R13:   R14: 0246  R15: 8806800f8e00
ORIG_RAX:   CS: 0010  SS: 
---  ---
#12 [88069fdc3d80] port_inb at c0473053 [ipmi_si]
#13 [88069fdc3d88] kcs_event at c0477952 [ipmi_si]
#14 [88069fdc3db0] smi_event_handler at c047465d [ipmi_si]
#15 [88069fdc3df0] smi_timeout at c0474f9e [ipmi_si]

To fix the problem I defined a flag, timer_can_start, 
as member of struct smi_info.
The flag is enabled immediately after initializing the timer
and disabled immediately before waiting for timer deletion.

Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu 



diff -Nurp a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
--- a/drivers/char/ipmi/ipmi_si_intf.c  2017-11-09 15:00:31.436926440 +0900
+++ b/drivers/char/ipmi/ipmi_si_intf.c  2017-11-13 14:14:02.399051610 +0900
@@ -242,6 +242,9 @@ struct smi_info {
/* The timer for this si. */
struct timer_list   si_timer;
 
+   /* This flag is set, if the timer can be set */
+   bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't 
enough) */
bool timer_running;
 
@@ -417,6 +420,8 @@ out:
 
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
+   if (!smi_info->timer_can_start)
+   return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -436,21 +441,18 @@ static void start_new_msg(struct smi_inf
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
 }
 
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
 {
unsigned char msg[2];
 
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-   if (start_timer)
-   start_new_msg(smi_info, msg, 2);
-   else
-   smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+   start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
 {
unsigned char msg[3];
 
@@ -459,10 +461,7 @@ static void start_clear_flags(struct smi
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
 
-   if (start_timer)
-   start_new_msg(smi_info, msg, 3);
-   else
-   

[PATCH] ipmi: Stop timers before cleaning up the module

2017-11-14 Thread Masamitsu Yamazaki
System may crash after unloading ipmi_si.ko module
because a timer may remain and fire after the module cleaned up resources.

cleanup_one_si() contains the following processing.

/*
 * Make sure that interrupts, the timer and the thread are
 * stopped and will not run again.
 */
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
wait_for_timer_and_thread(to_clean);

/*
 * Timeouts are stopped, now make sure the interrupts are off
 * in the BMC.  Note that timers and CPU interrupts are off,
 * so no need for locks.
 */
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
}

si_state changes as follows in the while loop calling poll(to_clean).

  SI_GETTING_MESSAGES
=> SI_CHECKING_ENABLES
 => SI_SETTING_ENABLES
  => SI_GETTING_EVENTS
   => SI_NORMAL

As written in the code comments above,
timers are expected to stop before the polling loop and not to run again.
But the timer is set again in the following process
when si_state becomes SI_SETTING_ENABLES.

  => poll
 => smi_event_handler
   => handle_transaction_done
  // smi_info->si_state == SI_SETTING_ENABLES
 => start_getting_events
   => start_new_msg
=> smi_mod_timer
  => mod_timer

As a result, before the timer set in start_new_msg() expires, 
the polling loop may see si_state becoming SI_NORMAL 
and the module clean-up finishes.

For example, a hard LOCKUP and panic occurred as follows.
smi_timeout was called and, via smi_event_handler and
kcs_event, hung at port_inb()
trying to access the I/O port after it had been released.

#11 [88069fdc5ef0] end_repeat_nmi at 816ac8d3
[exception RIP: port_inb+19]
RIP: c0473053  RSP: 88069fdc3d80  RFLAGS: 0006
RAX: 8806800f8e00  RBX: 880682bd9400  RCX: 
RDX: 0ca3  RSI: 0ca3  RDI: 8806800f8e40
RBP: 88069fdc3d80   R8: 81d86dfc   R9: 81e36426
R10: 000509f0  R11: 0010  R12: 00]:00
R13:   R14: 0246  R15: 8806800f8e00
ORIG_RAX:   CS: 0010  SS: 
---  ---
#12 [88069fdc3d80] port_inb at c0473053 [ipmi_si]
#13 [88069fdc3d88] kcs_event at c0477952 [ipmi_si]
#14 [88069fdc3db0] smi_event_handler at c047465d [ipmi_si]
#15 [88069fdc3df0] smi_timeout at c0474f9e [ipmi_si]

To fix the problem I defined a flag, timer_can_start, 
as member of struct smi_info.
The flag is enabled immediately after initializing the timer
and disabled immediately before waiting for timer deletion.

Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu 



diff -Nurp a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
--- a/drivers/char/ipmi/ipmi_si_intf.c  2017-11-09 15:00:31.436926440 +0900
+++ b/drivers/char/ipmi/ipmi_si_intf.c  2017-11-13 14:14:02.399051610 +0900
@@ -242,6 +242,9 @@ struct smi_info {
/* The timer for this si. */
struct timer_list   si_timer;
 
+   /* This flag is set, if the timer can be set */
+   bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't 
enough) */
bool timer_running;
 
@@ -417,6 +420,8 @@ out:
 
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
+   if (!smi_info->timer_can_start)
+   return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -436,21 +441,18 @@ static void start_new_msg(struct smi_inf
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
 }
 
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
 {
unsigned char msg[2];
 
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-   if (start_timer)
-   start_new_msg(smi_info, msg, 2);
-   else
-   smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+   start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
 {
unsigned char msg[3];
 
@@ -459,10 +461,7 @@ static void start_clear_flags(struct smi
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
 
-   if (start_timer)
-   start_new_msg(smi_info, msg, 3);
-   else
-   

[PATCH] refcount_t: documentation for memory ordering differences

2017-11-14 Thread Elena Reshetova
Some functions from the refcount_t API provide different
memory ordering guarantees than their atomic counterparts.
This adds a document outlining these differences.

Signed-off-by: Elena Reshetova 
---
 Documentation/refcount-vs-atomic.txt | 124 +++
 1 file changed, 124 insertions(+)
 create mode 100644 Documentation/refcount-vs-atomic.txt

diff --git a/Documentation/refcount-vs-atomic.txt 
b/Documentation/refcount-vs-atomic.txt
new file mode 100644
index 000..e703039
--- /dev/null
+++ b/Documentation/refcount-vs-atomic.txt
@@ -0,0 +1,124 @@
+===================================
+refcount_t API compared to atomic_t
+===================================
+
+The goal of refcount_t API is to provide a minimal API for implementing
+object's reference counters. While a generic architecture-independent
+implementation from lib/refcount.c uses atomic operations underneath,
+there are a number of differences between some of the refcount_*() and
+atomic_*() functions with regards to the memory ordering guarantees.
+This document outlines the differences and provides respective examples
+in order to help maintainers validate their code against the change in
+these memory ordering guarantees.
+
+memory-barriers.txt and atomic_t.txt provide more background to the
+memory ordering in general and for atomic operations specifically.
+
+Notation
+
+
+An absence of memory ordering guarantees (i.e. fully unordered)
+in case of atomics & refcounters only provides atomicity and
+program order (po) relation (on the same CPU). It guarantees that
+each atomic_*() and refcount_*() operation is atomic and instructions
+are executed in program order on a single CPU.
+Implemented using READ_ONCE()/WRITE_ONCE() and
+compare-and-swap primitives.
+
+A strong (full) memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before any po-later instruction is executed on the same CPU.
+It also guarantees that all po-earlier stores on the same CPU
+and all propagated stores from other CPUs must propagate to all
+other CPUs before any po-later instruction is executed on the original
+CPU (A-cumulative property). Implemented using smp_mb().
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). Implemented using smp_store_release().
+
+A control dependency (on success) for refcounters guarantees that
+if a reference for an object was successfully obtained (reference
+counter increment or addition happened, function returned true),
+then further stores are ordered against this operation.
+Control dependency on stores are not implemented using any explicit
+barriers, but rely on CPU not to speculate on stores. This is only
+a single CPU relation and provides no guarantees for other CPUs.
+
+
+Comparison of functions
+==
+
+case 1) - non-RMW ops
+-
+
+Function changes:
+atomic_set() --> refcount_set()
+atomic_read() --> refcount_read()
+
+Memory ordering guarantee changes:
+fully unordered --> fully unordered
+
+case 2) - increment-based ops that return no value
+--
+
+Function changes:
+atomic_inc() --> refcount_inc()
+atomic_add() --> refcount_add()
+
+Memory ordering guarantee changes:
+fully unordered --> fully unordered
+
+
+case 3) - decrement-based RMW ops that return no value
+--
+Function changes:
+atomic_dec() --> refcount_dec()
+
+Memory ordering guarantee changes:
+fully unordered --> RELEASE ordering
+
+
+case 4) - increment-based RMW ops that return a value
+-
+
+Function changes:
+atomic_inc_not_zero() --> refcount_inc_not_zero()
+no atomic counterpart --> refcount_add_not_zero()
+
+Memory ordering guarantees changes:
+fully ordered --> control dependency on success for stores
+
+*Note*: we really assume here that necessary ordering is provided as a result
+of obtaining pointer to the object!
+
+
+case 5) - decrement-based RMW ops that return a value
+-
+
+Function changes:
+atomic_dec_and_test() --> refcount_dec_and_test()
+atomic_sub_and_test() --> refcount_sub_and_test()
+no atomic counterpart --> refcount_dec_if_one()
+atomic_add_unless(, -1, 1) --> refcount_dec_not_one()
+
+Memory ordering guarantees changes:
+fully 

[PATCH] refcount_t: documentation for memory ordering differences

2017-11-14 Thread Elena Reshetova
Some functions from the refcount_t API provide different
memory ordering guarantees than their atomic counterparts.
This adds a document outlining these differences.

Signed-off-by: Elena Reshetova 
---
 Documentation/refcount-vs-atomic.txt | 124 +++
 1 file changed, 124 insertions(+)
 create mode 100644 Documentation/refcount-vs-atomic.txt

diff --git a/Documentation/refcount-vs-atomic.txt 
b/Documentation/refcount-vs-atomic.txt
new file mode 100644
index 000..e703039
--- /dev/null
+++ b/Documentation/refcount-vs-atomic.txt
@@ -0,0 +1,124 @@
+===================================
+refcount_t API compared to atomic_t
+===================================
+
+The goal of refcount_t API is to provide a minimal API for implementing
+object's reference counters. While a generic architecture-independent
+implementation from lib/refcount.c uses atomic operations underneath,
+there are a number of differences between some of the refcount_*() and
+atomic_*() functions with regards to the memory ordering guarantees.
+This document outlines the differences and provides respective examples
+in order to help maintainers validate their code against the change in
+these memory ordering guarantees.
+
+memory-barriers.txt and atomic_t.txt provide more background to the
+memory ordering in general and for atomic operations specifically.
+
+Notation
+
+
+An absence of memory ordering guarantees (i.e. fully unordered)
+in case of atomics & refcounters only provides atomicity and
+program order (po) relation (on the same CPU). It guarantees that
+each atomic_*() and refcount_*() operation is atomic and instructions
+are executed in program order on a single CPU.
+Implemented using READ_ONCE()/WRITE_ONCE() and
+compare-and-swap primitives.
+
+A strong (full) memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before any po-later instruction is executed on the same CPU.
+It also guarantees that all po-earlier stores on the same CPU
+and all propagated stores from other CPUs must propagate to all
+other CPUs before any po-later instruction is executed on the original
+CPU (A-cumulative property). Implemented using smp_mb().
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). Implemented using smp_store_release().
+
+A control dependency (on success) for refcounters guarantees that
+if a reference for an object was successfully obtained (reference
+counter increment or addition happened, function returned true),
+then further stores are ordered against this operation.
+Control dependency on stores are not implemented using any explicit
+barriers, but rely on CPU not to speculate on stores. This is only
+a single CPU relation and provides no guarantees for other CPUs.
+
+
+Comparison of functions
+==
+
+case 1) - non-RMW ops
+-
+
+Function changes:
+atomic_set() --> refcount_set()
+atomic_read() --> refcount_read()
+
+Memory ordering guarantee changes:
+fully unordered --> fully unordered
+
+case 2) - increment-based ops that return no value
+--
+
+Function changes:
+atomic_inc() --> refcount_inc()
+atomic_add() --> refcount_add()
+
+Memory ordering guarantee changes:
+fully unordered --> fully unordered
+
+
+case 3) - decrement-based RMW ops that return no value
+--
+Function changes:
+atomic_dec() --> refcount_dec()
+
+Memory ordering guarantee changes:
+fully unordered --> RELEASE ordering
+
+
+case 4) - increment-based RMW ops that return a value
+-
+
+Function changes:
+atomic_inc_not_zero() --> refcount_inc_not_zero()
+no atomic counterpart --> refcount_add_not_zero()
+
+Memory ordering guarantees changes:
+fully ordered --> control dependency on success for stores
+
+*Note*: we really assume here that necessary ordering is provided as a result
+of obtaining pointer to the object!
+
+
+case 5) - decrement-based RMW ops that return a value
+-
+
+Function changes:
+atomic_dec_and_test() --> refcount_dec_and_test()
+atomic_sub_and_test() --> refcount_sub_and_test()
+no atomic counterpart --> refcount_dec_if_one()
+atomic_add_unless(, -1, 1) --> refcount_dec_not_one()
+
+Memory ordering guarantees changes:
+fully ordered --> RELEASE ordering 

Re: [PATCH v6 2/2] watchdog: Add Spreadtrum watchdog driver

2017-11-14 Thread Eric Long
Hi,

Thanks for Guenter's review and detailed comments.
Please help to apply this patch if there are no other comments.

Best regards,
Eric Long

On Fri, Nov 10, 2017 at 01:00:32PM -0800, Guenter Roeck wrote:
> On Mon, Nov 06, 2017 at 10:46:28AM +0800, Eric Long wrote:
> > This patch adds the watchdog driver for Spreadtrum SC9860 platform.
> > 
> > Signed-off-by: Eric Long 
> 
> Reviewed-by: Guenter Roeck 
> 
> > ---
> > Changes since v5:
> >  - Modify the "irq" type as int type.
> >  - Delete unused api sprd_wdt_is_running().
> > 
> > Changes since v4:
> >  - Remove sprd_wdt_remove().
> >  - Add devm_add_action() for sprd_wdt_disable().
> > 
> > Changes since v3:
> >  - Update Kconfig SPRD_WATCHDOG help messages.
> >  - Correct the wrong spell words.
> >  - Rename SPRD_WDT_CNT_HIGH_VALUE as SPRD_WDT_CNT_HIGH_SHIFT.
> >  - Remove unused macor.
> >  - Update sprd_wdt_set_pretimeout() api.
> >  - Add wdt->wdd.timeout default value.
> >  - Use devm_watchdog_register_device() to register wdt device.
> >  - If module does not support NOWAYOUT, disable wdt when remove this driver.
> >  - Call sprd_wdt_disable() every wdt suspend.
> > 
> > Changes since v2:
> >  - Rename all the macors, add SPRD tag at the head of the macro names.
> >  - Rename SPRD_WDT_CLK as SPRD_WTC_CNT_STEP.
> >  - Remove the code which check timeout value at the wrong place.
> >  - Add min/max timeout value limit.
> >  - Remove set WDOG_HW_RUNNING status at sprd_wdt_enable().
> >  - Add timeout/pretimeout judgment when set them.
> >  - Support WATCHDOG_NOWAYOUT status.
> > 
> > Changes since v1:
> >  - Use pretimeout instead of own implementation.
> >  - Fix timeout loop when loading timeout values.
> >  - use the infrastructure to read and set "timeout-sec" property.
> >  - Add conditions when start or stop watchdog.
> >  - Change the position of enabling watchdog.
> >  - Other optimization.
> > ---
> >  drivers/watchdog/Kconfig|   8 +
> >  drivers/watchdog/Makefile   |   1 +
> >  drivers/watchdog/sprd_wdt.c | 399 
> > 
> >  3 files changed, 408 insertions(+)
> >  create mode 100644 drivers/watchdog/sprd_wdt.c
> > 
> > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> > index c722cbf..3367a8c 100644
> > --- a/drivers/watchdog/Kconfig
> > +++ b/drivers/watchdog/Kconfig
> > @@ -787,6 +787,14 @@ config UNIPHIER_WATCHDOG
> >   To compile this driver as a module, choose M here: the
> >   module will be called uniphier_wdt.
> >  
> > +config SPRD_WATCHDOG
> > +   tristate "Spreadtrum watchdog support"
> > +   depends on ARCH_SPRD || COMPILE_TEST
> > +   select WATCHDOG_CORE
> > +   help
> > + Say Y here to include watchdog timer supported
> > + by Spreadtrum system.
> > +
> >  # AVR32 Architecture
> >  
> >  config AT32AP700X_WDT
> > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> > index 56adf9f..187cca2 100644
> > --- a/drivers/watchdog/Makefile
> > +++ b/drivers/watchdog/Makefile
> > @@ -87,6 +87,7 @@ obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
> >  obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
> >  obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
> >  obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o
> > +obj-$(CONFIG_SPRD_WATCHDOG) += sprd_wdt.o
> >  
> >  # AVR32 Architecture
> >  obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
> > diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
> > new file mode 100644
> > index 000..a8b280f
> > --- /dev/null
> > +++ b/drivers/watchdog/sprd_wdt.c
> > @@ -0,0 +1,399 @@
> > +/*
> > + * Spreadtrum watchdog driver
> > + * Copyright (C) 2017 Spreadtrum - http://www.spreadtrum.com
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * version 2 as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful, but
> > + * WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * General Public License for more details.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define SPRD_WDT_LOAD_LOW  0x0
> > +#define SPRD_WDT_LOAD_HIGH 0x4
> > +#define SPRD_WDT_CTRL  0x8
> > +#define SPRD_WDT_INT_CLR   0xc
> > +#define SPRD_WDT_INT_RAW   0x10
> > +#define SPRD_WDT_INT_MSK   0x14
> > +#define SPRD_WDT_CNT_LOW   0x18
> > +#define SPRD_WDT_CNT_HIGH  0x1c
> > +#define SPRD_WDT_LOCK  0x20
> > +#define SPRD_WDT_IRQ_LOAD_LOW  0x2c
> > +#define SPRD_WDT_IRQ_LOAD_HIGH 0x30
> > +
> > +/* WDT_CTRL */
> > +#define SPRD_WDT_INT_EN_BIT

Re: [PATCH v6 2/2] watchdog: Add Spreadtrum watchdog driver

2017-11-14 Thread Eric Long
Hi,

Thanks for Guenter's review and detailed comments.
Please help to apply this patch if there are no other comments.

Best regards,
Eric Long

On Fri, Nov 10, 2017 at 01:00:32PM -0800, Guenter Roeck wrote:
> On Mon, Nov 06, 2017 at 10:46:28AM +0800, Eric Long wrote:
> > This patch adds the watchdog driver for Spreadtrum SC9860 platform.
> > 
> > Signed-off-by: Eric Long 
> 
> Reviewed-by: Guenter Roeck 
> 
> > ---
> > Changes since v5:
> >  - Modify the "irq" type as int type.
> >  - Delete unused api sprd_wdt_is_running().
> > 
> > Changes since v4:
> >  - Remove sprd_wdt_remove().
> >  - Add devm_add_action() for sprd_wdt_disable().
> > 
> > Changes since v3:
> >  - Update Kconfig SPRD_WATCHDOG help messages.
> >  - Correct the wrong spell words.
> >  - Rename SPRD_WDT_CNT_HIGH_VALUE as SPRD_WDT_CNT_HIGH_SHIFT.
> >  - Remove unused macor.
> >  - Update sprd_wdt_set_pretimeout() api.
> >  - Add wdt->wdd.timeout default value.
> >  - Use devm_watchdog_register_device() to register wdt device.
> >  - If module does not support NOWAYOUT, disable wdt when remove this driver.
> >  - Call sprd_wdt_disable() every wdt suspend.
> > 
> > Changes since v2:
> >  - Rename all the macors, add SPRD tag at the head of the macro names.
> >  - Rename SPRD_WDT_CLK as SPRD_WTC_CNT_STEP.
> >  - Remove the code which check timeout value at the wrong place.
> >  - Add min/max timeout value limit.
> >  - Remove set WDOG_HW_RUNNING status at sprd_wdt_enable().
> >  - Add timeout/pretimeout judgment when set them.
> >  - Support WATCHDOG_NOWAYOUT status.
> > 
> > Changes since v1:
> >  - Use pretimeout instead of own implementation.
> >  - Fix timeout loop when loading timeout values.
> >  - use the infrastructure to read and set "timeout-sec" property.
> >  - Add conditions when start or stop watchdog.
> >  - Change the position of enabling watchdog.
> >  - Other optimization.
> > ---
> >  drivers/watchdog/Kconfig|   8 +
> >  drivers/watchdog/Makefile   |   1 +
> >  drivers/watchdog/sprd_wdt.c | 399 
> > 
> >  3 files changed, 408 insertions(+)
> >  create mode 100644 drivers/watchdog/sprd_wdt.c
> > 
> > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> > index c722cbf..3367a8c 100644
> > --- a/drivers/watchdog/Kconfig
> > +++ b/drivers/watchdog/Kconfig
> > @@ -787,6 +787,14 @@ config UNIPHIER_WATCHDOG
> >   To compile this driver as a module, choose M here: the
> >   module will be called uniphier_wdt.
> >  
> > +config SPRD_WATCHDOG
> > +   tristate "Spreadtrum watchdog support"
> > +   depends on ARCH_SPRD || COMPILE_TEST
> > +   select WATCHDOG_CORE
> > +   help
> > + Say Y here to include watchdog timer supported
> > + by Spreadtrum system.
> > +
> >  # AVR32 Architecture
> >  
> >  config AT32AP700X_WDT
> > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> > index 56adf9f..187cca2 100644
> > --- a/drivers/watchdog/Makefile
> > +++ b/drivers/watchdog/Makefile
> > @@ -87,6 +87,7 @@ obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
> >  obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
> >  obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
> >  obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o
> > +obj-$(CONFIG_SPRD_WATCHDOG) += sprd_wdt.o
> >  
> >  # AVR32 Architecture
> >  obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
> > diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
> > new file mode 100644
> > index 000..a8b280f
> > --- /dev/null
> > +++ b/drivers/watchdog/sprd_wdt.c
> > @@ -0,0 +1,399 @@
> > +/*
> > + * Spreadtrum watchdog driver
> > + * Copyright (C) 2017 Spreadtrum - http://www.spreadtrum.com
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * version 2 as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful, but
> > + * WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * General Public License for more details.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#define SPRD_WDT_LOAD_LOW  0x0
> > +#define SPRD_WDT_LOAD_HIGH 0x4
> > +#define SPRD_WDT_CTRL  0x8
> > +#define SPRD_WDT_INT_CLR   0xc
> > +#define SPRD_WDT_INT_RAW   0x10
> > +#define SPRD_WDT_INT_MSK   0x14
> > +#define SPRD_WDT_CNT_LOW   0x18
> > +#define SPRD_WDT_CNT_HIGH  0x1c
> > +#define SPRD_WDT_LOCK  0x20
> > +#define SPRD_WDT_IRQ_LOAD_LOW  0x2c
> > +#define SPRD_WDT_IRQ_LOAD_HIGH 0x30
> > +
> > +/* WDT_CTRL */
> > +#define SPRD_WDT_INT_EN_BITBIT(0)
> > +#define SPRD_WDT_CNT_EN_BIT 

[PATCH v2] refcount_t vs. atomic_t ordering differences

2017-11-14 Thread Elena Reshetova
Changes in v2:

 - typos and english are fixed based on Randy Dunlap's
   proof reading
 - structure of document improved: 
 * definitions now in the beginning
 * confusing examples removed
 * less redundancy overall and more up-to-the-point text
 - definitions try to follow LKMM defined in
   github.com/aparri/memory-model/blob/master/Documentation/explanation.txt


Elena Reshetova (1):
  refcount_t: documentation for memory ordering differences

 Documentation/refcount-vs-atomic.txt | 124 +++
 1 file changed, 124 insertions(+)
 create mode 100644 Documentation/refcount-vs-atomic.txt

-- 
2.7.4



[PATCH v2] refcount_t vs. atomic_t ordering differences

2017-11-14 Thread Elena Reshetova
Changes in v2:

 - typos and english are fixed based on Randy Dunlap's
   proof reading
 - structure of document improved: 
 * definitions now in the beginning
 * confusing examples removed
 * less redundancy overall and more up-to-the-point text
 - definitions try to follow LKMM defined in
   github.com/aparri/memory-model/blob/master/Documentation/explanation.txt


Elena Reshetova (1):
  refcount_t: documentation for memory ordering differences

 Documentation/refcount-vs-atomic.txt | 124 +++
 1 file changed, 124 insertions(+)
 create mode 100644 Documentation/refcount-vs-atomic.txt

-- 
2.7.4



Re: [PATCH] x86: use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz" again

2017-11-14 Thread Greg Kroah-Hartman
On Wed, Nov 15, 2017 at 08:43:58AM +0100, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki  wrote:
> 
> > On Wednesday, November 15, 2017 1:06:12 AM CET Linus Torvalds wrote:
> > > On Tue, Nov 14, 2017 at 4:04 PM, Linus Torvalds
> > >  wrote:
> > > > On Tue, Nov 14, 2017 at 3:53 PM, Thomas Gleixner  
> > > > wrote:
> > > >> Current head + Raphaels patch:
> > > >>
> > > > >> real    0m0.029s
> > > > >> user    0m0.000s
> > > > >> sys     0m0.010s
> > > >>
> > > >> So that patch is actually slower.
> > > >
> > > > Oh it definitely is expected to be slower, because it does the IPI to
> > > > all the cores and actually gets their frequency right.
> > > >
> > > > It was the old one that we had to revert (because it did so
> > > > sequentially) that was really bad, and took something like 2+ seconds
> > > > on Ingo's 160-core thing, iirc.
> > > 
> > > Looked it up. Ingo's machine "only" had 120 cores, and he said
> > > 
> > > fomalhaut:~> time cat /proc/cpuinfo  >/dev/null
> > > > real    0m2.689s
> > > 
> > > for the bad serial case, so yeah, it looks "a bit" better than it was ;)
> > 
> > OK, so may I queue it up?
> > 
> > I don't think I can get that to work substantially faster anyway ...
> 
> The new version is OK I suppose:
> 
>   Acked-by: Ingo Molnar 
> 
> I also think that /proc/cpuinfo is a pretty bad interface for many uses - I 
> personally only very rarely need the cpuinfo of _all_ CPUs.
> 
> We should eventually have /proc/cpu/N/info or so, so that 99% of the time 
> cpuinfo is needed to report bugs we can do:
> 
>   cat /proc/cpu/0/info
> 
> With maybe also the following variants:
> 
>   /proc/cpu/first/
>   /proc/cpu/last/
>   /proc/cpu/current/
> 
> ... to the first/last/current CPUs.

We started to move this info into /sys/devices/cpu/ in individual files,
but that got stalled due to a lack of review and general "freak out" by
the ARM maintainers :)

Hopefully that patch set will come back soon so people can review it
properly.

thanks,

greg k-h


Re: [PATCH] x86: use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz" again

2017-11-14 Thread Greg Kroah-Hartman
On Wed, Nov 15, 2017 at 08:43:58AM +0100, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki  wrote:
> 
> > On Wednesday, November 15, 2017 1:06:12 AM CET Linus Torvalds wrote:
> > > On Tue, Nov 14, 2017 at 4:04 PM, Linus Torvalds
> > >  wrote:
> > > > On Tue, Nov 14, 2017 at 3:53 PM, Thomas Gleixner  
> > > > wrote:
> > > >> Current head + Raphaels patch:
> > > >>
> > > > >> real    0m0.029s
> > > > >> user    0m0.000s
> > > > >> sys     0m0.010s
> > > >>
> > > >> So that patch is actually slower.
> > > >
> > > > Oh it definitely is expected to be slower, because it does the IPI to
> > > > all the cores and actually gets their frequency right.
> > > >
> > > > It was the old one that we had to revert (because it did so
> > > > sequentially) that was really bad, and took something like 2+ seconds
> > > > on Ingo's 160-core thing, iirc.
> > > 
> > > Looked it up. Ingo's machine "only" had 120 cores, and he said
> > > 
> > > fomalhaut:~> time cat /proc/cpuinfo  >/dev/null
> > > > real    0m2.689s
> > > 
> > > for the bad serial case, so yeah, it looks "a bit" better than it was ;)
> > 
> > OK, so may I queue it up?
> > 
> > I don't think I can get that to work substantially faster anyway ...
> 
> The new version is OK I suppose:
> 
>   Acked-by: Ingo Molnar 
> 
> I also think that /proc/cpuinfo is a pretty bad interface for many uses - I 
> personally only very rarely need the cpuinfo of _all_ CPUs.
> 
> We should eventually have /proc/cpu/N/info or so, so that 99% of the times
> cpuinfo is needed to report bugs we can do:
> 
>   cat /proc/cpu/0/info
> 
> With maybe also the following variants:
> 
>   /proc/cpu/first/
>   /proc/cpu/last/
>   /proc/cpu/current/
> 
> ... to the first/last/current CPUs.

We started to move this info into /sys/devices/cpu/ in individual files,
but that got stalled due to a lack of review and general "freak out" by
the ARM maintainers :)

Hopefully that patch set will come back soon so people can review it
properly.

thanks,

greg k-h


Re: [PATCH 02/10] dmaengine: virt-dma: Support for race free transfer termination

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> Even with the introduced vchan_synchronize() we can face race when
> terminating a cyclic transfer.
>
> If the terminate_all is called after the interrupt handler called
> vchan_cyclic_callback(), but before the vchan_complete tasklet is called:
> vc->cyclic is set to the cyclic descriptor, but the descriptor itself was
> freed up in the driver's terminate_all() callback.
> When the vchan_complete() is executed it will try to fetch the vc->cyclic
> vdesc, but the pointer is pointing now to uninitialized memory leading to
> (hard to reproduce) kernel crash.
>
> In order to fix this, drivers should:
> - call vchan_terminate_vdesc() from their terminate_all callback instead
> calling their free_desc function to free up the descriptor.
> - implement device_synchronize callback and call vchan_synchronize().
>
> This way we can make sure that the descriptor is only going to be freed up
> after the vchan_callback was executed in a safe manner.
>
> Signed-off-by: Peter Ujfalusi 

Reviewed-by: Linus Walleij 

Yours,
Linus Walleij


Re: [PATCH 02/10] dmaengine: virt-dma: Support for race free transfer termination

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> Even with the introduced vchan_synchronize() we can face race when
> terminating a cyclic transfer.
>
> If the terminate_all is called after the interrupt handler called
> vchan_cyclic_callback(), but before the vchan_complete tasklet is called:
> vc->cyclic is set to the cyclic descriptor, but the descriptor itself was
> freed up in the driver's terminate_all() callback.
> When the vchan_complete() is executed it will try to fetch the vc->cyclic
> vdesc, but the pointer is pointing now to uninitialized memory leading to
> (hard to reproduce) kernel crash.
>
> In order to fix this, drivers should:
> - call vchan_terminate_vdesc() from their terminate_all callback instead
> calling their free_desc function to free up the descriptor.
> - implement device_synchronize callback and call vchan_synchronize().
>
> This way we can make sure that the descriptor is only going to be freed up
> after the vchan_callback was executed in a safe manner.
>
> Signed-off-by: Peter Ujfalusi 

Reviewed-by: Linus Walleij 

Yours,
Linus Walleij


Re: [GIT PULL] x86 updates for v4.15

2017-11-14 Thread Ingo Molnar

* Linus Torvalds  wrote:

> On Tue, Nov 14, 2017 at 1:48 AM, Borislav Petkov  wrote:
> >
> > Just did 2 suspend cycles (once to RAM and once to disk) on my x230
> > with your tree from right now and it looks ok so far. So it could be
> > machine- and config-specific...
> 
> .. and it's not repeatable for me. I rebooted pretty quickly, and
> didn't gather a lot of information (well, 'dmesg' would SIGBUS, so..)
> and it hasn't happened again.
> 
> Will ignore until I have more information.

Haven't seen such behavior or got such reports - although admittedly laptop 
suspend/resume testing is done only sporadically, as it isn't easily automated.

As per the symptoms one thing that _could_ produce SIGSEGVs are the 
CONFIG_X86_INTEL_UMIP changes: the upcoming changes that make any UMIP action
more verbose should make it more apparent if that's the case.

Plus, of course, anything entry code related. We did a few harmless (looking...)
x86/mm changes as well, but none stands out at the moment.

Thanks,

Ingo


Re: [GIT PULL] x86 updates for v4.15

2017-11-14 Thread Ingo Molnar

* Linus Torvalds  wrote:

> On Tue, Nov 14, 2017 at 1:48 AM, Borislav Petkov  wrote:
> >
> > Just did 2 suspend cycles (once to RAM and once to disk) on my x230
> > with your tree from right now and it looks ok so far. So it could be
> > machine- and config-specific...
> 
> .. and it's not repeatable for me. I rebooted pretty quickly, and
> didn't gather a lot of information (well, 'dmesg' would SIGBUS, so..)
> and it hasn't happened again.
> 
> Will ignore until I have more information.

Haven't seen such behavior or got such reports - although admittedly laptop 
suspend/resume testing is done only sporadically, as it isn't easily automated.

As per the symptoms one thing that _could_ produce SIGSEGVs are the 
CONFIG_X86_INTEL_UMIP changes: the upcoming changes that make any UMIP action
more verbose should make it more apparent if that's the case.

Plus, of course, anything entry code related. We did a few harmless (looking...)
x86/mm changes as well, but none stands out at the moment.

Thanks,

Ingo


Re: [PATCH 07/10] dmaengine: amba-pl08x: Use vchan_terminate_vdesc() instead of desc_free

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> To avoid race with vchan_complete, use the race free way to terminate
> running transfer.
>
> Implement the device_synchronize callback to make sure that the terminated
> descriptor is freed.
>
> CC: Linus Walleij 
> Signed-off-by: Peter Ujfalusi 

I had to read patch 1 before I understood how the descriptor
gets free:ed now, but now I see it :)
Reviewed-by: Linus Walleij 

(Good work with hunting down these corner cases, I'm
very happy you're doing this!)

Yours,
Linus Walleij


Re: [PATCH 07/10] dmaengine: amba-pl08x: Use vchan_terminate_vdesc() instead of desc_free

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> To avoid race with vchan_complete, use the race free way to terminate
> running transfer.
>
> Implement the device_synchronize callback to make sure that the terminated
> descriptor is freed.
>
> CC: Linus Walleij 
> Signed-off-by: Peter Ujfalusi 

I had to read patch 1 before I understood how the descriptor
gets free:ed now, but now I see it :)
Reviewed-by: Linus Walleij 

(Good work with hunting down these corner cases, I'm
very happy you're doing this!)

Yours,
Linus Walleij


Re: [PATCH 01/10] dmaengine: virt-dma: Add helper to free/reuse a descriptor

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> The vchan_vdesc_fini() can be used to free or reuse a given descriptor
> after it has been marked as completed.
>
> Signed-off-by: Peter Ujfalusi 

Reviewed-by: Linus Walleij 

Yours,
Linus Walleij


Re: [PATCH 01/10] dmaengine: virt-dma: Add helper to free/reuse a descriptor

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 3:32 PM, Peter Ujfalusi  wrote:

> The vchan_vdesc_fini() can be used to free or reuse a given descriptor
> after it has been marked as completed.
>
> Signed-off-by: Peter Ujfalusi 

Reviewed-by: Linus Walleij 

Yours,
Linus Walleij


Re: [f2fs-dev] [PATCH] f2fs: expose quota information in debugfs

2017-11-14 Thread Chao Yu
Hi Jaegeuk,

On 2017/11/14 13:12, Jaegeuk Kim wrote:
> This patch shows # of dirty pages and # of hidden quota files.
> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/debug.c | 11 +++
>  fs/f2fs/f2fs.h  | 10 --
>  2 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index f7eec506ceea..ecada8425268 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>   si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
>   si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
>   si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
> + si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
>   si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>   si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>   si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> +
> + si->nquota_files = 0;
> + if (f2fs_sb_has_quota_ino(sbi->sb)) {
> + for (i = 0; i < MAXQUOTAS; i++) {
> + if (f2fs_qf_ino(sbi->sb, i))
> + si->nquota_files++;
> + }
> + }
>   si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>   si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
>   si->aw_cnt = atomic_read(&sbi->aw_cnt);
> @@ -369,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
>  si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
>   seq_printf(s, "  - datas: %4d in files:%4d\n",
>  si->ndirty_data, si->ndirty_files);
> + seq_printf(s, "  - quota datas: %4d in quota files:%4d\n",
> +si->ndirty_qdata, si->nquota_files);
>   seq_printf(s, "  - meta: %4d in %4d\n",
>  si->ndirty_meta, si->meta_pages);
>   seq_printf(s, "  - imeta: %4d\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c379a8ea075..44f874483ecf 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -865,6 +865,7 @@ struct f2fs_sm_info {
>  enum count_type {
>   F2FS_DIRTY_DENTS,
>   F2FS_DIRTY_DATA,
> + F2FS_DIRTY_QDATA,
>   F2FS_DIRTY_NODES,
>   F2FS_DIRTY_META,
>   F2FS_INMEM_PAGES,
> @@ -1642,6 +1643,8 @@ static inline void inode_inc_dirty_pages(struct inode 
> *inode)
>   atomic_inc(&F2FS_I(inode)->dirty_pages);
>   inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
>   F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
> + if (IS_NOQUOTA(inode))

If we're trying to get quota sysfile information, how about using sysfile ino
for distinguishing from normal file?

Thanks,

> + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
>  }
>  
>  static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
> @@ -1658,6 +1661,8 @@ static inline void inode_dec_dirty_pages(struct inode 
> *inode)
>   atomic_dec(&F2FS_I(inode)->dirty_pages);
>   dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
>   F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
> + if (IS_NOQUOTA(inode))
> + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
>  }
>  
>  static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
> @@ -2771,9 +2776,10 @@ struct f2fs_stat_info {
>   unsigned long long hit_largest, hit_cached, hit_rbtree;
>   unsigned long long hit_total, total_ext;
>   int ext_tree, zombie_tree, ext_node;
> - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
> + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> + int ndirty_data, ndirty_qdata;
>   int inmem_pages;
> - unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> + unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
>   int nats, dirty_nats, sits, dirty_sits;
>   int free_nids, avail_nids, alloc_nids;
>   int total_count, utilization;
> 



Re: [f2fs-dev] [PATCH] f2fs: expose quota information in debugfs

2017-11-14 Thread Chao Yu
Hi Jaegeuk,

On 2017/11/14 13:12, Jaegeuk Kim wrote:
> This patch shows # of dirty pages and # of hidden quota files.
> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/debug.c | 11 +++
>  fs/f2fs/f2fs.h  | 10 --
>  2 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index f7eec506ceea..ecada8425268 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>   si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
>   si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
>   si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
> + si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
>   si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
>   si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
>   si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
> +
> + si->nquota_files = 0;
> + if (f2fs_sb_has_quota_ino(sbi->sb)) {
> + for (i = 0; i < MAXQUOTAS; i++) {
> + if (f2fs_qf_ino(sbi->sb, i))
> + si->nquota_files++;
> + }
> + }
>   si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
>   si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
>   si->aw_cnt = atomic_read(&sbi->aw_cnt);
> @@ -369,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
>  si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
>   seq_printf(s, "  - datas: %4d in files:%4d\n",
>  si->ndirty_data, si->ndirty_files);
> + seq_printf(s, "  - quota datas: %4d in quota files:%4d\n",
> +si->ndirty_qdata, si->nquota_files);
>   seq_printf(s, "  - meta: %4d in %4d\n",
>  si->ndirty_meta, si->meta_pages);
>   seq_printf(s, "  - imeta: %4d\n",
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c379a8ea075..44f874483ecf 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -865,6 +865,7 @@ struct f2fs_sm_info {
>  enum count_type {
>   F2FS_DIRTY_DENTS,
>   F2FS_DIRTY_DATA,
> + F2FS_DIRTY_QDATA,
>   F2FS_DIRTY_NODES,
>   F2FS_DIRTY_META,
>   F2FS_INMEM_PAGES,
> @@ -1642,6 +1643,8 @@ static inline void inode_inc_dirty_pages(struct inode 
> *inode)
>   atomic_inc(&F2FS_I(inode)->dirty_pages);
>   inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
>   F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
> + if (IS_NOQUOTA(inode))

If we're trying to get quota sysfile information, how about using sysfile ino
for distinguishing from normal file?

Thanks,

> + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
>  }
>  
>  static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
> @@ -1658,6 +1661,8 @@ static inline void inode_dec_dirty_pages(struct inode 
> *inode)
>   atomic_dec(&F2FS_I(inode)->dirty_pages);
>   dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
>   F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
> + if (IS_NOQUOTA(inode))
> + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
>  }
>  
>  static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
> @@ -2771,9 +2776,10 @@ struct f2fs_stat_info {
>   unsigned long long hit_largest, hit_cached, hit_rbtree;
>   unsigned long long hit_total, total_ext;
>   int ext_tree, zombie_tree, ext_node;
> - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
> + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> + int ndirty_data, ndirty_qdata;
>   int inmem_pages;
> - unsigned int ndirty_dirs, ndirty_files, ndirty_all;
> + unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
>   int nats, dirty_nats, sits, dirty_sits;
>   int free_nids, avail_nids, alloc_nids;
>   int total_count, utilization;
> 



Re: [PATCH] samples: replace FSF address with web source in license notices

2017-11-14 Thread Martin Kepplinger

Am 15.11.2017 07:29 schrieb Greg KH:

On Tue, Nov 14, 2017 at 10:50:37AM +0100, Martin Kepplinger wrote:
A few years ago the FSF moved and "59 Temple Place" is wrong. Having 
this

still in our source files feels old and unmaintained.

Let's take the license statement seriously and not confuse users.

As https://www.gnu.org/licenses/gpl-howto.html suggests, we replace 
the

postal address with "" in the samples
directory.


What would be best is to just put the SPDX single line at the top of 
the

files, and then remove this license "boilerplate" entirely.  I've
started to do that with some subsystems already (drivers/usb/ and
drivers/tty/ are almost finished, see Linus's tree for details), and
I've sent out a patch series for drivers/s390/ yesterday if you want to
see an example of how to do it.

Could you do that here instead of this patch as well?



Is there consensus about this? I'm not a lawyer, but is this clear enough
for users? And what holds against only adding the new SPDX tag line at the
top?


Other than I don't like mixing // and /**/ comments, it indeed looks
quite clean. Is there consensus about the syntax too?

thanks

   martin



Re: [PATCH] samples: replace FSF address with web source in license notices

2017-11-14 Thread Martin Kepplinger

Am 15.11.2017 07:29 schrieb Greg KH:

On Tue, Nov 14, 2017 at 10:50:37AM +0100, Martin Kepplinger wrote:
A few years ago the FSF moved and "59 Temple Place" is wrong. Having 
this

still in our source files feels old and unmaintained.

Let's take the license statement seriously and not confuse users.

As https://www.gnu.org/licenses/gpl-howto.html suggests, we replace 
the

postal address with "" in the samples
directory.


What would be best is to just put the SPDX single line at the top of 
the

files, and then remove this license "boilerplate" entirely.  I've
started to do that with some subsystems already (drivers/usb/ and
drivers/tty/ are almost finished, see Linus's tree for details), and
I've sent out a patch series for drivers/s390/ yesterday if you want to
see an example of how to do it.

Could you do that here instead of this patch as well?



Is there consensus about this? I'm not a lawyer, but is this clear enough
for users? And what holds against only adding the new SPDX tag line at the
top?


Other than I don't like mixing // and /**/ comments, it indeed looks
quite clean. Is there consensus about the syntax too?

thanks

   martin



Re: [RFC PATCH v3 for 4.15 08/24] Provide cpu_opv system call

2017-11-14 Thread Michael Kerrisk (man-pages)
Hi Mathieu

On 14 November 2017 at 21:03, Mathieu Desnoyers
 wrote:
> This new cpu_opv system call executes a vector of operations on behalf
> of user-space on a specific CPU with preemption disabled. It is inspired
> from readv() and writev() system calls which take a "struct iovec" array
> as argument.

Do you have a man page for this syscall already?

Thanks,

Michael


> The operations available are: comparison, memcpy, add, or, and, xor,
> left shift, right shift, and mb. The system call receives a CPU number
> from user-space as argument, which is the CPU on which those operations
> need to be performed. All preparation steps such as loading pointers,
> and applying offsets to arrays, need to be performed by user-space
> before invoking the system call. The "comparison" operation can be used
> to check that the data used in the preparation step did not change
> between preparation of system call inputs and operation execution within
> the preempt-off critical section.
>
> The reason why we require all pointer offsets to be calculated by
> user-space beforehand is because we need to use get_user_pages_fast() to
> first pin all pages touched by each operation. This takes care of
> faulting-in the pages. Then, preemption is disabled, and the operations
> are performed atomically with respect to other thread execution on that
> CPU, without generating any page fault.
>
> A maximum limit of 16 operations per cpu_opv syscall invocation is
> enforced, so user-space cannot generate a too long preempt-off critical
> section. Each operation is also limited to a length of PAGE_SIZE bytes,
> meaning that an operation can touch a maximum of 4 pages (memcpy: 2
> pages for source, 2 pages for destination if addresses are not aligned
> on page boundaries). Moreover, a total limit of 4216 bytes is applied
> to operation lengths.
>
> If the thread is not running on the requested CPU, a new
> push_task_to_cpu() is invoked to migrate the task to the requested CPU.
> If the requested CPU is not part of the cpus allowed mask of the thread,
> the system call fails with EINVAL. After the migration has been
> performed, preemption is disabled, and the current CPU number is checked
> again and compared to the requested CPU number. If it still differs, it
> means the scheduler migrated us away from that CPU. Return EAGAIN to
> user-space in that case, and let user-space retry (either requesting the
> same CPU number, or a different one, depending on the user-space
> algorithm constraints).
>
> Signed-off-by: Mathieu Desnoyers 
> CC: "Paul E. McKenney" 
> CC: Peter Zijlstra 
> CC: Paul Turner 
> CC: Thomas Gleixner 
> CC: Andrew Hunter 
> CC: Andy Lutomirski 
> CC: Andi Kleen 
> CC: Dave Watson 
> CC: Chris Lameter 
> CC: Ingo Molnar 
> CC: "H. Peter Anvin" 
> CC: Ben Maurer 
> CC: Steven Rostedt 
> CC: Josh Triplett 
> CC: Linus Torvalds 
> CC: Andrew Morton 
> CC: Russell King 
> CC: Catalin Marinas 
> CC: Will Deacon 
> CC: Michael Kerrisk 
> CC: Boqun Feng 
> CC: linux-...@vger.kernel.org
> ---
>
> Changes since v1:
> - handle CPU hotplug,
> - cleanup implementation using function pointers: We can use function
>   pointers to implement the operations rather than duplicating all the
>   user-access code.
> - refuse device pages: Performing cpu_opv operations on io map'd pages
>   with preemption disabled could generate long preempt-off critical
>   sections, which leads to unwanted scheduler latency. Return EFAULT if
>   a device page is received as parameter
> - restrict op vector to 4216 bytes length sum: Restrict the operation
>   vector to length sum of:
>   - 4096 bytes (typical page size on most architectures, should be
> enough for a string, or structures)
>   - 15 * 8 bytes (typical operations on integers or pointers).
>   The goal here is to keep the duration of preempt off critical section
>   short, so we don't add significant scheduler latency.
> - Add INIT_ONSTACK macro: Introduce the
>   CPU_OP_FIELD_u32_u64_INIT_ONSTACK() macros to ensure that users
>   correctly initialize the upper bits of CPU_OP_FIELD_u32_u64() on their
>   stack to 0 on 32-bit architectures.
> - Add CPU_MB_OP operation:
>   Use-cases with:
>   - two consecutive stores,
>   - a memcpy followed by a store,
>   require a memory barrier before the final store operation. A typical
>   use-case is a store-release on the final store. Given that this is a
>   slow path, just providing an explicit full barrier instruction should
>   be 

Re: [RFC PATCH v3 for 4.15 08/24] Provide cpu_opv system call

2017-11-14 Thread Michael Kerrisk (man-pages)
Hi Mathieu

On 14 November 2017 at 21:03, Mathieu Desnoyers
 wrote:
> This new cpu_opv system call executes a vector of operations on behalf
> of user-space on a specific CPU with preemption disabled. It is inspired
> from readv() and writev() system calls which take a "struct iovec" array
> as argument.

Do you have a man page for this syscall already?

Thanks,

Michael


> The operations available are: comparison, memcpy, add, or, and, xor,
> left shift, right shift, and mb. The system call receives a CPU number
> from user-space as argument, which is the CPU on which those operations
> need to be performed. All preparation steps such as loading pointers,
> and applying offsets to arrays, need to be performed by user-space
> before invoking the system call. The "comparison" operation can be used
> to check that the data used in the preparation step did not change
> between preparation of system call inputs and operation execution within
> the preempt-off critical section.
>
> The reason why we require all pointer offsets to be calculated by
> user-space beforehand is because we need to use get_user_pages_fast() to
> first pin all pages touched by each operation. This takes care of
> faulting-in the pages. Then, preemption is disabled, and the operations
> are performed atomically with respect to other thread execution on that
> CPU, without generating any page fault.
>
> A maximum limit of 16 operations per cpu_opv syscall invocation is
> enforced, so user-space cannot generate a too long preempt-off critical
> section. Each operation is also limited to a length of PAGE_SIZE bytes,
> meaning that an operation can touch a maximum of 4 pages (memcpy: 2
> pages for source, 2 pages for destination if addresses are not aligned
> on page boundaries). Moreover, a total limit of 4216 bytes is applied
> to operation lengths.
>
> If the thread is not running on the requested CPU, a new
> push_task_to_cpu() is invoked to migrate the task to the requested CPU.
> If the requested CPU is not part of the cpus allowed mask of the thread,
> the system call fails with EINVAL. After the migration has been
> performed, preemption is disabled, and the current CPU number is checked
> again and compared to the requested CPU number. If it still differs, it
> means the scheduler migrated us away from that CPU. Return EAGAIN to
> user-space in that case, and let user-space retry (either requesting the
> same CPU number, or a different one, depending on the user-space
> algorithm constraints).
>
> Signed-off-by: Mathieu Desnoyers 
> CC: "Paul E. McKenney" 
> CC: Peter Zijlstra 
> CC: Paul Turner 
> CC: Thomas Gleixner 
> CC: Andrew Hunter 
> CC: Andy Lutomirski 
> CC: Andi Kleen 
> CC: Dave Watson 
> CC: Chris Lameter 
> CC: Ingo Molnar 
> CC: "H. Peter Anvin" 
> CC: Ben Maurer 
> CC: Steven Rostedt 
> CC: Josh Triplett 
> CC: Linus Torvalds 
> CC: Andrew Morton 
> CC: Russell King 
> CC: Catalin Marinas 
> CC: Will Deacon 
> CC: Michael Kerrisk 
> CC: Boqun Feng 
> CC: linux-...@vger.kernel.org
> ---
>
> Changes since v1:
> - handle CPU hotplug,
> - cleanup implementation using function pointers: We can use function
>   pointers to implement the operations rather than duplicating all the
>   user-access code.
> - refuse device pages: Performing cpu_opv operations on io map'd pages
>   with preemption disabled could generate long preempt-off critical
>   sections, which leads to unwanted scheduler latency. Return EFAULT if
>   a device page is received as parameter
> - restrict op vector to 4216 bytes length sum: Restrict the operation
>   vector to length sum of:
>   - 4096 bytes (typical page size on most architectures, should be
> enough for a string, or structures)
>   - 15 * 8 bytes (typical operations on integers or pointers).
>   The goal here is to keep the duration of preempt off critical section
>   short, so we don't add significant scheduler latency.
> - Add INIT_ONSTACK macro: Introduce the
>   CPU_OP_FIELD_u32_u64_INIT_ONSTACK() macros to ensure that users
>   correctly initialize the upper bits of CPU_OP_FIELD_u32_u64() on their
>   stack to 0 on 32-bit architectures.
> - Add CPU_MB_OP operation:
>   Use-cases with:
>   - two consecutive stores,
>   - a memcpy followed by a store,
>   require a memory barrier before the final store operation. A typical
>   use-case is a store-release on the final store. Given that this is a
>   slow path, just providing an explicit full barrier instruction should
>   be sufficient.
> - Add expect fault field:
>   The use-case of list_pop brings interesting challenges. With rseq, we
>   can use rseq_cmpnev_storeoffp_load(), and therefore load a pointer,
>   compare it against NULL, add an offset, and load the target "next"
>   pointer from the object, all within a single rseq critical section.
>
>   Life is not so easy for cpu_opv in this use-case, mainly because we
>   need to pin all pages we are going to touch in the preempt-off
>   critical section beforehand. So we need to 

Re: [PATCH] x86: use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz" again

2017-11-14 Thread Ingo Molnar

* Rafael J. Wysocki  wrote:

> On Wednesday, November 15, 2017 1:06:12 AM CET Linus Torvalds wrote:
> > On Tue, Nov 14, 2017 at 4:04 PM, Linus Torvalds
> >  wrote:
> > > On Tue, Nov 14, 2017 at 3:53 PM, Thomas Gleixner  
> > > wrote:
> > >> Current head + Raphaels patch:
> > >>
> > > >> real    0m0.029s
> > > >> user    0m0.000s
> > > >> sys     0m0.010s
> > >>
> > >> So that patch is actually slower.
> > >
> > > Oh it definitely is expected to be slower, because it does the IPI to
> > > all the cores and actually gets their frequency right.
> > >
> > > It was the old one that we had to revert (because it did so
> > > sequentially) that was really bad, and took something like 2+ seconds
> > > on Ingo's 160-core thing, iirc.
> > 
> > Looked it up. Ingo's machine "only" had 120 cores, and he said
> > 
> > fomalhaut:~> time cat /proc/cpuinfo  >/dev/null
> > real    0m2.689s
> > 
> > for the bad serial case, so yeah, it looks "a bit" better than it was ;)
> 
> OK, so may I queue it up?
> 
> I don't think I can get that to work substantially faster anyway ...

The new version is OK I suppose:

  Acked-by: Ingo Molnar 

I also think that /proc/cpuinfo is a pretty bad interface for many uses - I 
personally only very rarely need the cpuinfo of _all_ CPUs.

We should eventually have /proc/cpu/N/info or so, so that 99% of the times
cpuinfo is needed to report bugs we can do:

cat /proc/cpu/0/info

With maybe also the following variants:

/proc/cpu/first/
/proc/cpu/last/
/proc/cpu/current/

... to the first/last/current CPUs.

Thanks,

Ingo


Re: [PATCH] x86: use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz" again

2017-11-14 Thread Ingo Molnar

* Rafael J. Wysocki  wrote:

> On Wednesday, November 15, 2017 1:06:12 AM CET Linus Torvalds wrote:
> > On Tue, Nov 14, 2017 at 4:04 PM, Linus Torvalds
> >  wrote:
> > > On Tue, Nov 14, 2017 at 3:53 PM, Thomas Gleixner  
> > > wrote:
> > >> Current head + Raphaels patch:
> > >>
> > > >> real    0m0.029s
> > > >> user    0m0.000s
> > > >> sys     0m0.010s
> > >>
> > >> So that patch is actually slower.
> > >
> > > Oh it definitely is expected to be slower, because it does the IPI to
> > > all the cores and actually gets their frequency right.
> > >
> > > It was the old one that we had to revert (because it did so
> > > sequentially) that was really bad, and took something like 2+ seconds
> > > on Ingo's 160-core thing, iirc.
> > 
> > Looked it up. Ingo's machine "only" had 120 cores, and he said
> > 
> > fomalhaut:~> time cat /proc/cpuinfo  >/dev/null
> > real    0m2.689s
> > 
> > for the bad serial case, so yeah, it looks "a bit" better than it was ;)
> 
> OK, so may I queue it up?
> 
> I don't think I can get that to work substantially faster anyway ...

The new version is OK I suppose:

  Acked-by: Ingo Molnar 

I also think that /proc/cpuinfo is a pretty bad interface for many uses - I 
personally only very rarely need the cpuinfo of _all_ CPUs.

We should eventually have /proc/cpu/N/info or so, so that 99% of the times
cpuinfo is needed to report bugs we can do:

cat /proc/cpu/0/info

With maybe also the following variants:

/proc/cpu/first/
/proc/cpu/last/
/proc/cpu/current/

... to the first/last/current CPUs.

Thanks,

Ingo


Re: [PATCH] gpio: always include linux/gpio/consumer.h in linux/gpio.h

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 12:39 PM, Arnd Bergmann  wrote:

> linux/gpio/consumer.h is a bit odd, it contains definitions for a number
> of the advanced gpio interfaces, in variants for both gpiolib-based
> platforms and those not using gpiolib.
>
> The file gets included implicitly by linux/gpio.h, but only if gpiolib
> is enabled. Driver writers regularly fail to notice this and include
> the top-level linux/gpio.h but use the newer interfaces.
>
> The latest such driver is a new touchscreen driver that produced this
> build failure on an x86 randconfig build:
>
> drivers/input/touchscreen/hideep.c: In function 'hideep_power_on':
> drivers/input/touchscreen/hideep.c:670:3: error: implicit declaration of 
> function 'gpiod_set_value_cansleep'; did you mean 'gpio_set_value_cansleep'? 
> [-Werror=implicit-function-declaration]
>gpiod_set_value_cansleep(ts->reset_gpio, 0);
>
> I don't want to manually add linux/gpio/consumer.h inclusions to each
> such file any more, so let's just include this in linux/gpio.h for everyone.

Consumers should really just use <linux/gpio/consumer.h>
and stop including <linux/gpio.h> at all.

<linux/gpio.h> does not have the producer/consumer split
that the new API has, and the latter was inspired by
 and 
etc.

I.e. the right fix is not just to add #include <linux/gpio/consumer.h>
but also *delete* #include <linux/gpio.h>

The only time a driver need both includes is when they
use the legacy GPIO API and the new consumer API
at the same time. Or if they both produce and consume
GPIOs (such as some GPIO drivers do).

I don't know what to do besides documenting it, and it is
documented clearly in:
Documentation/gpio/consumer.txt

Apparently people write their drivers for GPIO without reading
this documentation and just including random headers or
copy-pasting.

I am trying to make more drivers good examples, one at a
time, starting with the most important and used ones.
drivers/i2c/busses/i2c-gpio.c is the most recent cleanup.

We can't delete the inclusion of <linux/gpio/consumer.h> from
<linux/gpio.h> however much we wanted to, because it breaks
a ton of legacy code. Instead we move one step at the time.

What we *could* do is try to emit a build warning for drivers
that use the implicit include of <linux/gpio/consumer.h>
from <linux/gpio.h>.

Or add some code to checkpatch to scream about it.

Ideas?

Yours,
Linus Walleij


Re: [PATCH] gpio: always include linux/gpio/consumer.h in linux/gpio.h

2017-11-14 Thread Linus Walleij
On Tue, Nov 14, 2017 at 12:39 PM, Arnd Bergmann  wrote:

> linux/gpio/consumer.h is a bit odd, it contains definitions for a number
> of the advanced gpio interfaces, in variants for both gpiolib-based
> platforms and those not using gpiolib.
>
> The file gets included implicitly by linux/gpio.h, but only if gpiolib
> is enabled. Driver writers regularly fail to notice this and include
> the top-level linux/gpio.h but use the newer interfaces.
>
> The latest such driver is a new touchscreen driver that produced this
> build failure on an x86 randconfig build:
>
> drivers/input/touchscreen/hideep.c: In function 'hideep_power_on':
> drivers/input/touchscreen/hideep.c:670:3: error: implicit declaration of 
> function 'gpiod_set_value_cansleep'; did you mean 'gpio_set_value_cansleep'? 
> [-Werror=implicit-function-declaration]
>gpiod_set_value_cansleep(ts->reset_gpio, 0);
>
> I don't want to manually add linux/gpio/consumer.h inclusions to each
> such file any more, so let's just include this in linux/gpio.h for everyone.

Consumers should really just use <linux/gpio/consumer.h>
and stop including <linux/gpio.h> at all.

<linux/gpio.h> does not have the producer/consumer split
that the new API has, and the latter was inspired by
 and 
etc.

I.e. the right fix is not just to add #include <linux/gpio/consumer.h>
but also *delete* #include <linux/gpio.h>

The only time a driver need both includes is when they
use the legacy GPIO API and the new consumer API
at the same time. Or if they both produce and consume
GPIOs (such as some GPIO drivers do).

I don't know what to do besides documenting it, and it is
documented clearly in:
Documentation/gpio/consumer.txt

Apparently people write their drivers for GPIO without reading
this documentation and just including random headers or
copy-pasting.

I am trying to make more drivers good examples, one at a
time, starting with the most important and used ones.
drivers/i2c/busses/i2c-gpio.c is the most recent cleanup.

We can't delete the inclusion of <linux/gpio/consumer.h> from
<linux/gpio.h> however much we wanted to, because it breaks
a ton of legacy code. Instead we move one step at the time.

What we *could* do is try to emit a build warning for drivers
that use the implicit include of <linux/gpio/consumer.h>
from <linux/gpio.h>.

Or add some code to checkpatch to scream about it.

Ideas?

Yours,
Linus Walleij


Re: [PATCH v3 4/6] PM / core: Add helpers for subsystem callback selection

2017-11-14 Thread Ulf Hansson
On 12 November 2017 at 01:42, Rafael J. Wysocki  wrote:
> From: Rafael J. Wysocki 
>
> Add helper routines to find and return a suitable subsystem callback
> during the "noirq" phases of system suspend/resume (or analogous)
> transitions as well as during the "late" phase of system suspend and
> the "early" phase of system resume (or analogous) transitions.
>
> The helpers will be called from additional sites going forward.
>
> Signed-off-by: Rafael J. Wysocki 

With a minor nitpick, see below, feel free to add:

Reviewed-by: Ulf Hansson 

> ---
>
> v2 -> v3: No changes.
>
> ---
>  drivers/base/power/main.c |  196 
> +++---
>  1 file changed, 136 insertions(+), 60 deletions(-)
>
> Index: linux-pm/drivers/base/power/main.c
> ===
> --- linux-pm.orig/drivers/base/power/main.c
> +++ linux-pm/drivers/base/power/main.c
> @@ -525,6 +525,14 @@ static void dpm_watchdog_clear(struct dp
>  #define dpm_watchdog_clear(x)
>  #endif
>
> +static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
> +pm_message_t state,
> +const char **info_p);
> +
> +static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
> +   pm_message_t state,
> +   const char **info_p);
> +

There is no need to declare these functions.

Perhaps a following patch in the series needs them, but then that
change should add these or even better (in my opinion) just move the
implementations and avoid the declarations altogether.

[...]

Kind regards
Uffe


Re: [PATCH v3 4/6] PM / core: Add helpers for subsystem callback selection

2017-11-14 Thread Ulf Hansson
On 12 November 2017 at 01:42, Rafael J. Wysocki  wrote:
> From: Rafael J. Wysocki 
>
> Add helper routines to find and return a suitable subsystem callback
> during the "noirq" phases of system suspend/resume (or analogous)
> transitions as well as during the "late" phase of system suspend and
> the "early" phase of system resume (or analogous) transitions.
>
> The helpers will be called from additional sites going forward.
>
> Signed-off-by: Rafael J. Wysocki 

With a minor nitpick, see below, feel free to add:

Reviewed-by: Ulf Hansson 

> ---
>
> v2 -> v3: No changes.
>
> ---
>  drivers/base/power/main.c |  196 
> +++---
>  1 file changed, 136 insertions(+), 60 deletions(-)
>
> Index: linux-pm/drivers/base/power/main.c
> ===
> --- linux-pm.orig/drivers/base/power/main.c
> +++ linux-pm/drivers/base/power/main.c
> @@ -525,6 +525,14 @@ static void dpm_watchdog_clear(struct dp
>  #define dpm_watchdog_clear(x)
>  #endif
>
> +static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev,
> +pm_message_t state,
> +const char **info_p);
> +
> +static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev,
> +   pm_message_t state,
> +   const char **info_p);
> +

There is no need to declare these functions.

Perhaps a following patch in the series needs them, but then that
change should add these or even better (in my opinion) just move the
implementations and avoid the declarations altogether.

[...]

Kind regards
Uffe


[PATCH v2] arm64: perf: remove unsupported events for Cortex-A73

2017-11-14 Thread Xu YiPing
bus access read/write events are not supported in A73, based on the
Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460).

Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73"
Signed-off-by: Xu YiPing 
---
 arch/arm64/kernel/perf_event.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51..3affca3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned 
armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-   [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-   [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-   [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-   [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-- 
2.7.4



[PATCH v2] arm64: perf: remove unsupported events for Cortex-A73

2017-11-14 Thread Xu YiPing
bus access read/write events are not supported in A73, based on the
Cortex-A73 TRM r0p2, section 11.9 Events (pages 11-457 to 11-460).

Fixes: 5561b6c5e981 "arm64: perf: add support for Cortex-A73"
Signed-off-by: Xu YiPing 
---
 arch/arm64/kernel/perf_event.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51..3affca3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned 
armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]  = 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-   [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-   [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-   [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-   [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = 
ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
-- 
2.7.4



Re: [PATCH v3 1/3] leds: core: Introduce generic pattern interface

2017-11-14 Thread Greg KH
On Tue, Nov 14, 2017 at 11:13:43PM -0800, Bjorn Andersson wrote:
> Some LED controllers have support for autonomously controlling
> brightness over time, according to some preprogrammed pattern or
> function.
> 
> This adds a new optional operator that LED class drivers can implement
> if they support such functionality as well as a new device attribute to
> configure the pattern for a given LED.
> 
> Signed-off-by: Bjorn Andersson 
> ---
> 
> Changes since v2:
> - None
> 
> Changes since v1:
> - New patch, based on discussions following v1
> 
>  Documentation/ABI/testing/sysfs-class-led |  20 
>  drivers/leds/led-class.c  | 150 
> ++
>  include/linux/leds.h  |  21 +
>  3 files changed, 191 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-led 
> b/Documentation/ABI/testing/sysfs-class-led
> index 5f67f7ab277b..74a7f5b1f89b 100644
> --- a/Documentation/ABI/testing/sysfs-class-led
> +++ b/Documentation/ABI/testing/sysfs-class-led
> @@ -61,3 +61,23 @@ Description:
>   gpio and backlight triggers. In case of the backlight trigger,
>   it is useful when driving a LED which is intended to indicate
>   a device in a standby like state.
> +
> +What:/sys/class/leds//pattern
> +Date:July 2017

That was many months ago :)

> +KernelVersion:   4.14

And that kernel version is long since released :)

thanks,

greg k-h


Re: [PATCH v3 1/3] leds: core: Introduce generic pattern interface

2017-11-14 Thread Greg KH
On Tue, Nov 14, 2017 at 11:13:43PM -0800, Bjorn Andersson wrote:
> Some LED controllers have support for autonomously controlling
> brightness over time, according to some preprogrammed pattern or
> function.
> 
> This adds a new optional operator that LED class drivers can implement
> if they support such functionality as well as a new device attribute to
> configure the pattern for a given LED.
> 
> Signed-off-by: Bjorn Andersson 
> ---
> 
> Changes since v2:
> - None
> 
> Changes since v1:
> - New patch, based on discussions following v1
> 
>  Documentation/ABI/testing/sysfs-class-led |  20 
>  drivers/leds/led-class.c  | 150 
> ++
>  include/linux/leds.h  |  21 +
>  3 files changed, 191 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-led 
> b/Documentation/ABI/testing/sysfs-class-led
> index 5f67f7ab277b..74a7f5b1f89b 100644
> --- a/Documentation/ABI/testing/sysfs-class-led
> +++ b/Documentation/ABI/testing/sysfs-class-led
> @@ -61,3 +61,23 @@ Description:
>   gpio and backlight triggers. In case of the backlight trigger,
>   it is useful when driving a LED which is intended to indicate
>   a device in a standby like state.
> +
> +What:/sys/class/leds//pattern
> +Date:July 2017

That was many months ago :)

> +KernelVersion:   4.14

And that kernel version is long since released :)

thanks,

greg k-h


Re: [PATCH 1/2] bpf: add a bpf_override_function helper

2017-11-14 Thread Ingo Molnar

* Josef Bacik  wrote:

> > > Then 'not crashing kernel' requirement will be preserved.
> > > btrfs or whatever else we will be testing with override_return
> > > will be functioning in 'stress test' mode and if bpf program
> > > is not careful and returns error all the time then one particular
> > > subsystem (like btrfs) will not be functional, but the kernel
> > > will not be crashing.
> > > Thoughts?
> > 
> > Yeah, that approach sounds much better to me: it should fundamentally be 
> > opt-in, and should be documented that it should not be possible to crash 
> > the 
> > kernel via changing the return value.
> > 
> > I'd make it a bit clearer in the naming what the purpose of the annotation 
> > is: for 
> > example would BPF_ALLOW_ERROR_INJECTION() work for you guys? I.e. I think 
> > it 
> > should generally be used to change actual integer error values - or at most 
> > user 
> > pointers, but not kernel pointers. Not enforced in a type safe manner, but 
> > the 
> > naming should give enough hints?
> > 
> > Such return-injection BPF programs can still totally confuse user-space 
> > obviously: 
> > for example returning an IO error could corrupt application data - but 
> > that's the 
> > nature of such facilities and similar results could already be achieved via 
> > ptrace 
> > as well. But the result of a BPF program should never be _worse_ than 
> > ptrace, in 
> > terms of kernel integrity.
> > 
> > Note that with such a safety mechanism in place no kernel message has to be 
> > generated either I suspect.
> > 
> > In any case, my NAK would be lifted with such an approach.
> 
> I'm going to want to annotate kmalloc, so it's still going to be possible to
> make things go horribly wrong, is this still going to be ok with you?  
> Obviously
> I want to use this for btrfs, but really what I used this for originally was 
> an
> NBD problem where I had to do special handling for getting EINTR back from
> kernel_sendmsg, which was a pain to trigger properly without this patch.  
> Opt-in
> is going to make it so we're just flagging important function calls anyway
> because those are the ones that fail rarely and that we want to test, which 
> puts
> us back in the same situation you are worried about, so it doesn't make much
> sense to me to do it this way.  Thanks,

I suppose - let's see how it goes? The important factor is the opt-in aspect I 
believe.

Technically the kernel should never crash on a kmalloc() failure either, 
although 
obviously things can go horribly wrong from user-space's perspective.

Thanks,

Ingo


Re: [PATCH 1/2] bpf: add a bpf_override_function helper

2017-11-14 Thread Ingo Molnar

* Josef Bacik  wrote:

> > > Then 'not crashing kernel' requirement will be preserved.
> > > btrfs or whatever else we will be testing with override_return
> > > will be functioning in 'stress test' mode and if bpf program
> > > is not careful and returns error all the time then one particular
> > > subsystem (like btrfs) will not be functional, but the kernel
> > > will not be crashing.
> > > Thoughts?
> > 
> > Yeah, that approach sounds much better to me: it should fundamentally be 
> > opt-in, and should be documented that it should not be possible to crash 
> > the 
> > kernel via changing the return value.
> > 
> > I'd make it a bit clearer in the naming what the purpose of the annotation 
> > is: for 
> > example would BPF_ALLOW_ERROR_INJECTION() work for you guys? I.e. I think 
> > it 
> > should generally be used to change actual integer error values - or at most 
> > user 
> > pointers, but not kernel pointers. Not enforced in a type safe manner, but 
> > the 
> > naming should give enough hints?
> > 
> > Such return-injection BPF programs can still totally confuse user-space 
> > obviously: 
> > for example returning an IO error could corrupt application data - but 
> > that's the 
> > nature of such facilities and similar results could already be achieved via 
> > ptrace 
> > as well. But the result of a BPF program should never be _worse_ than 
> > ptrace, in 
> > terms of kernel integrity.
> > 
> > Note that with such a safety mechanism in place no kernel message has to be 
> > generated either I suspect.
> > 
> > In any case, my NAK would be lifted with such an approach.
> 
> I'm going to want to annotate kmalloc, so it's still going to be possible to
> make things go horribly wrong, is this still going to be ok with you?  
> Obviously
> I want to use this for btrfs, but really what I used this for originally was 
> an
> NBD problem where I had to do special handling for getting EINTR back from
> kernel_sendmsg, which was a pain to trigger properly without this patch.  
> Opt-in
> is going to make it so we're just flagging important function calls anyway
> because those are the ones that fail rarely and that we want to test, which 
> puts
> us back in the same situation you are worried about, so it doesn't make much
> sense to me to do it this way.  Thanks,

I suppose - let's see how it goes? The important factor is the opt-in aspect I 
believe.

Technically the kernel should never crash on a kmalloc() failure either, 
although 
obviously things can go horribly wrong from user-space's perspective.

Thanks,

Ingo


Re: [GIT PULL] sound updates for 4.15-rc1

2017-11-14 Thread Takashi Iwai
[Adding more people and alsa-devel to Cc]

On Wed, 15 Nov 2017 03:40:09 +0100,
Linus Torvalds wrote:
> 
> On Tue, Nov 14, 2017 at 6:51 AM, Takashi Iwai  wrote:
> >
> > please pull sound updates for v4.15-rc1 from:
> 
> Hmm. Making "oldconfig" on my laptop with this, my
> SND_SOC_INTEL_SKYLAKE went away.
> 
> And the reason seems to be that new SND_SOC_INTEL_SST_TOPLEVEL config option.
> 
> Which has no help associated with it.
> 
> This is not a friendly thing to do to people. It basically breaks
> existing setups for no documented reason, and with no explanation.
> 
> Please fix the config situation. At the very least, add documentation.

Sorry about that.  I saw Vinod already submitted a patch to add the
help text to CONFIG_SND_SOC_INTEL_SST_TOPLEVEL, so the least fix
should go in soon.

But now looking at these changes, I noticed a few things, too:

- With the introduction of SND_SOC_INTEL_SST_TOPLEVEL, keeping
  SND_SOC_INTEL_COMMON and SND_SOC_INTEL_MACH individually doesn't
  make much sense.  They can be dropped and replaced with
  SND_SOC_INTEL_SST_TOPLEVEL as a further cleanup.

- ... or, make SND_SOC_INTEL_SST_TOPLEVEL=y as default, if this is
  considered to be a top-level filter config (like the network vendor
  kconfig items).  In that case, the reverse-selection of
  SND_SOC_INTEL_COMMON and SND_SOC_INTEL_MACH should be avoided, but
  they should be selected from the actual drivers instead.


And I believe there are a few more possible cleanups / fixes in the
messy Intel ASoC Kconfigs.  For example, SND_SOC_INTEL_SST is almost
always set.  The only exception is via SND_SST_ATOM_HIFI2_PLATFORM.
But all machine drivers using Atom Hifi2 do set SND_SST_IPC_ACPI,
which also requires SND_SOC_INTEL_SST.

Further looking at this, we see that the only entry that does *not*
require SND_SOC_INTEL_SST is the case with SND_MFLD_MACHINE in
sound/soc/intel/boards.  And now more interesting part -- there is no
corresponding entry in Makefile.  That is, this kconfig is effectively
dead!  The source code mfld_machine.c exists, but it's just a place
holder now.  The code was supposed to be integrated into atom
directory by the commit b97169da0699, but it seems forgotten to be
updated.

Hmm...


Takashi


Re: [GIT PULL] sound updates for 4.15-rc1

2017-11-14 Thread Takashi Iwai
[Adding more people and alsa-devel to Cc]

On Wed, 15 Nov 2017 03:40:09 +0100,
Linus Torvalds wrote:
> 
> On Tue, Nov 14, 2017 at 6:51 AM, Takashi Iwai  wrote:
> >
> > please pull sound updates for v4.15-rc1 from:
> 
> Hmm. Making "oldconfig" on my laptop with this, my
> SND_SOC_INTEL_SKYLAKE went away.
> 
> And the reason seems to be that new SND_SOC_INTEL_SST_TOPLEVEL config option.
> 
> Which has no help associated with it.
> 
> This is not a friendly thing to do to people. It basically breaks
> existing setups for no documented reason, and with no explanation.
> 
> Please fix the config situation. At the very least, add documentation.

Sorry about that.  I saw Vinod already submitted a patch to add the
help text to CONFIG_SND_SOC_INTEL_SST_TOPLEVEL, so the least fix
should go in soon.

But now looking at these changes, I noticed a few things, too:

- With the introduction of SND_SOC_INTEL_SST_TOPLEVEL, keeping
  SND_SOC_INTEL_COMMON and SND_SOC_INTEL_MACH individually doesn't
  make much sense.  They can be dropped and replaced with
  SND_SOC_INTEL_SST_TOPLEVEL as a further cleanup.

- ... or, make SND_SOC_INTEL_SST_TOPLEVEL=y as default, if this is
  considered to be a top-level filter config (like the network vendor
  kconfig items).  In that case, the reverse-selection of
  SND_SOC_INTEL_COMMON and SND_SOC_INTEL_MACH should be avoided, but
  they should be selected from the actual drivers instead.


And I believe there are a few more possible cleanups / fixes in the
messy Intel ASoC Kconfigs.  For example, SND_SOC_INTEL_SST is almost
always set.  The only exception is via SND_SST_ATOM_HIFI2_PLATFORM.
But all machine drivers using Atom Hifi2 do set SND_SST_IPC_ACPI,
which also requires SND_SOC_INTEL_SST.

Further looking at this, we see that the only entry that does *not*
require SND_SOC_INTEL_SST is the case with SND_MFLD_MACHINE in
sound/soc/intel/boards.  And now more interesting part -- there is no
corresponding entry in Makefile.  That is, this kconfig is effectively
dead!  The source code mfld_machine.c exists, but it's just a place
holder now.  The code was supposed to be integrated into atom
directory by the commit b97169da0699, but it seems forgotten to be
updated.

Hmm...


Takashi


[RFC PATCH 5/5] ARM: dts: rockchip: add isp node for rk3288

2017-11-14 Thread Jacob Chen
From: Jacob Chen 

rk3288 has an embedded 13M ISP and a MIPI-CSI2 interface.

Signed-off-by: Jacob Chen 
---
 arch/arm/boot/dts/rk3288.dtsi | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 60658c5c9a48..f9a81137146d 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -962,6 +962,30 @@
status = "disabled";
};
 
+   isp: isp@ff91 {
+   compatible = "rockchip,rk3288-cif-isp";
+   reg = <0x0 0xff91 0x0 0x4000>;
+   interrupts = ;
+   clocks = < SCLK_ISP>, < ACLK_ISP>,
+< HCLK_ISP>, < PCLK_ISP_IN>,
+< SCLK_ISP_JPE>;
+   clock-names = "clk_isp", "aclk_isp",
+ "hclk_isp", "pclk_isp_in",
+ "sclk_isp_jpe";
+   assigned-clocks = < SCLK_ISP>;
+   assigned-clock-rates = <4>;
+   power-domains = < RK3288_PD_VIO>;
+   iommus = <_mmu>;
+   status = "disabled";
+   isp_mipi_phy_rx0: isp-mipi-phy-rx0 {
+   compatible = "rockchip,rk3288-mipi-dphy";
+   rockchip,grf = <>;
+   clocks = < SCLK_MIPIDSI_24M>, < PCLK_MIPI_CSI>;
+   clock-names = "dphy-ref", "pclk";
+   status = "disabled";
+   };
+   };
+
isp_mmu: iommu@ff914000 {
compatible = "rockchip,iommu";
reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>;
-- 
2.14.2



[RFC PATCH 5/5] ARM: dts: rockchip: add isp node for rk3288

2017-11-14 Thread Jacob Chen
From: Jacob Chen 

rk3288 has an embedded 13M ISP and a MIPI-CSI2 interface.

Signed-off-by: Jacob Chen 
---
 arch/arm/boot/dts/rk3288.dtsi | 24 
 1 file changed, 24 insertions(+)

diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 60658c5c9a48..f9a81137146d 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -962,6 +962,30 @@
status = "disabled";
};
 
+   isp: isp@ff91 {
+   compatible = "rockchip,rk3288-cif-isp";
+   reg = <0x0 0xff91 0x0 0x4000>;
+   interrupts = ;
+   clocks = < SCLK_ISP>, < ACLK_ISP>,
+< HCLK_ISP>, < PCLK_ISP_IN>,
+< SCLK_ISP_JPE>;
+   clock-names = "clk_isp", "aclk_isp",
+ "hclk_isp", "pclk_isp_in",
+ "sclk_isp_jpe";
+   assigned-clocks = < SCLK_ISP>;
+   assigned-clock-rates = <4>;
+   power-domains = < RK3288_PD_VIO>;
+   iommus = <_mmu>;
+   status = "disabled";
+   isp_mipi_phy_rx0: isp-mipi-phy-rx0 {
+   compatible = "rockchip,rk3288-mipi-dphy";
+   rockchip,grf = <>;
+   clocks = < SCLK_MIPIDSI_24M>, < PCLK_MIPI_CSI>;
+   clock-names = "dphy-ref", "pclk";
+   status = "disabled";
+   };
+   };
+
isp_mmu: iommu@ff914000 {
compatible = "rockchip,iommu";
reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>;
-- 
2.14.2



[RFC PATCH 4/5] arm64: dts: rockchip: add isp0 node for rk3399

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

rk3399 has two ISPs, but we haven't tested isp1, so just add isp0 at present.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 arch/arm64/boot/dts/rockchip/rk3399.dtsi | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi 
b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index ab7629c5b856..f696e62d09dd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1577,6 +1577,32 @@
status = "disabled";
};
 
+   isp0: isp0@ff91 {
+   compatible = "rockchip,rk3399-cif-isp";
+   reg = <0x0 0xff91 0x0 0x4000>;
+   interrupts = ;
+   clocks = < SCLK_ISP0>,
+< ACLK_ISP0>, < ACLK_ISP0_WRAPPER>,
+< HCLK_ISP0>, < HCLK_ISP0_WRAPPER>;
+   clock-names = "clk_isp",
+ "aclk_isp", "aclk_isp_wrap",
+ "hclk_isp", "hclk_isp_wrap";
+   power-domains = < RK3399_PD_ISP0>;
+   iommus = <_mmu>;
+   status = "disabled";
+
+   isp_mipi_dphy_rx0: isp-mipi-dphy-rx0 {
+   compatible = "rockchip,rk3399-mipi-dphy";
+   rockchip,grf = <>;
+   clocks = < SCLK_MIPIDPHY_REF>,
+< SCLK_DPHY_RX0_CFG>,
+< PCLK_VIO_GRF>;
+   clock-names = "dphy-ref", "dphy-cfg", "grf";
+   power-domains = < RK3399_PD_VIO>;
+   status = "disabled";
+   };
+   };
+
isp0_mmu: iommu@ff914000 {
compatible = "rockchip,iommu";
reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>;
-- 
2.14.2



[RFC PATCH 4/5] arm64: dts: rockchip: add isp0 node for rk3399

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

rk3399 has two ISPs, but we haven't tested isp1, so just add isp0 at present.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 arch/arm64/boot/dts/rockchip/rk3399.dtsi | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi 
b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index ab7629c5b856..f696e62d09dd 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -1577,6 +1577,32 @@
status = "disabled";
};
 
+   isp0: isp0@ff91 {
+   compatible = "rockchip,rk3399-cif-isp";
+   reg = <0x0 0xff91 0x0 0x4000>;
+   interrupts = ;
+   clocks = < SCLK_ISP0>,
+< ACLK_ISP0>, < ACLK_ISP0_WRAPPER>,
+< HCLK_ISP0>, < HCLK_ISP0_WRAPPER>;
+   clock-names = "clk_isp",
+ "aclk_isp", "aclk_isp_wrap",
+ "hclk_isp", "hclk_isp_wrap";
+   power-domains = < RK3399_PD_ISP0>;
+   iommus = <_mmu>;
+   status = "disabled";
+
+   isp_mipi_dphy_rx0: isp-mipi-dphy-rx0 {
+   compatible = "rockchip,rk3399-mipi-dphy";
+   rockchip,grf = <>;
+   clocks = < SCLK_MIPIDPHY_REF>,
+< SCLK_DPHY_RX0_CFG>,
+< PCLK_VIO_GRF>;
+   clock-names = "dphy-ref", "dphy-cfg", "grf";
+   power-domains = < RK3399_PD_VIO>;
+   status = "disabled";
+   };
+   };
+
isp0_mmu: iommu@ff914000 {
compatible = "rockchip,iommu";
reg = <0x0 0xff914000 0x0 0x100>, <0x0 0xff915000 0x0 0x100>;
-- 
2.14.2



[RFC PATCH 2/5] media: rkisp1: Add user space ABI definitions

2017-11-14 Thread Jacob Chen
From: Jeffy Chen 

Add the header for userspace

Signed-off-by: Jeffy Chen 
Signed-off-by: Jacob Chen 
---
 include/uapi/linux/rkisp1-config.h | 554 +
 1 file changed, 554 insertions(+)
 create mode 100644 include/uapi/linux/rkisp1-config.h

diff --git a/include/uapi/linux/rkisp1-config.h 
b/include/uapi/linux/rkisp1-config.h
new file mode 100644
index ..a801fbc9ef47
--- /dev/null
+++ b/include/uapi/linux/rkisp1-config.h
@@ -0,0 +1,554 @@
+/*
+ * Rockchip isp1 driver
+ *
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UAPI_RKISP1_CONFIG_H
+#define _UAPI_RKISP1_CONFIG_H
+
+#include 
+#include 
+
+#define CIFISP_MODULE_DPCC  (1 << 0)
+#define CIFISP_MODULE_BLS   (1 << 1)
+#define CIFISP_MODULE_SDG   (1 << 2)
+#define CIFISP_MODULE_HST   (1 << 3)
+#define CIFISP_MODULE_LSC   (1 << 4)
+#define CIFISP_MODULE_AWB_GAIN  (1 << 5)
+#define CIFISP_MODULE_FLT   (1 << 6)
+#define CIFISP_MODULE_BDM   (1 << 7)
+#define CIFISP_MODULE_CTK   (1 << 8)
+#define CIFISP_MODULE_GOC   (1 << 9)
+#define CIFISP_MODULE_CPROC (1 << 10)
+#define CIFISP_MODULE_AFC   (1 << 11)
+#define CIFISP_MODULE_AWB   (1 << 12)
+#define CIFISP_MODULE_IE(1 << 13)
+#define CIFISP_MODULE_AEC   (1 << 14)
+#define CIFISP_MODULE_WDR   (1 << 15)
+#define CIFISP_MODULE_DPF   (1 << 16)
+#define CIFISP_MODULE_DPF_STRENGTH  (1 << 17)
+
+#define CIFISP_CTK_COEFF_MAX0x100
+#define CIFISP_CTK_OFFSET_MAX   0x800
+
+#define CIFISP_AE_MEAN_MAX  25
+#define CIFISP_HIST_BIN_N_MAX   16
+#define CIFISP_AFM_MAX_WINDOWS  3
+#define CIFISP_DEGAMMA_CURVE_SIZE   17
+
+#define CIFISP_BDM_MAX_TH   0xFF
+
+/* maximum value for horizontal start address */
+#define CIFISP_BLS_START_H_MAX 0x0FFF
+/* maximum value for horizontal stop address */
+#define CIFISP_BLS_STOP_H_MAX  0x0FFF
+/* maximum value for vertical start address */
+#define CIFISP_BLS_START_V_MAX 0x0FFF
+/* maximum value for vertical stop address */
+#define CIFISP_BLS_STOP_V_MAX  0x0FFF
+/* maximum is 2^18 = 262144*/
+#define CIFISP_BLS_SAMPLES_MAX 0x0012
+/* maximum value for fixed black level */
+#define CIFISP_BLS_FIX_SUB_MAX 0x0FFF
+/* minimum value for fixed black level */
+#define CIFISP_BLS_FIX_SUB_MIN 0xF000
+/* 13 bit range (signed)*/
+#define CIFISP_BLS_FIX_MASK0x1FFF
+/* AWB */
+#define CIFISP_AWB_MAX_GRID1
+#define CIFISP_AWB_MAX_FRAMES  7
+
+/* Gamma out*/
+/* Maximum number of color samples supported */
+#define CIFISP_GAMMA_OUT_MAX_SAMPLES   17
+
+/* LSC */
+#define CIFISP_LSC_GRAD_TBL_SIZE   8
+#define CIFISP_LSC_SIZE_TBL_SIZE   8
+/*
+ * The following matches the tuning process,
+ * not the max capabilities of the chip.
+ * Last value unused.
+ */
+#defineCIFISP_LSC_DATA_TBL_SIZE   290
+/* HIST */
+/* Last 3 values unused. */
+#define CIFISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28
+
+/* DPCC */
+#define CIFISP_DPCC_METHODS_MAX   3
+
+/* DPF */
+#define CIFISP_DPF_MAX_NLF_COEFFS  17
+#define CIFISP_DPF_MAX_SPATIAL_COEFFS  6
+
+/* measurement types */
+#define CIFISP_STAT_AWB   (1 << 0)
+#define CIFISP_STAT_AUTOEXP   

[RFC PATCH 2/5] media: rkisp1: Add user space ABI definitions

2017-11-14 Thread Jacob Chen
From: Jeffy Chen 

Add the header for userspace

Signed-off-by: Jeffy Chen 
Signed-off-by: Jacob Chen 
---
 include/uapi/linux/rkisp1-config.h | 554 +
 1 file changed, 554 insertions(+)
 create mode 100644 include/uapi/linux/rkisp1-config.h

diff --git a/include/uapi/linux/rkisp1-config.h 
b/include/uapi/linux/rkisp1-config.h
new file mode 100644
index ..a801fbc9ef47
--- /dev/null
+++ b/include/uapi/linux/rkisp1-config.h
@@ -0,0 +1,554 @@
+/*
+ * Rockchip isp1 driver
+ *
+ * Copyright (C) 2017 Rockchip Electronics Co., Ltd.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UAPI_RKISP1_CONFIG_H
+#define _UAPI_RKISP1_CONFIG_H
+
+#include 
+#include 
+
+#define CIFISP_MODULE_DPCC  (1 << 0)
+#define CIFISP_MODULE_BLS   (1 << 1)
+#define CIFISP_MODULE_SDG   (1 << 2)
+#define CIFISP_MODULE_HST   (1 << 3)
+#define CIFISP_MODULE_LSC   (1 << 4)
+#define CIFISP_MODULE_AWB_GAIN  (1 << 5)
+#define CIFISP_MODULE_FLT   (1 << 6)
+#define CIFISP_MODULE_BDM   (1 << 7)
+#define CIFISP_MODULE_CTK   (1 << 8)
+#define CIFISP_MODULE_GOC   (1 << 9)
+#define CIFISP_MODULE_CPROC (1 << 10)
+#define CIFISP_MODULE_AFC   (1 << 11)
+#define CIFISP_MODULE_AWB   (1 << 12)
+#define CIFISP_MODULE_IE    (1 << 13)
+#define CIFISP_MODULE_AEC   (1 << 14)
+#define CIFISP_MODULE_WDR   (1 << 15)
+#define CIFISP_MODULE_DPF   (1 << 16)
+#define CIFISP_MODULE_DPF_STRENGTH  (1 << 17)
+
+#define CIFISP_CTK_COEFF_MAX    0x100
+#define CIFISP_CTK_OFFSET_MAX   0x800
+
+#define CIFISP_AE_MEAN_MAX  25
+#define CIFISP_HIST_BIN_N_MAX   16
+#define CIFISP_AFM_MAX_WINDOWS  3
+#define CIFISP_DEGAMMA_CURVE_SIZE   17
+
+#define CIFISP_BDM_MAX_TH   0xFF
+
+/* maximum value for horizontal start address */
+#define CIFISP_BLS_START_H_MAX 0x0FFF
+/* maximum value for horizontal stop address */
+#define CIFISP_BLS_STOP_H_MAX  0x0FFF
+/* maximum value for vertical start address */
+#define CIFISP_BLS_START_V_MAX 0x0FFF
+/* maximum value for vertical stop address */
+#define CIFISP_BLS_STOP_V_MAX  0x0FFF
+/* maximum is 2^18 = 262144*/
+#define CIFISP_BLS_SAMPLES_MAX 0x0012
+/* maximum value for fixed black level */
+#define CIFISP_BLS_FIX_SUB_MAX 0x0FFF
+/* minimum value for fixed black level */
+#define CIFISP_BLS_FIX_SUB_MIN 0xF000
+/* 13 bit range (signed)*/
+#define CIFISP_BLS_FIX_MASK    0x1FFF
+/* AWB */
+#define CIFISP_AWB_MAX_GRID    1
+#define CIFISP_AWB_MAX_FRAMES  7
+
+/* Gamma out*/
+/* Maximum number of color samples supported */
+#define CIFISP_GAMMA_OUT_MAX_SAMPLES   17
+
+/* LSC */
+#define CIFISP_LSC_GRAD_TBL_SIZE   8
+#define CIFISP_LSC_SIZE_TBL_SIZE   8
+/*
+ * The following matches the tuning process,
+ * not the max capabilities of the chip.
+ * Last value unused.
+ */
+#define CIFISP_LSC_DATA_TBL_SIZE   290
+/* HIST */
+/* Last 3 values unused. */
+#define CIFISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28
+
+/* DPCC */
+#define CIFISP_DPCC_METHODS_MAX   3
+
+/* DPF */
+#define CIFISP_DPF_MAX_NLF_COEFFS  17
+#define CIFISP_DPF_MAX_SPATIAL_COEFFS  6
+
+/* measurement types */
+#define CIFISP_STAT_AWB   (1 << 0)
+#define CIFISP_STAT_AUTOEXP   (1 << 1)
+#define CIFISP_STAT_AFM_FIN   (1 << 2)
+#define CIFISP_STAT_HIST  

[RFC PATCH 1/5] media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

Add the Rockchip ISP1 specific processing parameter format
V4L2_META_FMT_RK_ISP1_PARAMS and metadata format
V4L2_META_FMT_RK_ISP1_STAT_3A for 3A.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++
 include/uapi/linux/videodev2.h   | 4 
 2 files changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c 
b/drivers/media/v4l2-core/v4l2-ioctl.c
index d6587b3ec33e..0604ae9ea444 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1252,6 +1252,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; 
break;
case V4L2_META_FMT_VSP1_HGO:descr = "R-Car VSP1 1-D Histogram"; 
break;
case V4L2_META_FMT_VSP1_HGT:descr = "R-Car VSP1 2-D Histogram"; 
break;
+   case V4L2_META_FMT_RK_ISP1_PARAMS:  descr = "Rockchip ISP1 3A 
params"; break;
+   case V4L2_META_FMT_RK_ISP1_STAT_3A: descr = "Rockchip ISP1 3A 
statistics"; break;
 
default:
/* Compressed formats */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e507b29ba1e0..14efa6513126 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -690,6 +690,10 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_VSP1_HGOv4l2_fourcc('V', 'S', 'P', 'H') /* R-Car 
VSP1 1-D Histogram */
 #define V4L2_META_FMT_VSP1_HGTv4l2_fourcc('V', 'S', 'P', 'T') /* R-Car 
VSP1 2-D Histogram */
 
+/* Vendor specific - used for RK_ISP1 camera sub-system */
+#define V4L2_META_FMT_RK_ISP1_PARAMS   v4l2_fourcc('R', 'K', '1', 'P') /* 
Rockchip ISP1 params */
+#define V4L2_META_FMT_RK_ISP1_STAT_3A  v4l2_fourcc('R', 'K', '1', 'S') /* 
Rockchip ISP1 3A statistics */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC0xfeedcafe
 
-- 
2.14.2



[RFC PATCH 1/5] media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

Add the Rockchip ISP1 specific processing parameter format
V4L2_META_FMT_RK_ISP1_PARAMS and metadata format
V4L2_META_FMT_RK_ISP1_STAT_3A for 3A.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++
 include/uapi/linux/videodev2.h   | 4 
 2 files changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c 
b/drivers/media/v4l2-core/v4l2-ioctl.c
index d6587b3ec33e..0604ae9ea444 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1252,6 +1252,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; 
break;
case V4L2_META_FMT_VSP1_HGO:descr = "R-Car VSP1 1-D Histogram"; 
break;
case V4L2_META_FMT_VSP1_HGT:descr = "R-Car VSP1 2-D Histogram"; 
break;
+   case V4L2_META_FMT_RK_ISP1_PARAMS:  descr = "Rockchip ISP1 3A 
params"; break;
+   case V4L2_META_FMT_RK_ISP1_STAT_3A: descr = "Rockchip ISP1 3A 
statistics"; break;
 
default:
/* Compressed formats */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e507b29ba1e0..14efa6513126 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -690,6 +690,10 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_VSP1_HGOv4l2_fourcc('V', 'S', 'P', 'H') /* R-Car 
VSP1 1-D Histogram */
 #define V4L2_META_FMT_VSP1_HGTv4l2_fourcc('V', 'S', 'P', 'T') /* R-Car 
VSP1 2-D Histogram */
 
+/* Vendor specific - used for RK_ISP1 camera sub-system */
+#define V4L2_META_FMT_RK_ISP1_PARAMS   v4l2_fourcc('R', 'K', '1', 'P') /* 
Rockchip ISP1 params */
+#define V4L2_META_FMT_RK_ISP1_STAT_3A  v4l2_fourcc('R', 'K', '1', 'S') /* 
Rockchip ISP1 3A statistics */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC0xfeedcafe
 
-- 
2.14.2



[PATCH] rtc: Add tracepoints for RTC system

2017-11-14 Thread Baolin Wang
It is helpful to have tracepoints that track RTC actions when
debugging an RTC driver. In the sample below we set/read the RTC time, then
set 2 alarms, and can see the resulting trace logs:

set/read RTC time:
kworker/1:1-586   [001]  21.826112: rtc_set_time: 2017-11-10 08:13:00 UTC 
(1510301580)
kworker/1:1-586   [001]  21.826174: rtc_read_time: 2017-11-10 08:13:00 UTC 
(1510301580)

set the first alarm timer:
kworker/1:1-586   [001]  21.841098: rtc_timer_enqueue: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/1:1-586   [001]  22.009424: rtc_set_alarm: 2017-11-10 08:15:00 UTC 
(1510301700)

set the second alarm timer:
kworker/1:1-586   [001]  22.181304: rtc_timer_enqueue: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)

the first alarm timer expired:
kworker/0:1-67[000]  145.156226: rtc_timer_dequeue: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/0:1-67[000]  145.156235: rtc_timer_fired: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/0:1-67[000]  145.173137: rtc_set_alarm: 2017-11-10 08:17:00 UTC 
(1510301820)

the second alarm timer expired:
kworker/0:1-67[000]  269.102985: rtc_timer_dequeue: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)
kworker/0:1-67[000]  269.102992: rtc_timer_fired: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)

disable alarm irq:
kworker/0:1-67[000]  269.103098: rtc_alarm_irq_enable: disable RTC 
alarm IRQ

Signed-off-by: Baolin Wang 
---
 drivers/rtc/interface.c|   46 ++
 include/trace/events/rtc.h |  215 
 2 files changed, 261 insertions(+)
 create mode 100644 include/trace/events/rtc.h

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 8cec9a0..cdd3ac8 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -17,6 +17,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
 static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 
@@ -53,6 +56,9 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
err = __rtc_read_time(rtc, tm);
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_read_time(tm);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_time);
@@ -87,6 +93,9 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
mutex_unlock(>ops_lock);
/* A timer might have just expired */
schedule_work(>irqwork);
+
+   if (!err)
+   trace_rtc_set_time(tm);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -119,6 +128,9 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, 
struct rtc_wkalrm *al
}
 
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_read_alarm(>time);
return err;
 }
 
@@ -316,6 +328,8 @@ int rtc_read_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
}
mutex_unlock(>ops_lock);
 
+   if (!err)
+   trace_rtc_read_alarm(>time);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
@@ -352,6 +366,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
else
err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
 
+   if (!err)
+   trace_rtc_set_alarm(>time);
return err;
 }
 
@@ -406,6 +422,8 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
 
rtc->aie_timer.enabled = 1;
timerqueue_add(>timerqueue, >aie_timer.node);
+   trace_rtc_timer_enqueue(>aie_timer,
+   rtc_ktime_to_tm(rtc->aie_timer.node.expires));
}
mutex_unlock(>ops_lock);
return err;
@@ -435,6 +453,9 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned 
int enabled)
err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
 
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_alarm_irq_enable(enabled);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
@@ -709,6 +730,9 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct 
rtc_task *task, int enabled
rtc->pie_enabled = enabled;
}
spin_unlock_irqrestore(>irq_task_lock, flags);
+
+   if (!err)
+   trace_rtc_irq_set_state(enabled);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
@@ -745,6 +769,9 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct 
rtc_task *task, int freq)
}
}
spin_unlock_irqrestore(>irq_task_lock, flags);
+
+   if (!err)
+   trace_rtc_irq_set_freq(freq);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
@@ -779,6 +806,7 @@ 

[PATCH] rtc: Add tracepoints for RTC system

2017-11-14 Thread Baolin Wang
It is helpful to have tracepoints that track RTC actions when
debugging an RTC driver. In the sample below we set/read the RTC time, then
set 2 alarms, and can see the resulting trace logs:

set/read RTC time:
kworker/1:1-586   [001]  21.826112: rtc_set_time: 2017-11-10 08:13:00 UTC 
(1510301580)
kworker/1:1-586   [001]  21.826174: rtc_read_time: 2017-11-10 08:13:00 UTC 
(1510301580)

set the first alarm timer:
kworker/1:1-586   [001]  21.841098: rtc_timer_enqueue: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/1:1-586   [001]  22.009424: rtc_set_alarm: 2017-11-10 08:15:00 UTC 
(1510301700)

set the second alarm timer:
kworker/1:1-586   [001]  22.181304: rtc_timer_enqueue: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)

the first alarm timer expired:
kworker/0:1-67[000]  145.156226: rtc_timer_dequeue: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/0:1-67[000]  145.156235: rtc_timer_fired: RTC 
timer:(ffc15ad913c8) 2017-11-10 08:15:00 UTC (1510301700)
kworker/0:1-67[000]  145.173137: rtc_set_alarm: 2017-11-10 08:17:00 UTC 
(1510301820)

the second alarm timer expired:
kworker/0:1-67[000]  269.102985: rtc_timer_dequeue: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)
kworker/0:1-67[000]  269.102992: rtc_timer_fired: RTC 
timer:(ff80088e6430) 2017-11-10 08:17:00 UTC (1510301820)

disable alarm irq:
kworker/0:1-67[000]  269.103098: rtc_alarm_irq_enable: disable RTC 
alarm IRQ

Signed-off-by: Baolin Wang 
---
 drivers/rtc/interface.c|   46 ++
 include/trace/events/rtc.h |  215 
 2 files changed, 261 insertions(+)
 create mode 100644 include/trace/events/rtc.h

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 8cec9a0..cdd3ac8 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -17,6 +17,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
 static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 
@@ -53,6 +56,9 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
err = __rtc_read_time(rtc, tm);
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_read_time(tm);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_time);
@@ -87,6 +93,9 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
mutex_unlock(>ops_lock);
/* A timer might have just expired */
schedule_work(>irqwork);
+
+   if (!err)
+   trace_rtc_set_time(tm);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -119,6 +128,9 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, 
struct rtc_wkalrm *al
}
 
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_read_alarm(>time);
return err;
 }
 
@@ -316,6 +328,8 @@ int rtc_read_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
}
mutex_unlock(>ops_lock);
 
+   if (!err)
+   trace_rtc_read_alarm(>time);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
@@ -352,6 +366,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
else
err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
 
+   if (!err)
+   trace_rtc_set_alarm(>time);
return err;
 }
 
@@ -406,6 +422,8 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct 
rtc_wkalrm *alarm)
 
rtc->aie_timer.enabled = 1;
timerqueue_add(>timerqueue, >aie_timer.node);
+   trace_rtc_timer_enqueue(>aie_timer,
+   rtc_ktime_to_tm(rtc->aie_timer.node.expires));
}
mutex_unlock(>ops_lock);
return err;
@@ -435,6 +453,9 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned 
int enabled)
err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
 
mutex_unlock(>ops_lock);
+
+   if (!err)
+   trace_rtc_alarm_irq_enable(enabled);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
@@ -709,6 +730,9 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct 
rtc_task *task, int enabled
rtc->pie_enabled = enabled;
}
spin_unlock_irqrestore(>irq_task_lock, flags);
+
+   if (!err)
+   trace_rtc_irq_set_state(enabled);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
@@ -745,6 +769,9 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct 
rtc_task *task, int freq)
}
}
spin_unlock_irqrestore(>irq_task_lock, flags);
+
+   if (!err)
+   trace_rtc_irq_set_freq(freq);
return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
@@ -779,6 +806,7 @@ static int 

[RFC PATCH 0/5] Rockchip ISP1 Driver

2017-11-14 Thread Jacob Chen
This patch series adds an ISP (camera) V4L2 driver for the Rockchip rk3288/rk3399 SoCs.

TODO:
  - Thomas is rewriting the binding code between the ISP, PHY and sensors; I hope we 
could get suggestions.

https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/768633/2
rules:
  - There are many MIPI interfaces ("rx0", "dxrx0") (actually they could also 
be parallel interfaces) in the SoC, and the ISP can decide which one will be used.
  - Sometimes there will be more than one sensor on a MIPI PHY; the software 
should decide which one is used (media link).
  - rk3399 have two isp.
  - Add a dummy buffer (dma_alloc_coherent) so the driver won't hold a buffer.
  - Finish all TODO comments(mostly about hardware) in driver.

To help with a quick review, I have pushed the source code to my GitHub.
  
https://github.com/wzyy2/linux/tree/rkisp1/drivers/media/platform/rockchip/isp1

Below is some information about the driver/hardware:

Rockchip ISP1 has many hardware blocks (simplified):

  MIPI  --> ISP --> DCrop(Mainpath) --> RSZ(Mainpath) --> DMA(Mainpath)
  DMA-Input --> --> DCrop(Selfpath) --> RSZ(Selfpath) --> DMA(Selfpath);)

(Actually the TRM (rk3288, isp) can be found online, and it contains 
more detailed block diagrams ;-P)

The functions of each hardware block:

  Mainpath : up to 4k resolution, support raw/yuv format
  Selfpath : up to 1080p, support rotate, support rgb/yuv format
  RSZ: scaling 
  DCrop: crop
  ISP: 3A, Color processing, Crop
  MIPI: MIPI Camera interface

Media pipelines:

  Mainpath, Selfpath <-- ISP subdev <-- MIPI  <-- Sensor
  3A stats   <--<-- 3A parms

Code struct:

  capture.c : Mainpath, Selfpath, RSZ, DCROP : capture device.
  rkisp1.c : ISP : v4l2 sub-device.
  isp_params.c : 3A params : output device.
  isp_stats.c : 3A stats : capture device.
  mipi_dphy_sy.c : MIPI : separate v4l2 sub-device.

Usage:
  ChromiumOS:
use below v4l2-ctl command to capture frames.

  v4l2-ctl --verbose -d /dev/video4 --stream-mmap=2
  --stream-to=/tmp/stream.out --stream-count=60 --stream-poll

use below command to playback the video on your PC.

  mplayer /tmp/stream.out -loop 0 --demuxer=rawvideo
  --rawvideo=w=800:h=600:size=$((800*600*2)):format=yuy2
or
  mplayer ./stream.out -loop 0 -demuxer rawvideo -rawvideo
  w=800:h=600:size=$((800*600*2)):format=yuy2

  Linux:
use rkcamsrc gstreamer plugin(just a modified v4l2src) to preview.

  gst-launch-1.0 rkcamsrc device=/dev/video0 io-mode=4 disable-3A=true
  videoconvert ! video/x-raw,format=NV12,width=640,height=480 ! kmssink

Jacob Chen (2):
  media: rkisp1: add rockchip isp1 driver
  ARM: dts: rockchip: add isp node for rk3288

Jeffy Chen (1):
  media: rkisp1: Add user space ABI definitions

Shunqian Zheng (2):
  media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format
  arm64: dts: rockchip: add isp0 node for rk3399

 arch/arm/boot/dts/rk3288.dtsi  |   24 +
 arch/arm64/boot/dts/rockchip/rk3399.dtsi   |   26 +
 drivers/media/platform/Kconfig |   10 +
 drivers/media/platform/Makefile|1 +
 drivers/media/platform/rockchip/isp1/Makefile  |9 +
 drivers/media/platform/rockchip/isp1/capture.c | 1678 
 drivers/media/platform/rockchip/isp1/capture.h |   46 +
 drivers/media/platform/rockchip/isp1/common.h  |  327 
 drivers/media/platform/rockchip/isp1/dev.c |  728 +
 drivers/media/platform/rockchip/isp1/isp_params.c  | 1556 ++
 drivers/media/platform/rockchip/isp1/isp_params.h  |   81 +
 drivers/media/platform/rockchip/isp1/isp_stats.c   |  537 +++
 drivers/media/platform/rockchip/isp1/isp_stats.h   |   81 +
 .../media/platform/rockchip/isp1/mipi_dphy_sy.c|  619 
 .../media/platform/rockchip/isp1/mipi_dphy_sy.h|   42 +
 drivers/media/platform/rockchip/isp1/regs.c|  251 +++
 drivers/media/platform/rockchip/isp1/regs.h| 1578 ++
 drivers/media/platform/rockchip/isp1/rkisp1.c  | 1132 +
 drivers/media/platform/rockchip/isp1/rkisp1.h  |  130 ++
 drivers/media/v4l2-core/v4l2-ioctl.c   |2 +
 include/uapi/linux/rkisp1-config.h |  554 +++
 include/uapi/linux/videodev2.h |4 +
 22 files changed, 9416 insertions(+)
 create mode 100644 drivers/media/platform/rockchip/isp1/Makefile
 create mode 100644 drivers/media/platform/rockchip/isp1/capture.c
 create mode 100644 drivers/media/platform/rockchip/isp1/capture.h
 create mode 100644 drivers/media/platform/rockchip/isp1/common.h
 create mode 100644 drivers/media/platform/rockchip/isp1/dev.c
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_params.c
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_params.h
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_stats.c
 create mode 100644 

[RFC PATCH 0/5] Rockchip ISP1 Driver

2017-11-14 Thread Jacob Chen
This patch series adds an ISP (camera) V4L2 driver for the Rockchip rk3288/rk3399 SoCs.

TODO:
  - Thomas is rewriting the binding code between the ISP, PHY and sensors; I hope we 
could get suggestions.

https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/768633/2
rules:
  - There are many MIPI interfaces ("rx0", "dxrx0") (actually they could also 
be parallel interfaces) in the SoC, and the ISP can decide which one will be used.
  - Sometimes there will be more than one sensor on a MIPI PHY; the software 
should decide which one is used (media link).
  - rk3399 have two isp.
  - Add a dummy buffer (dma_alloc_coherent) so the driver won't hold a buffer.
  - Finish all TODO comments(mostly about hardware) in driver.

To help with a quick review, I have pushed the source code to my GitHub.
  
https://github.com/wzyy2/linux/tree/rkisp1/drivers/media/platform/rockchip/isp1

Below is some information about the driver/hardware:

Rockchip ISP1 has many hardware blocks (simplified):

  MIPI  --> ISP --> DCrop(Mainpath) --> RSZ(Mainpath) --> DMA(Mainpath)
  DMA-Input --> --> DCrop(Selfpath) --> RSZ(Selfpath) --> DMA(Selfpath);)

(Actually the TRM (rk3288, isp) can be found online, and it contains 
more detailed block diagrams ;-P)

The functions of each hardware block:

  Mainpath : up to 4k resolution, support raw/yuv format
  Selfpath : up to 1080p, support rotate, support rgb/yuv format
  RSZ: scaling 
  DCrop: crop
  ISP: 3A, Color processing, Crop
  MIPI: MIPI Camera interface

Media pipelines:

  Mainpath, Selfpath <-- ISP subdev <-- MIPI  <-- Sensor
  3A stats   <--<-- 3A parms

Code struct:

  capture.c : Mainpath, Selfpath, RSZ, DCROP : capture device.
  rkisp1.c : ISP : v4l2 sub-device.
  isp_params.c : 3A params : output device.
  isp_stats.c : 3A stats : capture device.
  mipi_dphy_sy.c : MIPI : separate v4l2 sub-device.

Usage:
  ChromiumOS:
use below v4l2-ctl command to capture frames.

  v4l2-ctl --verbose -d /dev/video4 --stream-mmap=2
  --stream-to=/tmp/stream.out --stream-count=60 --stream-poll

use below command to playback the video on your PC.

  mplayer /tmp/stream.out -loop 0 --demuxer=rawvideo
  --rawvideo=w=800:h=600:size=$((800*600*2)):format=yuy2
or
  mplayer ./stream.out -loop 0 -demuxer rawvideo -rawvideo
  w=800:h=600:size=$((800*600*2)):format=yuy2

  Linux:
use rkcamsrc gstreamer plugin(just a modified v4l2src) to preview.

  gst-launch-1.0 rkcamsrc device=/dev/video0 io-mode=4 disable-3A=true
  videoconvert ! video/x-raw,format=NV12,width=640,height=480 ! kmssink

Jacob Chen (2):
  media: rkisp1: add rockchip isp1 driver
  ARM: dts: rockchip: add isp node for rk3288

Jeffy Chen (1):
  media: rkisp1: Add user space ABI definitions

Shunqian Zheng (2):
  media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format
  arm64: dts: rockchip: add isp0 node for rk3399

 arch/arm/boot/dts/rk3288.dtsi  |   24 +
 arch/arm64/boot/dts/rockchip/rk3399.dtsi   |   26 +
 drivers/media/platform/Kconfig |   10 +
 drivers/media/platform/Makefile|1 +
 drivers/media/platform/rockchip/isp1/Makefile  |9 +
 drivers/media/platform/rockchip/isp1/capture.c | 1678 
 drivers/media/platform/rockchip/isp1/capture.h |   46 +
 drivers/media/platform/rockchip/isp1/common.h  |  327 
 drivers/media/platform/rockchip/isp1/dev.c |  728 +
 drivers/media/platform/rockchip/isp1/isp_params.c  | 1556 ++
 drivers/media/platform/rockchip/isp1/isp_params.h  |   81 +
 drivers/media/platform/rockchip/isp1/isp_stats.c   |  537 +++
 drivers/media/platform/rockchip/isp1/isp_stats.h   |   81 +
 .../media/platform/rockchip/isp1/mipi_dphy_sy.c|  619 
 .../media/platform/rockchip/isp1/mipi_dphy_sy.h|   42 +
 drivers/media/platform/rockchip/isp1/regs.c|  251 +++
 drivers/media/platform/rockchip/isp1/regs.h| 1578 ++
 drivers/media/platform/rockchip/isp1/rkisp1.c  | 1132 +
 drivers/media/platform/rockchip/isp1/rkisp1.h  |  130 ++
 drivers/media/v4l2-core/v4l2-ioctl.c   |2 +
 include/uapi/linux/rkisp1-config.h |  554 +++
 include/uapi/linux/videodev2.h |4 +
 22 files changed, 9416 insertions(+)
 create mode 100644 drivers/media/platform/rockchip/isp1/Makefile
 create mode 100644 drivers/media/platform/rockchip/isp1/capture.c
 create mode 100644 drivers/media/platform/rockchip/isp1/capture.h
 create mode 100644 drivers/media/platform/rockchip/isp1/common.h
 create mode 100644 drivers/media/platform/rockchip/isp1/dev.c
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_params.c
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_params.h
 create mode 100644 drivers/media/platform/rockchip/isp1/isp_stats.c
 create mode 100644 

[RFC PATCH 1/5] media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

Add the Rockchip ISP1 specific processing parameter format
V4L2_META_FMT_RK_ISP1_PARAMS and metadata format
V4L2_META_FMT_RK_ISP1_STAT_3A for 3A.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++
 include/uapi/linux/videodev2.h   | 4 
 2 files changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c 
b/drivers/media/v4l2-core/v4l2-ioctl.c
index d6587b3ec33e..0604ae9ea444 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1252,6 +1252,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; 
break;
case V4L2_META_FMT_VSP1_HGO:descr = "R-Car VSP1 1-D Histogram"; 
break;
case V4L2_META_FMT_VSP1_HGT:descr = "R-Car VSP1 2-D Histogram"; 
break;
+   case V4L2_META_FMT_RK_ISP1_PARAMS:  descr = "Rockchip ISP1 3A 
params"; break;
+   case V4L2_META_FMT_RK_ISP1_STAT_3A: descr = "Rockchip ISP1 3A 
statistics"; break;
 
default:
/* Compressed formats */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e507b29ba1e0..14efa6513126 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -690,6 +690,10 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_VSP1_HGOv4l2_fourcc('V', 'S', 'P', 'H') /* R-Car 
VSP1 1-D Histogram */
 #define V4L2_META_FMT_VSP1_HGTv4l2_fourcc('V', 'S', 'P', 'T') /* R-Car 
VSP1 2-D Histogram */
 
+/* Vendor specific - used for RK_ISP1 camera sub-system */
+#define V4L2_META_FMT_RK_ISP1_PARAMS   v4l2_fourcc('R', 'K', '1', 'P') /* 
Rockchip ISP1 params */
+#define V4L2_META_FMT_RK_ISP1_STAT_3A  v4l2_fourcc('R', 'K', '1', 'S') /* 
Rockchip ISP1 3A statistics */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC0xfeedcafe
 
-- 
2.14.2



[RFC PATCH 1/5] media: videodev2.h, v4l2-ioctl: add rkisp1 meta buffer format

2017-11-14 Thread Jacob Chen
From: Shunqian Zheng 

Add the Rockchip ISP1 specific processing parameter format
V4L2_META_FMT_RK_ISP1_PARAMS and metadata format
V4L2_META_FMT_RK_ISP1_STAT_3A for 3A.

Signed-off-by: Shunqian Zheng 
Signed-off-by: Jacob Chen 
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++
 include/uapi/linux/videodev2.h   | 4 
 2 files changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c 
b/drivers/media/v4l2-core/v4l2-ioctl.c
index d6587b3ec33e..0604ae9ea444 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1252,6 +1252,8 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
case V4L2_TCH_FMT_TU08: descr = "8-bit unsigned touch data"; 
break;
case V4L2_META_FMT_VSP1_HGO:descr = "R-Car VSP1 1-D Histogram"; 
break;
case V4L2_META_FMT_VSP1_HGT:descr = "R-Car VSP1 2-D Histogram"; 
break;
+   case V4L2_META_FMT_RK_ISP1_PARAMS:  descr = "Rockchip ISP1 3A 
params"; break;
+   case V4L2_META_FMT_RK_ISP1_STAT_3A: descr = "Rockchip ISP1 3A 
statistics"; break;
 
default:
/* Compressed formats */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e507b29ba1e0..14efa6513126 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -690,6 +690,10 @@ struct v4l2_pix_format {
 #define V4L2_META_FMT_VSP1_HGOv4l2_fourcc('V', 'S', 'P', 'H') /* R-Car 
VSP1 1-D Histogram */
 #define V4L2_META_FMT_VSP1_HGTv4l2_fourcc('V', 'S', 'P', 'T') /* R-Car 
VSP1 2-D Histogram */
 
+/* Vendor specific - used for Rockchip ISP1 camera sub-system */
+#define V4L2_META_FMT_RK_ISP1_PARAMS   v4l2_fourcc('R', 'K', '1', 'P') /* 
Rockchip ISP1 params */
+#define V4L2_META_FMT_RK_ISP1_STAT_3A  v4l2_fourcc('R', 'K', '1', 'S') /* 
Rockchip ISP1 3A statistics */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC0xfeedcafe
 
-- 
2.14.2



Re: [PATCH v3 2/3] usb: xhci: Add DbC support in xHCI driver

2017-11-14 Thread Lu Baolu
Hi,

On 11/14/2017 03:28 PM, Felipe Balbi wrote:
> Hi,
>
> Mathias Nyman  writes:
>>> +static int dbc_buf_alloc(struct dbc_buf *db, unsigned int size)
>>> +{
>>> +   db->buf_buf = kzalloc(size, GFP_KERNEL);
>>> +   if (!db->buf_buf)
>>> +   return -ENOMEM;
>>> +
>>> +   db->buf_size = size;
>>> +   db->buf_put = db->buf_buf;
>>> +   db->buf_get = db->buf_buf;
>>> +
>>> +   return 0;
>>> +}
> you may wanna have a look at kfifo.
>

Yeah! kfifo gives me exactly what I want here.

I will replace it with kfifo. Thank you.

Best regards,
Lu Baolu


Re: [PATCH v3 2/3] usb: xhci: Add DbC support in xHCI driver

2017-11-14 Thread Lu Baolu
Hi,

On 11/14/2017 03:28 PM, Felipe Balbi wrote:
> Hi,
>
> Mathias Nyman  writes:
>>> +static int dbc_buf_alloc(struct dbc_buf *db, unsigned int size)
>>> +{
>>> +   db->buf_buf = kzalloc(size, GFP_KERNEL);
>>> +   if (!db->buf_buf)
>>> +   return -ENOMEM;
>>> +
>>> +   db->buf_size = size;
>>> +   db->buf_put = db->buf_buf;
>>> +   db->buf_get = db->buf_buf;
>>> +
>>> +   return 0;
>>> +}
> you may wanna have a look at kfifo.
>

Yeah! kfifo gives me exactly what I want here.

I will replace it with kfifo. Thank you.

Best regards,
Lu Baolu


Re: [PATCH] remoteproc: qcom: Fix error handling paths in order to avoid memory leaks

2017-11-14 Thread Bjorn Andersson
On Tue 14 Nov 22:58 PST 2017, Christophe JAILLET wrote:

> In case of error returned by 'q6v5_xfer_mem_ownership', we must free
> some resources before returning.
> 
> In 'q6v5_mpss_init_image()', add a new label to undo a previous
> 'dma_alloc_attrs()'.
> In 'q6v5_mpss_load()', re-use the already existing error handling code to
> undo a previous 'request_firmware()', as already done in the other error
> handling paths of the function.
> 
> Signed-off-by: Christophe JAILLET 

Thanks!

Regards,
Bjorn


Re: [PATCH] remoteproc: qcom: Fix error handling paths in order to avoid memory leaks

2017-11-14 Thread Bjorn Andersson
On Tue 14 Nov 22:58 PST 2017, Christophe JAILLET wrote:

> In case of error returned by 'q6v5_xfer_mem_ownership', we must free
> some resources before returning.
> 
> In 'q6v5_mpss_init_image()', add a new label to undo a previous
> 'dma_alloc_attrs()'.
> In 'q6v5_mpss_load()', re-use the already existing error handling code to
> undo a previous 'request_firmware()', as already done in the other error
> handling paths of the function.
> 
> Signed-off-by: Christophe JAILLET 

Thanks!

Regards,
Bjorn


Re: [PATCH] PM / runtime: Drop children check from __pm_runtime_set_status()

2017-11-14 Thread Ulf Hansson
[...]

>>
>> When pm_runtime_set_suspended(dev) is called, dev's child device may
>> still be runtime PM enabled and active.
>> I was suggesting to add a check for this scenario, to see if dev's
>> child device is runtime PM enabled, as an additional constraint
>> before deciding to return an error code.
>
> Well, that's sort of difficult to do, however, because the code would need to
> walk all of the children of the device and the child power lock cannot be
> acquired under the one of the parent, so it would be fragile and ugly.

Yeah, you have a point.

>
>> The idea was to get a consistent behavior, from the
>> pm_runtime_set_active|suspended() APIs point of view, and not from the
>> runtime PM core point of view.
>
> Yes, but the cost is high and the benefit is shallow.
>
> The enable-time WARN() should cover the really broken cases without that
> much complexity.

Fair enough!

Feel free to add:
Reviewed-by: Ulf Hansson 

Kind regards
Uffe


Re: [PATCH] PM / runtime: Drop children check from __pm_runtime_set_status()

2017-11-14 Thread Ulf Hansson
[...]

>>
>> When pm_runtime_set_suspended(dev) is called, dev's child device may
>> still be runtime PM enabled and active.
>> I was suggesting to add a check for this scenario, to see if dev's
>> child device is runtime PM enabled, as an additional constraint
>> before deciding to return an error code.
>
> Well, that's sort of difficult to do, however, because the code would need to
> walk all of the children of the device and the child power lock cannot be
> acquired under the one of the parent, so it would be fragile and ugly.

Yeah, you have a point.

>
>> The idea was to get a consistent behavior, from the
>> pm_runtime_set_active|suspended() APIs point of view, and not from the
>> runtime PM core point of view.
>
> Yes, but the cost is high and the benefit is shallow.
>
> The enable-time WARN() should cover the really broken cases without that
> much complexity.

Fair enough!

Feel free to add:
Reviewed-by: Ulf Hansson 

Kind regards
Uffe


[PATCH v3 0/3] Qualcomm Light Pulse Generator

2017-11-14 Thread Bjorn Andersson
This series introduces a generic pattern interface in the LED class and
a driver for the Qualcomm Light Pulse Generator.

Bjorn Andersson (3):
  leds: core: Introduce generic pattern interface
  leds: Add driver for Qualcomm LPG
  DT: leds: Add Qualcomm Light Pulse Generator binding

 Documentation/ABI/testing/sysfs-class-led  |   20 +
 .../devicetree/bindings/leds/leds-qcom-lpg.txt |   66 ++
 drivers/leds/Kconfig   |7 +
 drivers/leds/Makefile  |1 +
 drivers/leds/led-class.c   |  150 +++
 drivers/leds/leds-qcom-lpg.c   | 1232 
 include/linux/leds.h   |   21 +
 7 files changed, 1497 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
 create mode 100644 drivers/leds/leds-qcom-lpg.c

-- 
2.15.0



[PATCH v3 0/3] Qualcomm Light Pulse Generator

2017-11-14 Thread Bjorn Andersson
This series introduces a generic pattern interface in the LED class and
a driver for the Qualcomm Light Pulse Generator.

Bjorn Andersson (3):
  leds: core: Introduce generic pattern interface
  leds: Add driver for Qualcomm LPG
  DT: leds: Add Qualcomm Light Pulse Generator binding

 Documentation/ABI/testing/sysfs-class-led  |   20 +
 .../devicetree/bindings/leds/leds-qcom-lpg.txt |   66 ++
 drivers/leds/Kconfig   |7 +
 drivers/leds/Makefile  |1 +
 drivers/leds/led-class.c   |  150 +++
 drivers/leds/leds-qcom-lpg.c   | 1232 
 include/linux/leds.h   |   21 +
 7 files changed, 1497 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
 create mode 100644 drivers/leds/leds-qcom-lpg.c

-- 
2.15.0



[PATCH v3 3/3] DT: leds: Add Qualcomm Light Pulse Generator binding

2017-11-14 Thread Bjorn Andersson
This adds the binding document describing the three hardware blocks
related to the Light Pulse Generator found in a wide range of Qualcomm
PMICs.

Signed-off-by: Bjorn Andersson 
---

Changes since v2:
- Squashed all things into one node
- Removed quirks from the binding, compatible implies number of channels, their
  configuration etc.
- Binding describes LEDs connected as child nodes
- Support describing multi-channel LEDs
- Change style of the binding document, to match other LED bindings

Changes since v1:
- Dropped custom pattern properties
- Renamed cell-index to qcom,lpg-channel to clarify its purpose

 .../devicetree/bindings/leds/leds-qcom-lpg.txt | 66 ++
 1 file changed, 66 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt

diff --git a/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt 
b/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
new file mode 100644
index ..9cee6f9f543c
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
@@ -0,0 +1,66 @@
+Binding for Qualcomm Light Pulse Generator
+
+The Qualcomm Light Pulse Generator consists of three different hardware blocks;
+a ramp generator with lookup table, the light pulse generator and a three
+channel current sink. These blocks are found in a wide range of Qualcomm PMICs.
+
+Required properties:
+- compatible: one of:
+ "qcom,pm8916-pwm",
+ "qcom,pm8941-lpg",
+ "qcom,pm8994-lpg",
+ "qcom,pmi8994-lpg",
+ "qcom,pmi8998-lpg",
+
+Optional properties:
+- qcom,power-source: power-source used to drive the output, as defined in the
+datasheet. Should be specified if the TRILED block is
+present
+- qcom,dtest: configures the output into an internal test line of the
+ pmic. Specified by a list of u32 pairs, one pair per channel,
+ where the first value of each pair denotes the test line to drive and
+ the second configures how the value should be output, as defined in the
+ datasheet
+- #pwm-cells: should be 2, see ../pwm/pwm.txt
+
+LED subnodes:
+A set of subnodes can be used to specify LEDs connected to the LPG. Channels
+not associated with a LED are available as pwm channels, see ../pwm/pwm.txt.
+
+Required properties:
+- led-sources: list of channels associated with this LED, starting at 1 for the
+  first LPG channel
+
+Optional properties:
+- label: see Documentation/devicetree/bindings/leds/common.txt
+- default-state: see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+The following example defines a RGB LED attached to the PM8941.
+
+&spmi_bus {
+   pm8941@1 {
+   lpg {
+   compatible = "qcom,pm8941-lpg";
+   qcom,power-source = <1>;
+
+   rgb {
+   led-sources = <7 6 5>;
+   };
+   };
+   };
+};
+
+The following example defines the single PWM channel of the PM8916, which can
+be muxed by the MPP4 as a current sink.
+
+&spmi_bus {
+   pm8916@1 {
+   pm8916_pwm: pwm {
+   compatible = "qcom,pm8916-pwm";
+
+   #pwm-cells = <2>;
+   };
+   };
+};
-- 
2.15.0



[PATCH v3 2/3] leds: Add driver for Qualcomm LPG

2017-11-14 Thread Bjorn Andersson
The Light Pulse Generator (LPG) is a PWM-block found in a wide range of
PMICs from Qualcomm. It can operate on fixed parameters or based on a
lookup-table, altering the duty cycle over time - which provides the
means for e.g. hardware assisted transitions of LED brightness.

Signed-off-by: Bjorn Andersson 
---
Changes since v2:
- Squash all components into one driver
- Track PWM channels and "logical" LEDs separately
- Support multiple channels to be bound to a single LED
- Per-PMIC compatible, to deal with minor differences (e.g. value to enable
  9bit resolution for PWM)
- TRILED enablement is done atomically for all channels associated with a LED
- LUT sequencer start is done atomically for all channels associated with a LED
- Support PM8916 (PWM only), PM8941, PM8994 and PMI8998 introduced (PMI8994
  still works...)

The multiple channels per LED is currently implemented by assigning the same
pattern and same brightness to all channels. This allows the RGB LED to show
various brightness of white and do patterns in shades of white. But it's
implemented in a way that as we figure out how to expose multi-color LEDs
through the LED framework this new information could easily be applied to the
right channel, and we would have the ability to control the channels
individually.

Changes since v1:
- Remove custom DT properties for patterns
- Extract pattern interface into the LED core

 drivers/leds/Kconfig |7 +
 drivers/leds/Makefile|1 +
 drivers/leds/leds-qcom-lpg.c | 1232 ++
 3 files changed, 1240 insertions(+)
 create mode 100644 drivers/leds/leds-qcom-lpg.c

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 52ea34e337cd..ccc3aa4b2474 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -651,6 +651,13 @@ config LEDS_POWERNV
  To compile this driver as a module, choose 'm' here: the module
  will be called leds-powernv.
 
+config LEDS_QCOM_LPG
+   tristate "LED support for Qualcomm LPG"
+   depends on LEDS_CLASS
+   help
+ This option enables support for the Light Pulse Generator found in a
+ wide variety of Qualcomm PMICs.
+
 config LEDS_SYSCON
bool "LED support for LEDs on system controllers"
depends on LEDS_CLASS=y
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 35980450db9b..2d5149ca429d 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_LEDS_MAX77693)   += leds-max77693.o
 obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o
 obj-$(CONFIG_LEDS_LM355x)  += leds-lm355x.o
 obj-$(CONFIG_LEDS_BLINKM)  += leds-blinkm.o
+obj-$(CONFIG_LEDS_QCOM_LPG)+= leds-qcom-lpg.o
 obj-$(CONFIG_LEDS_SYSCON)  += leds-syscon.o
 obj-$(CONFIG_LEDS_MENF21BMC)   += leds-menf21bmc.o
 obj-$(CONFIG_LEDS_KTD2692) += leds-ktd2692.o
diff --git a/drivers/leds/leds-qcom-lpg.c b/drivers/leds/leds-qcom-lpg.c
new file mode 100644
index ..481e940d7e04
--- /dev/null
+++ b/drivers/leds/leds-qcom-lpg.c
@@ -0,0 +1,1232 @@
+/*
+ * Copyright (c) 2017 Linaro Ltd
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define LPG_PATTERN_CONFIG_REG 0x40
+#define LPG_SIZE_CLK_REG   0x41
+#define LPG_PREDIV_CLK_REG 0x42
+#define PWM_TYPE_CONFIG_REG0x43
+#define PWM_VALUE_REG  0x44
+#define PWM_ENABLE_CONTROL_REG 0x46
+#define PWM_SYNC_REG   0x47
+#define LPG_RAMP_DURATION_REG  0x50
+#define LPG_HI_PAUSE_REG   0x52
+#define LPG_LO_PAUSE_REG   0x54
+#define LPG_HI_IDX_REG 0x56
+#define LPG_LO_IDX_REG 0x57
+#define PWM_SEC_ACCESS_REG 0xd0
+#define PWM_DTEST_REG(x)   (0xe2 + (x) - 1)
+
+#define TRI_LED_SRC_SEL0x45
+#define TRI_LED_EN_CTL 0x46
+#define TRI_LED_ATC_CTL0x47
+
+#define LPG_LUT_REG(x) (0x40 + (x) * 2)
+#define RAMP_CONTROL_REG   0xc8
+
+struct lpg_channel;
+struct lpg_data;
+
+/**
+ * struct lpg - LPG device context
+ * @dev:   struct device for LPG device
+ * @map:   regmap for register access
+ * @pwm:   PWM-chip object, if operating in PWM mode
+ * @pwm_9bit_mask: bitmask for enabling 9bit pwm
+ * @lut_base:  base address of the LUT block (optional)
+ * @lut_size:  number of entries in the LUT block
+ * @lut_bitmap:

[PATCH v3 1/3] leds: core: Introduce generic pattern interface

2017-11-14 Thread Bjorn Andersson
Some LED controllers have support for autonomously controlling
brightness over time, according to some preprogrammed pattern or
function.

This adds a new optional operator that LED class drivers can implement
if they support such functionality as well as a new device attribute to
configure the pattern for a given LED.

Signed-off-by: Bjorn Andersson 
---

Changes since v2:
- None

Changes since v1:
- New patch, based on discussions following v1

 Documentation/ABI/testing/sysfs-class-led |  20 
 drivers/leds/led-class.c  | 150 ++
 include/linux/leds.h  |  21 +
 3 files changed, 191 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-led 
b/Documentation/ABI/testing/sysfs-class-led
index 5f67f7ab277b..74a7f5b1f89b 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -61,3 +61,23 @@ Description:
gpio and backlight triggers. In case of the backlight trigger,
it is useful when driving a LED which is intended to indicate
a device in a standby like state.
+
+What:  /sys/class/leds//pattern
+Date:  July 2017
+KernelVersion: 4.14
+Description:
+   Specify a pattern for the LED, for LED hardware that supports
+   altering the brightness as a function of time.
+
+   The pattern is given by a series of tuples, of brightness and
+   duration (ms). The LED is expected to traverse the series, holding
+   each brightness value for the specified duration.
+
+   Additionally a repeat marker ":|" can be appended to the
+   series, which should cause the pattern to be repeated
+   endlessly.
+
+   As LED hardware might have different capabilities and precision
+   the requested pattern might be slightly adjusted by the driver
+   and the resulting pattern of such operation should be returned
+   when this file is read.
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index b0e2d55acbd6..bd630e2ae967 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -74,6 +74,154 @@ static ssize_t max_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(max_brightness);
 
+static ssize_t pattern_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct led_classdev *led_cdev = dev_get_drvdata(dev);
+   struct led_pattern *pattern;
+   size_t offset = 0;
+   size_t count;
+   bool repeat;
+   size_t i;
+   int n;
+
+   if (!led_cdev->pattern_get)
+   return -EOPNOTSUPP;
+
+   pattern = led_cdev->pattern_get(led_cdev, &count, &repeat);
+   if (IS_ERR_OR_NULL(pattern))
+   return PTR_ERR(pattern);
+
+   for (i = 0; i < count; i++) {
+   n = snprintf(buf + offset, PAGE_SIZE - offset, "%d %d",
+pattern[i].brightness, pattern[i].delta_t);
+
+   if (offset + n >= PAGE_SIZE)
+   goto err_nospc;
+
+   offset += n;
+
+   if (i < count - 1)
+   buf[offset++] = ' ';
+   }
+
+   if (repeat) {
+   if (offset + 4 >= PAGE_SIZE)
+   goto err_nospc;
+
+   memcpy(buf + offset, " :|", 3);
+   offset += 3;
+   }
+
+   if (offset + 1 >= PAGE_SIZE)
+   goto err_nospc;
+
+   buf[offset++] = '\n';
+
+   kfree(pattern);
+   return offset;
+
+err_nospc:
+   kfree(pattern);
+   return -ENOSPC;
+}
+
+static ssize_t pattern_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t size)
+{
+   struct led_classdev *led_cdev = dev_get_drvdata(dev);
+   struct led_pattern *pattern = NULL;
+   unsigned long val;
+   char *sbegin;
+   char *elem;
+   char *s;
+   int len = 0;
+   int ret = 0;
+   bool odd = true;
+   bool repeat = false;
+
+   s = sbegin = kstrndup(buf, size, GFP_KERNEL);
+   if (!s)
+   return -ENOMEM;
+
+   /* Trim trailing newline */
+   s[strcspn(s, "\n")] = '\0';
+
+   /* If the remaining string is empty, clear the pattern */
+   if (!s[0]) {
+   ret = led_cdev->pattern_clear(led_cdev);
+   goto out;
+   }
+
+   pattern = kcalloc(size, sizeof(*pattern), GFP_KERNEL);
+   if (!pattern) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   /* Parse out the brightness & delta_t tuples and check for repeat */
+   while ((elem = strsep(&s, " ")) != NULL) {
+   if (!strcmp(elem, ":|")) {
+   repeat = true;
+   break;
+   }
+
+   ret = 

[PATCH v3 3/3] DT: leds: Add Qualcomm Light Pulse Generator binding

2017-11-14 Thread Bjorn Andersson
This adds the binding document describing the three hardware blocks
related to the Light Pulse Generator found in a wide range of Qualcomm
PMICs.

Signed-off-by: Bjorn Andersson 
---

Changes since v2:
- Squashed all things into one node
- Removed quirks from the binding, compatible implies number of channels, their
  configuration etc.
- Binding describes LEDs connected as child nodes
- Support describing multi-channel LEDs
- Change style of the binding document, to match other LED bindings

Changes since v1:
- Dropped custom pattern properties
- Renamed cell-index to qcom,lpg-channel to clarify its purpose

 .../devicetree/bindings/leds/leds-qcom-lpg.txt | 66 ++
 1 file changed, 66 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt

diff --git a/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt 
b/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
new file mode 100644
index ..9cee6f9f543c
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-qcom-lpg.txt
@@ -0,0 +1,66 @@
+Binding for Qualcomm Light Pulse Generator
+
+The Qualcomm Light Pulse Generator consists of three different hardware blocks;
+a ramp generator with lookup table, the light pulse generator and a three
+channel current sink. These blocks are found in a wide range of Qualcomm PMICs.
+
+Required properties:
+- compatible: one of:
+ "qcom,pm8916-pwm",
+ "qcom,pm8941-lpg",
+ "qcom,pm8994-lpg",
+ "qcom,pmi8994-lpg",
+ "qcom,pmi8998-lpg",
+
+Optional properties:
+- qcom,power-source: power-source used to drive the output, as defined in the
+datasheet. Should be specified if the TRILED block is
+present
+- qcom,dtest: configures the output into an internal test line of the
+ pmic. Specified by a list of u32 pairs, one pair per channel,
+ where the first value of each pair denotes the test line to drive and
+ the second configures how the value should be output, as defined in the
+ datasheet
+- #pwm-cells: should be 2, see ../pwm/pwm.txt
+
+LED subnodes:
+A set of subnodes can be used to specify LEDs connected to the LPG. Channels
+not associated with a LED are available as pwm channels, see ../pwm/pwm.txt.
+
+Required properties:
+- led-sources: list of channels associated with this LED, starting at 1 for the
+  first LPG channel
+
+Optional properties:
+- label: see Documentation/devicetree/bindings/leds/common.txt
+- default-state: see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+The following example defines a RGB LED attached to the PM8941.
+
+&spmi_bus {
+   pm8941@1 {
+   lpg {
+   compatible = "qcom,pm8941-lpg";
+   qcom,power-source = <1>;
+
+   rgb {
+   led-sources = <7 6 5>;
+   };
+   };
+   };
+};
+
+The following example defines the single PWM channel of the PM8916, which can
+be muxed by the MPP4 as a current sink.
+
+&spmi_bus {
+   pm8916@1 {
+   pm8916_pwm: pwm {
+   compatible = "qcom,pm8916-pwm";
+
+   #pwm-cells = <2>;
+   };
+   };
+};
-- 
2.15.0



[PATCH v3 2/3] leds: Add driver for Qualcomm LPG

2017-11-14 Thread Bjorn Andersson
The Light Pulse Generator (LPG) is a PWM-block found in a wide range of
PMICs from Qualcomm. It can operate on fixed parameters or based on a
lookup-table, altering the duty cycle over time - which provides the
means for e.g. hardware assisted transitions of LED brightness.

Signed-off-by: Bjorn Andersson 
---
Changes since v2:
- Squash all components into one driver
- Track PWM channels and "logical" LEDs separately
- Support multiple channels to be bound to a single LED
- Per-PMIC compatible, to deal with minor differences (e.g. value to enable
  9bit resolution for PWM)
- TRILED enablement is done atomically for all channels associated with a LED
- LUT sequencer start is done atomically for all channels associated with a LED
- Support PM8916 (PWM only), PM8941, PM8994 and PMI8998 introduced (PMI8994
  still works...)

The multiple channels per LED is currently implemented by assigning the same
pattern and same brightness to all channels. This allows the RGB LED to show
various brightness of white and do patterns in shades of white. But it's
implemented in a way that as we figure out how to expose multi-color LEDs
through the LED framework this new information could easily be applied to the
right channel, and we would have the ability to control the channels
individually.

Changes since v1:
- Remove custom DT properties for patterns
- Extract pattern interface into the LED core

 drivers/leds/Kconfig |7 +
 drivers/leds/Makefile|1 +
 drivers/leds/leds-qcom-lpg.c | 1232 ++
 3 files changed, 1240 insertions(+)
 create mode 100644 drivers/leds/leds-qcom-lpg.c

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 52ea34e337cd..ccc3aa4b2474 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -651,6 +651,13 @@ config LEDS_POWERNV
  To compile this driver as a module, choose 'm' here: the module
  will be called leds-powernv.
 
+config LEDS_QCOM_LPG
+   tristate "LED support for Qualcomm LPG"
+   depends on LEDS_CLASS
+   help
+ This option enables support for the Light Pulse Generator found in a
+ wide variety of Qualcomm PMICs.
+
 config LEDS_SYSCON
bool "LED support for LEDs on system controllers"
depends on LEDS_CLASS=y
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 35980450db9b..2d5149ca429d 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_LEDS_MAX77693)   += leds-max77693.o
 obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o
 obj-$(CONFIG_LEDS_LM355x)  += leds-lm355x.o
 obj-$(CONFIG_LEDS_BLINKM)  += leds-blinkm.o
+obj-$(CONFIG_LEDS_QCOM_LPG)+= leds-qcom-lpg.o
 obj-$(CONFIG_LEDS_SYSCON)  += leds-syscon.o
 obj-$(CONFIG_LEDS_MENF21BMC)   += leds-menf21bmc.o
 obj-$(CONFIG_LEDS_KTD2692) += leds-ktd2692.o
diff --git a/drivers/leds/leds-qcom-lpg.c b/drivers/leds/leds-qcom-lpg.c
new file mode 100644
index ..481e940d7e04
--- /dev/null
+++ b/drivers/leds/leds-qcom-lpg.c
@@ -0,0 +1,1232 @@
+/*
+ * Copyright (c) 2017 Linaro Ltd
+ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define LPG_PATTERN_CONFIG_REG 0x40
+#define LPG_SIZE_CLK_REG   0x41
+#define LPG_PREDIV_CLK_REG 0x42
+#define PWM_TYPE_CONFIG_REG0x43
+#define PWM_VALUE_REG  0x44
+#define PWM_ENABLE_CONTROL_REG 0x46
+#define PWM_SYNC_REG   0x47
+#define LPG_RAMP_DURATION_REG  0x50
+#define LPG_HI_PAUSE_REG   0x52
+#define LPG_LO_PAUSE_REG   0x54
+#define LPG_HI_IDX_REG 0x56
+#define LPG_LO_IDX_REG 0x57
+#define PWM_SEC_ACCESS_REG 0xd0
+#define PWM_DTEST_REG(x)   (0xe2 + (x) - 1)
+
+#define TRI_LED_SRC_SEL0x45
+#define TRI_LED_EN_CTL 0x46
+#define TRI_LED_ATC_CTL0x47
+
+#define LPG_LUT_REG(x) (0x40 + (x) * 2)
+#define RAMP_CONTROL_REG   0xc8
+
+struct lpg_channel;
+struct lpg_data;
+
+/**
+ * struct lpg - LPG device context
+ * @dev:   struct device for LPG device
+ * @map:   regmap for register access
+ * @pwm:   PWM-chip object, if operating in PWM mode
+ * @pwm_9bit_mask: bitmask for enabling 9bit pwm
+ * @lut_base:  base address of the LUT block (optional)
+ * @lut_size:  number of entries in the LUT block
+ * @lut_bitmap:allocation bitmap for LUT entries
+ 

[PATCH v3 1/3] leds: core: Introduce generic pattern interface

2017-11-14 Thread Bjorn Andersson
Some LED controllers have support for autonomously controlling
brightness over time, according to some preprogrammed pattern or
function.

This adds a new optional operator that LED class drivers can implement
if they support such functionality as well as a new device attribute to
configure the pattern for a given LED.

Signed-off-by: Bjorn Andersson 
---

Changes since v2:
- None

Changes since v1:
- New patch, based on discussions following v1

 Documentation/ABI/testing/sysfs-class-led |  20 
 drivers/leds/led-class.c  | 150 ++
 include/linux/leds.h  |  21 +
 3 files changed, 191 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-led 
b/Documentation/ABI/testing/sysfs-class-led
index 5f67f7ab277b..74a7f5b1f89b 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -61,3 +61,23 @@ Description:
gpio and backlight triggers. In case of the backlight trigger,
it is useful when driving a LED which is intended to indicate
a device in a standby like state.
+
+What:  /sys/class/leds//pattern
+Date:  July 2017
+KernelVersion: 4.14
+Description:
+   Specify a pattern for the LED, for LED hardware that supports
+   altering the brightness as a function of time.
+
+   The pattern is given by a series of tuples, of brightness and
+   duration (ms). The LED is expected to traverse the series, holding
+   each brightness value for the specified duration.
+
+   Additionally a repeat marker ":|" can be appended to the
+   series, which should cause the pattern to be repeated
+   endlessly.
+
+   As LED hardware might have different capabilities and precision
+   the requested pattern might be slightly adjusted by the driver
+   and the resulting pattern of such operation should be returned
+   when this file is read.
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index b0e2d55acbd6..bd630e2ae967 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -74,6 +74,154 @@ static ssize_t max_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(max_brightness);
 
+static ssize_t pattern_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct led_classdev *led_cdev = dev_get_drvdata(dev);
+   struct led_pattern *pattern;
+   size_t offset = 0;
+   size_t count;
+   bool repeat;
+   size_t i;
+   int n;
+
+   if (!led_cdev->pattern_get)
+   return -EOPNOTSUPP;
+
+   pattern = led_cdev->pattern_get(led_cdev, &count, &repeat);
+   if (IS_ERR_OR_NULL(pattern))
+   return PTR_ERR(pattern);
+
+   for (i = 0; i < count; i++) {
+   n = snprintf(buf + offset, PAGE_SIZE - offset, "%d %d",
+pattern[i].brightness, pattern[i].delta_t);
+
+   if (offset + n >= PAGE_SIZE)
+   goto err_nospc;
+
+   offset += n;
+
+   if (i < count - 1)
+   buf[offset++] = ' ';
+   }
+
+   if (repeat) {
+   if (offset + 4 >= PAGE_SIZE)
+   goto err_nospc;
+
+   memcpy(buf + offset, " :|", 3);
+   offset += 3;
+   }
+
+   if (offset + 1 >= PAGE_SIZE)
+   goto err_nospc;
+
+   buf[offset++] = '\n';
+
+   kfree(pattern);
+   return offset;
+
+err_nospc:
+   kfree(pattern);
+   return -ENOSPC;
+}
+
+static ssize_t pattern_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t size)
+{
+   struct led_classdev *led_cdev = dev_get_drvdata(dev);
+   struct led_pattern *pattern = NULL;
+   unsigned long val;
+   char *sbegin;
+   char *elem;
+   char *s;
+   int len = 0;
+   int ret = 0;
+   bool odd = true;
+   bool repeat = false;
+
+   s = sbegin = kstrndup(buf, size, GFP_KERNEL);
+   if (!s)
+   return -ENOMEM;
+
+   /* Trim trailing newline */
+   s[strcspn(s, "\n")] = '\0';
+
+   /* If the remaining string is empty, clear the pattern */
+   if (!s[0]) {
+   ret = led_cdev->pattern_clear(led_cdev);
+   goto out;
+   }
+
+   pattern = kcalloc(size, sizeof(*pattern), GFP_KERNEL);
+   if (!pattern) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   /* Parse out the brightness & delta_t tuples and check for repeat */
+   while ((elem = strsep(&s, " ")) != NULL) {
+   if (!strcmp(elem, ":|")) {
+   repeat = true;
+   break;
+   }
+
+   ret = kstrtoul(elem, 10, &val);
+ 

Re: [PATCH v6 03/11] mm, x86: Add support for eXclusive Page Frame Ownership (XPFO)

2017-11-14 Thread Dave Hansen
On 11/14/2017 07:44 PM, Matthew Wilcox wrote:
> On Mon, Nov 13, 2017 at 02:46:25PM -0800, Dave Hansen wrote:
>> On 11/13/2017 02:20 PM, Dave Hansen wrote:
>>> On 11/09/2017 05:09 PM, Tycho Andersen wrote:
>>>> which I guess is from the additional flags in grow_dev_page() somewhere down
>>>> the stack. Anyway... it seems this is a kernel allocation that's using
>>>> MIGRATE_MOVABLE, so perhaps we need some more fine tuned heuristic than just
>>>> all MOVABLE allocations are un-mapped via xpfo, and all the others are mapped.
>>>>
>>>> Do you have any ideas?
>>>
>>> It still has to do a kmap() or kmap_atomic() to be able to access it.  I
>>> thought you hooked into that.  Why isn't that path getting hit for these?
>>
>> Oh, this looks to be accessing data mapped by a buffer_head.  It
>> (rudely) accesses data via:
>>
>> void set_bh_page(struct buffer_head *bh,
>> ...
>>  bh->b_data = page_address(page) + offset;
> 
> We don't need to kmap in order to access MOVABLE allocations.  kmap is
> only needed for HIGHMEM allocations.  So there's nothing wrong with ext4
> or set_bh_page().

Yeah, it's definitely not _buggy_.

Although, I do wonder what we should do about these for XPFO.  Should we
just stick a kmap() in there and comment it?  What we really need is a
mechanism to say "use this as a kernel page" and "stop using this as a
kernel page".  kmap() does that... kinda.  It's not a perfect fit, but
it's pretty close.



Re: [PATCH v6 03/11] mm, x86: Add support for eXclusive Page Frame Ownership (XPFO)

2017-11-14 Thread Dave Hansen
On 11/14/2017 07:44 PM, Matthew Wilcox wrote:
> On Mon, Nov 13, 2017 at 02:46:25PM -0800, Dave Hansen wrote:
>> On 11/13/2017 02:20 PM, Dave Hansen wrote:
>>> On 11/09/2017 05:09 PM, Tycho Andersen wrote:
>>>> which I guess is from the additional flags in grow_dev_page() somewhere down
>>>> the stack. Anyway... it seems this is a kernel allocation that's using
>>>> MIGRATE_MOVABLE, so perhaps we need some more fine tuned heuristic than just
>>>> all MOVABLE allocations are un-mapped via xpfo, and all the others are mapped.
>>>>
>>>> Do you have any ideas?
>>>
>>> It still has to do a kmap() or kmap_atomic() to be able to access it.  I
>>> thought you hooked into that.  Why isn't that path getting hit for these?
>>
>> Oh, this looks to be accessing data mapped by a buffer_head.  It
>> (rudely) accesses data via:
>>
>> void set_bh_page(struct buffer_head *bh,
>> ...
>>  bh->b_data = page_address(page) + offset;
> 
> We don't need to kmap in order to access MOVABLE allocations.  kmap is
> only needed for HIGHMEM allocations.  So there's nothing wrong with ext4
> or set_bh_page().

Yeah, it's definitely not _buggy_.

Although, I do wonder what we should do about these for XPFO.  Should we
just stick a kmap() in there and comment it?  What we really need is a
mechanism to say "use this as a kernel page" and "stop using this as a
kernel page".  kmap() does that... kinda.  It's not a perfect fit, but
it's pretty close.



[PATCH] remoteproc: qcom: Fix error handling paths in order to avoid memory leaks

2017-11-14 Thread Christophe JAILLET
In case of error returned by 'q6v5_xfer_mem_ownership', we must free
some resources before returning.

In 'q6v5_mpss_init_image()', add a new label to undo a previous
'dma_alloc_attrs()'.
In 'q6v5_mpss_load()', re-use the already existing error handling code to
undo a previous 'request_firmware()', as already done in the other error
handling paths of the function.

Signed-off-by: Christophe JAILLET 
---
We could certainly also propagate the error code returned by
'q6v5_xfer_mem_ownership()' instead of returning an unconditional -EAGAIN.
Not sure of the potential impacts, so I've left it as-is.
---
 drivers/remoteproc/qcom_q6v5_pil.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_pil.c 
b/drivers/remoteproc/qcom_q6v5_pil.c
index a019796c363a..8a3fa2bcc9f6 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -580,7 +580,8 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const 
struct firmware *fw)
if (ret) {
dev_err(qproc->dev,
"assigning Q6 access to metadata failed: %d\n", ret);
-   return -EAGAIN;
+   ret = -EAGAIN;
+   goto free_dma_attrs;
}
 
writel(phys, qproc->rmb_base + RMB_PMI_META_DATA_REG);
@@ -599,6 +600,7 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const 
struct firmware *fw)
dev_warn(qproc->dev,
 "mdt buffer not reclaimed system may become 
unstable\n");
 
+free_dma_attrs:
dma_free_attrs(qproc->dev, fw->size, ptr, phys, dma_attrs);
 
return ret < 0 ? ret : 0;
@@ -712,7 +714,8 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
if (ret) {
dev_err(qproc->dev,
"assigning Q6 access to mpss memory failed: %d\n", ret);
-   return -EAGAIN;
+   ret = -EAGAIN;
+   goto release_firmware;
}
 
boot_addr = relocate ? qproc->mpss_phys : min_addr;
-- 
2.14.1



[PATCH] remoteproc: qcom: Fix error handling paths in order to avoid memory leaks

2017-11-14 Thread Christophe JAILLET
In case of error returned by 'q6v5_xfer_mem_ownership', we must free
some resources before returning.

In 'q6v5_mpss_init_image()', add a new label to undo a previous
'dma_alloc_attrs()'.
In 'q6v5_mpss_load()', re-use the already existing error handling code to
undo a previous 'request_firmware()', as already done in the other error
handling paths of the function.

Signed-off-by: Christophe JAILLET 
---
We could certainly also propagate the error code returned by
'q6v5_xfer_mem_ownership()' instead of returning an unconditional -EAGAIN.
Not sure of the potential impacts, so I've left it as-is.
---
 drivers/remoteproc/qcom_q6v5_pil.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_pil.c 
b/drivers/remoteproc/qcom_q6v5_pil.c
index a019796c363a..8a3fa2bcc9f6 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -580,7 +580,8 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const 
struct firmware *fw)
if (ret) {
dev_err(qproc->dev,
"assigning Q6 access to metadata failed: %d\n", ret);
-   return -EAGAIN;
+   ret = -EAGAIN;
+   goto free_dma_attrs;
}
 
writel(phys, qproc->rmb_base + RMB_PMI_META_DATA_REG);
@@ -599,6 +600,7 @@ static int q6v5_mpss_init_image(struct q6v5 *qproc, const 
struct firmware *fw)
dev_warn(qproc->dev,
 "mdt buffer not reclaimed system may become 
unstable\n");
 
+free_dma_attrs:
dma_free_attrs(qproc->dev, fw->size, ptr, phys, dma_attrs);
 
return ret < 0 ? ret : 0;
@@ -712,7 +714,8 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
if (ret) {
dev_err(qproc->dev,
"assigning Q6 access to mpss memory failed: %d\n", ret);
-   return -EAGAIN;
+   ret = -EAGAIN;
+   goto release_firmware;
}
 
boot_addr = relocate ? qproc->mpss_phys : min_addr;
-- 
2.14.1



Re: [PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-11-14 Thread Dave Young
On 11/15/17 at 01:47pm, Baoquan He wrote:
> Hi Dave,
> 
> Thanks for your effort to push this into upstream. While I have one
> concern, please see the inline comments.
> 
> On 10/24/17 at 01:31pm, Dave Young wrote:
> > Now crashkernel=X will fail if there's not enough memory at low region
> > (below 896M) when trying to reserve large memory size.  One can use
> > crashkernel=xM,high to reserve it at high region (>4G) but it is more
> > convenient to improve crashkernel=X to: 
> > 
> >  - First try to reserve X below 896M (for being compatible with old
> >kexec-tools).
> >  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
> >  - If fails, try to reserve X from MAXMEM top down.
> > 
> > It's more transparent and user-friendly.
> > 
> > If crashkernel is large and the reserved is beyond 896M, old kexec-tools
> > is not compatible with new kernel because old kexec-tools can not load
> > kernel at high memory region, there was an old discussion below:
> > https://lkml.org/lkml/2013/10/15/601
> > 
> > But actually the behavior is consistent during my test. Suppose
> > old kernel fail to reserve memory at low areas, kdump does not
> > work because no memory reserved. With this patch, suppose new kernel
> > successfully reserved memory at high areas, old kexec-tools still fail
> > to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
> > worry about the compatibility.
> > 
> > Here is the test result (kexec-tools 2.0.2, no high memory load
> > support):
> > Crashkernel over 4G:
> > # cat /proc/iomem|grep Crash
> >   be00-cdff : Crash kernel
> >   21300-21eff : Crash kernel
> > # ./kexec  -p /boot/vmlinuz-`uname -r`
> > Memory for crashkernel is not reserved
> > Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
> > Then try loading kdump kernel
> > 
> > crashkernel: 896M-4G:
> > # cat /proc/iomem|grep Crash
> >   9600-cdef : Crash kernel
> > # ./kexec -p /boot/vmlinuz-4.14.0-rc4+
> > ELF core (kcore) parse failed
> > Cannot load /boot/vmlinuz-4.14.0-rc4+
> > 
> > Signed-off-by: Dave Young 
> > ---
> >  arch/x86/kernel/setup.c |   16 
> >  1 file changed, 16 insertions(+)
> > 
> > --- linux-x86.orig/arch/x86/kernel/setup.c
> > +++ linux-x86/arch/x86/kernel/setup.c
> > @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
> > high ? CRASH_ADDR_HIGH_MAX
> >  : CRASH_ADDR_LOW_MAX,
> > crash_size, CRASH_ALIGN);
> > +#ifdef CONFIG_X86_64
> > +   /*
> > +* crashkernel=X reserve below 896M fails? Try below 4G
> > +*/
> > +   if (!high && !crash_base)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   (1ULL << 32),
> > +   crash_size, CRASH_ALIGN);
> > +   /*
> > +* crashkernel=X reserve below 4G fails? Try MAXMEM
> > +*/
> > +   if (!high && !crash_base)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   CRASH_ADDR_HIGH_MAX,
> > +   crash_size, CRASH_ALIGN);
> 
> For kdump, most of systems are x86 64. If both Yinghai and Vivek have no
> objection to search an available region of crash_size above 896M
> naturally, why don't we search it with function
> __memblock_find_range_bottom_up(). It can search from below 896M to
> above 4G, almost the same as the change you have made currently. Mainly
> the code will be much simpler.
> 
> Searching several times does not look good and is a little confusing.
> 
> What do you think?

Bao, thanks for the comment. It might be a good idea; I will explore this
approach to see if there are risks in going with your suggestion.

> 
> Thanks
> Baoquan
> 
> > +#endif
> > if (!crash_base) {
> > pr_info("crashkernel reservation failed - No suitable 
> > area found.\n");
> > return;
> > 
> > 


Re: [PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-11-14 Thread Dave Young
On 11/15/17 at 01:47pm, Baoquan He wrote:
> Hi Dave,
> 
> Thanks for your effort to push this into upstream. While I have one
> concern, please see the inline comments.
> 
> On 10/24/17 at 01:31pm, Dave Young wrote:
> > Now crashkernel=X will fail if there's not enough memory at low region
> > (below 896M) when trying to reserve large memory size.  One can use
> > crashkernel=xM,high to reserve it at high region (>4G) but it is more
> > convenient to improve crashkernel=X to: 
> > 
> >  - First try to reserve X below 896M (for being compatible with old
> >kexec-tools).
> >  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
> >  - If fails, try to reserve X from MAXMEM top down.
> > 
> > It's more transparent and user-friendly.
> > 
> > If crashkernel is large and the reserved is beyond 896M, old kexec-tools
> > is not compatible with new kernel because old kexec-tools can not load
> > kernel at high memory region, there was an old discussion below:
> > https://lkml.org/lkml/2013/10/15/601
> > 
> > But actually the behavior is consistent during my test. Suppose
> > old kernel fail to reserve memory at low areas, kdump does not
> > work because no memory reserved. With this patch, suppose new kernel
> > successfully reserved memory at high areas, old kexec-tools still fail
> > to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
> > worry about the compatibility.
> > 
> > Here is the test result (kexec-tools 2.0.2, no high memory load
> > support):
> > Crashkernel over 4G:
> > # cat /proc/iomem|grep Crash
> >   be00-cdff : Crash kernel
> >   21300-21eff : Crash kernel
> > # ./kexec  -p /boot/vmlinuz-`uname -r`
> > Memory for crashkernel is not reserved
> > Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
> > Then try loading kdump kernel
> > 
> > crashkernel: 896M-4G:
> > # cat /proc/iomem|grep Crash
> >   9600-cdef : Crash kernel
> > # ./kexec -p /boot/vmlinuz-4.14.0-rc4+
> > ELF core (kcore) parse failed
> > Cannot load /boot/vmlinuz-4.14.0-rc4+
> > 
> > Signed-off-by: Dave Young 
> > ---
> >  arch/x86/kernel/setup.c |   16 
> >  1 file changed, 16 insertions(+)
> > 
> > --- linux-x86.orig/arch/x86/kernel/setup.c
> > +++ linux-x86/arch/x86/kernel/setup.c
> > @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
> > high ? CRASH_ADDR_HIGH_MAX
> >  : CRASH_ADDR_LOW_MAX,
> > crash_size, CRASH_ALIGN);
> > +#ifdef CONFIG_X86_64
> > +   /*
> > +* crashkernel=X reserve below 896M fails? Try below 4G
> > +*/
> > +   if (!high && !crash_base)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   (1ULL << 32),
> > +   crash_size, CRASH_ALIGN);
> > +   /*
> > +* crashkernel=X reserve below 4G fails? Try MAXMEM
> > +*/
> > +   if (!high && !crash_base)
> > +   crash_base = memblock_find_in_range(CRASH_ALIGN,
> > +   CRASH_ADDR_HIGH_MAX,
> > +   crash_size, CRASH_ALIGN);
> 
> For kdump, most of systems are x86 64. If both Yinghai and Vivek have no
> objection to search an available region of crash_size above 896M
> naturally, why don't we search it with function
> __memblock_find_range_bottom_up(). It can search from below 896M to
> above 4G, almost the same as the change you have made currently. Mainly
> the code will be much simpler.
> 
> Searching several times does not look good and is a little confusing.
> 
> What do you think?

Bao, thanks for the comment. It might be a good idea; I will explore this
approach to see if there are risks in going with your suggestion.

> 
> Thanks
> Baoquan
> 
> > +#endif
> > if (!crash_base) {
> > pr_info("crashkernel reservation failed - No suitable 
> > area found.\n");
> > return;
> > 
> > 


Re: [PATCH 1/2] x86,kvm: move qemu/guest FPU switching out to vcpu_run

2017-11-14 Thread quan.x...@gmail.com



On 2017/11/15 05:54, r...@redhat.com wrote:

From: Rik van Riel 

Currently, every time a VCPU is scheduled out, the host kernel will
first save the guest FPU/xstate context, then load the qemu userspace
FPU context, only to then immediately save the qemu userspace FPU
context back to memory. When scheduling in a VCPU, the same extraneous
FPU loads and saves are done.


Rik, be careful with VM migration. With your patch, I don't think you
could load the fpu/xstate context accurately after VM migration.


Quan
Alibaba Cloud

This could be avoided by moving from a model where the guest FPU is
loaded and stored with preemption disabled, to a model where the
qemu userspace FPU is swapped out for the guest FPU context for
the duration of the KVM_RUN ioctl.

This is done under the VCPU mutex, which is also taken when other
tasks inspect the VCPU FPU context, so the code should already be
safe for this change. That should come as no surprise, given that
s390 already has this optimization.

No performance changes were detected in quick ping-pong tests on
my 4 socket system, which is expected since an FPU+xstate load is
on the order of 0.1us, while ping-ponging between CPUs is on the
order of 20us, and somewhat noisy.

There may be other tests where performance changes are noticeable.

Signed-off-by: Rik van Riel 
Suggested-by: Christian Borntraeger 
---
  arch/x86/include/asm/kvm_host.h | 13 +
  arch/x86/kvm/x86.c  | 34 +-
  include/linux/kvm_host.h|  2 +-
  3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d7d856b2d89..ffe54958491f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
  
+	/*

+* QEMU userspace and the guest each have their own FPU state.
+* In vcpu_run, we switch between the user and guest FPU contexts.
+* While running a VCPU, the VCPU thread will have the guest FPU
+* context.
+*
+* Note that while the PKRU state lives inside the fpu registers,
+* it is switched out separately at VMENTER and VMEXIT time. The
+* "guest_fpu" state here contains the guest FPU context, with the
+* host PKRU bits.
+*/
+   struct fpu user_fpu;
struct fpu guest_fpu;
+
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03869eb7fcd6..aad5181ed4e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2917,7 +2917,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(>kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
-   kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
  }
  
@@ -5228,13 +5227,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
  
  static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)

  {
-   preempt_disable();
-   kvm_load_guest_fpu(emul_to_vcpu(ctxt));
  }
  
  static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)

  {
-   preempt_enable();
  }
  
  static int emulator_intercept(struct x86_emulate_ctxt *ctxt,

@@ -6908,7 +6904,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
  
  	kvm_x86_ops->prepare_guest_switch(vcpu);

-   kvm_load_guest_fpu(vcpu);
  
  	/*

 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -7255,12 +7250,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
}
}
  
+	kvm_load_guest_fpu(vcpu);

+
if (unlikely(vcpu->arch.complete_userspace_io)) {
int (*cui)(struct kvm_vcpu *) = 
vcpu->arch.complete_userspace_io;
vcpu->arch.complete_userspace_io = NULL;
r = cui(vcpu);
if (r <= 0)
-   goto out;
+   goto out_fpu;
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
  
@@ -7269,6 +7266,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

else
r = vcpu_run(vcpu);
  
+out_fpu:

+   kvm_put_guest_fpu(vcpu);
  out:
post_kvm_run_save(vcpu);
if (vcpu->sigset_active)
@@ -7663,32 +7662,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
  }
  
+/* Swap (qemu) user FPU context for the guest FPU context. */

  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
-   if (vcpu->guest_fpu_loaded)
-   return;
-
-   /*
-* Restore all possible states in the guest,
-* and assume host would use all available bits.
-* 

Re: [PATCH 1/2] x86,kvm: move qemu/guest FPU switching out to vcpu_run

2017-11-14 Thread quan.x...@gmail.com



On 2017/11/15 05:54, r...@redhat.com wrote:

From: Rik van Riel 

Currently, every time a VCPU is scheduled out, the host kernel will
first save the guest FPU/xstate context, then load the qemu userspace
FPU context, only to then immediately save the qemu userspace FPU
context back to memory. When scheduling in a VCPU, the same extraneous
FPU loads and saves are done.


Rik, be careful with VM migration. With your patch, I don't think you
could load the fpu/xstate context accurately after VM migration.


Quan
Alibaba Cloud

This could be avoided by moving from a model where the guest FPU is
loaded and stored with preemption disabled, to a model where the
qemu userspace FPU is swapped out for the guest FPU context for
the duration of the KVM_RUN ioctl.

This is done under the VCPU mutex, which is also taken when other
tasks inspect the VCPU FPU context, so the code should already be
safe for this change. That should come as no surprise, given that
s390 already has this optimization.

No performance changes were detected in quick ping-pong tests on
my 4 socket system, which is expected since an FPU+xstate load is
on the order of 0.1us, while ping-ponging between CPUs is on the
order of 20us, and somewhat noisy.

There may be other tests where performance changes are noticeable.

Signed-off-by: Rik van Riel 
Suggested-by: Christian Borntraeger 
---
  arch/x86/include/asm/kvm_host.h | 13 +
  arch/x86/kvm/x86.c  | 34 +-
  include/linux/kvm_host.h|  2 +-
  3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d7d856b2d89..ffe54958491f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
  
+	/*

+* QEMU userspace and the guest each have their own FPU state.
+* In vcpu_run, we switch between the user and guest FPU contexts.
+* While running a VCPU, the VCPU thread will have the guest FPU
+* context.
+*
+* Note that while the PKRU state lives inside the fpu registers,
+* it is switched out separately at VMENTER and VMEXIT time. The
+* "guest_fpu" state here contains the guest FPU context, with the
+* host PKRU bits.
+*/
+   struct fpu user_fpu;
struct fpu guest_fpu;
+
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03869eb7fcd6..aad5181ed4e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2917,7 +2917,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(>kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
-   kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
  }
  
@@ -5228,13 +5227,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
  
  static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)

  {
-   preempt_disable();
-   kvm_load_guest_fpu(emul_to_vcpu(ctxt));
  }
  
  static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)

  {
-   preempt_enable();
  }
  
  static int emulator_intercept(struct x86_emulate_ctxt *ctxt,

@@ -6908,7 +6904,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
  
  	kvm_x86_ops->prepare_guest_switch(vcpu);

-   kvm_load_guest_fpu(vcpu);
  
  	/*

 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -7255,12 +7250,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
}
}
  
+	kvm_load_guest_fpu(vcpu);

+
if (unlikely(vcpu->arch.complete_userspace_io)) {
int (*cui)(struct kvm_vcpu *) = 
vcpu->arch.complete_userspace_io;
vcpu->arch.complete_userspace_io = NULL;
r = cui(vcpu);
if (r <= 0)
-   goto out;
+   goto out_fpu;
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
  
@@ -7269,6 +7266,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

else
r = vcpu_run(vcpu);
  
+out_fpu:

+   kvm_put_guest_fpu(vcpu);
  out:
post_kvm_run_save(vcpu);
if (vcpu->sigset_active)
@@ -7663,32 +7662,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
  }
  
+/* Swap (qemu) user FPU context for the guest FPU context. */

  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
-   if (vcpu->guest_fpu_loaded)
-   return;
-
-   /*
-* Restore all possible states in the guest,
-* and assume host would use all available bits.
-* Guest xcr0 would be loaded later.
-*/
-   

Re: Adding rseq tree to -next

2017-11-14 Thread Mathieu Desnoyers
- On Nov 14, 2017, at 11:38 PM, Stephen Rothwell s...@canb.auug.org.au 
wrote:

> Hi Mathieu,
> 
> On Wed, 15 Nov 2017 01:22:04 + (UTC) Mathieu Desnoyers
>  wrote:
>>
>> - On Nov 14, 2017, at 7:15 PM, Stephen Rothwell s...@canb.auug.org.au 
>> wrote:
>> 
>> > On Tue, 14 Nov 2017 23:54:06 + (UTC) Mathieu Desnoyers
>> >  wrote:
>> >>
>> >> Would it be possible to add the "rseq" tree to -next for testing ?
>> >> 
>> >> I prepared a branch at:
>> >> 
>> >> https://git.kernel.org/pub/scm/linux/kernel/git/rseq/linux-rseq.git
>> >> branch: rseq/for-next
>> > 
>> > I try not to add new trees during the merge window (the only exceptions
>> > are for trees that will remain empty until after the merge window
>> > closes or trees only containing material for the current merge window -
>> > and in that case it is a bit late and a pain if it interacts with
>> > anything else).
>> > 
>> > I will add it after -rc1 is released, though.  Please remind me if I
>> > forget.
>> 
>> No worries, sorry for the short notice. I'll try to do a merge
>> attempt into -next on my end before sending to Linus then.
> 
> OK, since you intend to ask Linus to merge it during this merge window,
> I have added it from today (I hope I don't regret it too much :-)).

Thanks! I did attempt to do the merge with -next myself, and the
conflicts were pretty much trivial to handle. One I have not seen
in your messages so far is the comment added to mmdrop() on x86. The
function moves from a static inline in a header to a standard function
(in a C file), so the comment should move accordingly.

Thank you,

Mathieu

> 
> Thanks for adding your subsystem tree as a participant of linux-next.  As
> you may know, this is not a judgement of your code.  The purpose of
> linux-next is for integration testing and to lower the impact of
> conflicts between subsystems in the next merge window.
> 
> You will need to ensure that the patches/commits in your tree/series have
> been:
> * submitted under GPL v2 (or later) and include the Contributor's
>Signed-off-by,
> * posted to the relevant mailing list,
> * reviewed by you (or another maintainer of your subsystem tree),
> * successfully unit tested, and
> * destined for the current or next Linux merge window.
> 
> Basically, this should be just what you would send to Linus (or ask him
> to fetch).  It is allowed to be rebased if you deem it necessary.
> 
> --
> Cheers,
> Stephen Rothwell
> s...@canb.auug.org.au

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


Re: Adding rseq tree to -next

2017-11-14 Thread Mathieu Desnoyers
- On Nov 14, 2017, at 11:38 PM, Stephen Rothwell s...@canb.auug.org.au 
wrote:

> Hi Mathieu,
> 
> On Wed, 15 Nov 2017 01:22:04 + (UTC) Mathieu Desnoyers
>  wrote:
>>
>> - On Nov 14, 2017, at 7:15 PM, Stephen Rothwell s...@canb.auug.org.au 
>> wrote:
>> 
>> > On Tue, 14 Nov 2017 23:54:06 + (UTC) Mathieu Desnoyers
>> >  wrote:
>> >>
>> >> Would it be possible to add the "rseq" tree to -next for testing ?
>> >> 
>> >> I prepared a branch at:
>> >> 
>> >> https://git.kernel.org/pub/scm/linux/kernel/git/rseq/linux-rseq.git
>> >> branch: rseq/for-next
>> > 
>> > I try not to add new trees during the merge window (the only exceptions
>> > are for trees that will remain empty until after the merge window
>> > closes or trees only containing material for the current merge window -
>> > and in that case it is a bit late and a pain if it interacts with
>> > anything else).
>> > 
>> > I will add it after -rc1 is released, though.  Please remind me if I
>> > forget.
>> 
>> No worries, sorry for the short notice. I'll try to do a merge
>> attempt into -next on my end before sending to Linus then.
> 
> OK, since you intend to ask Linus to merge it during this merge window,
> I have added it from today (I hope I don't regret it too much :-)).

Thanks! I did attempt to do the merge with -next myself, and the
conflicts were pretty much trivial to handle. One I have not seen
in your messages so far is the comment added to mmdrop() on x86. The
function moves from a static inline in a header to a standard function
(in a C file), so the comment should move accordingly.

Thank you,

Mathieu

> 
> Thanks for adding your subsystem tree as a participant of linux-next.  As
> you may know, this is not a judgement of your code.  The purpose of
> linux-next is for integration testing and to lower the impact of
> conflicts between subsystems in the next merge window.
> 
> You will need to ensure that the patches/commits in your tree/series have
> been:
> * submitted under GPL v2 (or later) and include the Contributor's
>Signed-off-by,
> * posted to the relevant mailing list,
> * reviewed by you (or another maintainer of your subsystem tree),
> * successfully unit tested, and
> * destined for the current or next Linux merge window.
> 
> Basically, this should be just what you would send to Linus (or ask him
> to fetch).  It is allowed to be rebased if you deem it necessary.
> 
> --
> Cheers,
> Stephen Rothwell
> s...@canb.auug.org.au

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


Re: [PATCH 00/24] staging: ccree: more cleanup patches

2017-11-14 Thread Gilad Ben-Yossef
On Tue, Nov 14, 2017 at 11:48 AM, Dan Carpenter
 wrote:
> On Tue, Nov 14, 2017 at 11:33:20AM +0200, Gilad Ben-Yossef wrote:
>> On Mon, Nov 13, 2017 at 8:33 PM, Dan Carpenter  
>> wrote:
>> > These cleanups look nice.  Thanks.
>> >
>> > I hope you do a mass remove of likely/unlikely in a patch soon.
>> > Whenever I see one of those in a + line, I always have to remind myself
>> > that you're planning to do it in a later patch.
>> >
>>
>> So, a question about that - there indeed seems to be an inflation of
>> likely/unlikely in the ccree driver, but
>> what stopped me from removing them was that I found out I don't have a
>> clue about when it's a good idea
>> to use them and when it isn't (obviously in places where you know the
>> probable code flow of course).
>>
>> Any hints?
>
> They should only be included if benchmarking shows that it makes a
> difference.  I think they need to be about 100 right predictions to 1
> wrong prediction on a fast path.  So remove them all and add them back
> one at a time.
>

OK, that makes a lot of sense.

Thanks,
Gilad


-- 
Gilad Ben-Yossef
Chief Coffee Drinker

"If you take a class in large-scale robotics, can you end up in a
situation where the homework eats your dog?"
 -- Jean-Baptiste Queru


Re: [PATCH 00/24] staging: ccree: more cleanup patches

2017-11-14 Thread Gilad Ben-Yossef
On Tue, Nov 14, 2017 at 11:48 AM, Dan Carpenter
 wrote:
> On Tue, Nov 14, 2017 at 11:33:20AM +0200, Gilad Ben-Yossef wrote:
>> On Mon, Nov 13, 2017 at 8:33 PM, Dan Carpenter  
>> wrote:
>> > These cleanups look nice.  Thanks.
>> >
>> > I hope you do a mass remove of likely/unlikely in a patch soon.
>> > Whenever I see one of those in a + line, I always have to remind myself
>> > that you're planning to do it in a later patch.
>> >
>>
>> So, a question about that - there indeed seems to be an inflation of
>> likely/unlikely in the ccree driver, but
>> what stopped me from removing them was that I found out I don't have a
>> clue about when it's a good idea
>> to use them and when it isn't (obviously in places where you know the
>> probable code flow of course).
>>
>> Any hints?
>
> They should only be included if benchmarking shows that it makes a
> difference.  I think they need to be about 100 right predictions to 1
> wrong prediction on a fast path.  So remove them all and add them back
> one at a time.
>

OK, that makes a lot of sense.

Thanks,
Gilad


-- 
Gilad Ben-Yossef
Chief Coffee Drinker

"If you take a class in large-scale robotics, can you end up in a
situation where the homework eats your dog?"
 -- Jean-Baptiste Queru


[PATCH] Input: ALPS - fix DualPoint flag for 74 03 28 devices

2017-11-14 Thread Aaron Ma
There is a regression of commit 4a646580f793 ("Input: ALPS - fix
two-finger scroll breakage"), ALPS device fails with log:

psmouse serio1: alps: Rejected trackstick packet from non DualPoint device

The ALPS device with id "74 03 28" reports OTP[0] data 0xCE after
commit 4a646580f793; after restoring the OTP reading order,
it reads 0x10 as before and reports the right flag.

Fixes: 4a646580f793 ("Input: ALPS - fix two-finger scroll breakage")
Cc: 
Signed-off-by: Aaron Ma 
---
 drivers/input/mouse/alps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 579b899add26..c59b8f7ca2fc 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2562,8 +2562,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse 
*psmouse,
 
memset(otp, 0, sizeof(otp));
 
-   if (alps_get_otp_values_ss4_v2(psmouse, 1, [1][0]) ||
-   alps_get_otp_values_ss4_v2(psmouse, 0, [0][0]))
+   if (alps_get_otp_values_ss4_v2(psmouse, 0, [0][0]) ||
+   alps_get_otp_values_ss4_v2(psmouse, 1, [1][0]))
return -1;
 
alps_update_device_area_ss4_v2(otp, priv);
-- 
2.13.6



[PATCH] Input: ALPS - fix DualPoint flag for 74 03 28 devices

2017-11-14 Thread Aaron Ma
There is a regression of commit 4a646580f793 ("Input: ALPS - fix
two-finger scroll breakage"), ALPS device fails with log:

psmouse serio1: alps: Rejected trackstick packet from non DualPoint device

The ALPS device with id "74 03 28" reports OTP[0] data 0xCE after
commit 4a646580f793; after restoring the OTP reading order,
it reads 0x10 as before and reports the right flag.

Fixes: 4a646580f793 ("Input: ALPS - fix two-finger scroll breakage")
Cc: 
Signed-off-by: Aaron Ma 
---
 drivers/input/mouse/alps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 579b899add26..c59b8f7ca2fc 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2562,8 +2562,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse 
*psmouse,
 
memset(otp, 0, sizeof(otp));
 
-   if (alps_get_otp_values_ss4_v2(psmouse, 1, [1][0]) ||
-   alps_get_otp_values_ss4_v2(psmouse, 0, [0][0]))
+   if (alps_get_otp_values_ss4_v2(psmouse, 0, [0][0]) ||
+   alps_get_otp_values_ss4_v2(psmouse, 1, [1][0]))
return -1;
 
alps_update_device_area_ss4_v2(otp, priv);
-- 
2.13.6



Re: [RFC PATCH for 4.15 00/24] Restartable sequences and CPU op vector v11

2017-11-14 Thread Mathieu Desnoyers
- On Nov 14, 2017, at 11:12 PM, Andy Lutomirski l...@amacapital.net wrote:

>> On Nov 14, 2017, at 1:32 PM, Mathieu Desnoyers 
>> 
>> wrote:
>>
>> - On Nov 14, 2017, at 4:15 PM, Andy Lutomirski l...@amacapital.net wrote:
>>
>>
>> One thing I kept, however, that diverges from your recommendation is the
>> "sign" parameter to the rseq syscall. I prefer this flexible
>> approach to a hardcoded signature value. We never know when we may
>> need to randomize or change this in the future.
>>
>> Regarding the abort target signature vs. the x86 disassemblers, I used a
>> 5-byte no-op on x86 32/64:
>>
>>  x86-32: nopl <sig>
>>  x86-64: nopl <sig>(%rip)
> 
> I still don't see how this can possibly work well with libraries.  If
> glibc or whatever issues the syscall and registers some signature,
> that signature *must* match the expectation of all libraries used in
> that thread or it's not going to work.

Here is how I envision this signature can eventually be randomized:

A librseq.so provided by glibc manages rseq thread registration. That
library could generate a random uint32_t value as signature for each
process within a constructor, as well as lazily upon first call to
signature query function (whichever comes first).

The constructors of every program/library using rseq would invoke
a signature getter function to query the random value, and iterate over
a section of pointers to signatures, and update those as part of the
constructors (temporarily mprotecting the pages as writeable).

Given that this would prevent page sharing across processes due to
CoW, I would not advise going for this randomized signature solution
unless necessary, but I think it's good to keep the door open to this
by keeping a uint32_t sig argument to sys_rseq.


> I can see two reasonable ways
> to handle it:
> 
> 1. The signature is just a well-known constant.  If you have an rseq
> abort landing site, you end up with something like:
> 
> nopl $11223344(%rip)
> landing_site:
> 
> or whatever the constant is.

If librseq.so passes a hardcoded constant to sys_rseq, then my solution
is very similar to this one, except that mine can allow randomized
signatures in the future from a kernel ABI perspective.


> 
> 2. The signature varies depending on the rseq_cs in use.  So you get:
> 
> static struct rseq_cs this_cs = {
>  .signature = 0x55667788;
>  ...
> };
> 
> and then the abort landing site has:
> 
> nopl $11223344(%rip)
> nopl $55667788(%rax)
> landing_site:

AFAIU, this solution defeats the purpose of having code signatures in the
first place. An attacker simply has to:

1) Craft a dummy struct rseq_cs on the stack, with:

struct rseq_cs {
  .signature = ,
  .start_ip = 0x0,
  .len = -1UL,
  .abort_ip = ,
}

2) Store the address of this dummy struct rseq_cs into __rseq_abi.rseq_cs.

3) Profit.

You should _never_ compare the signature in the code with an integer
value which can end up being controlled by the attacker.

Passing the signature to the system call upon registration leaves to the
kernel the job of keeping that signature around. An attacker would need
to first invoke sys_rseq to unregister the current __rseq_abi and re-register
with another signature in order to make this work. If an attacker has that
much access to control program execution and issue system calls at will,
then the game is already lost: they already control the execution flow,
so what's the point in trying to prevent branching to a specific address ?

> 
> The former is a bit easier to deal with.  The latter has the nice
> property that you can't subvert one rseq_cs to land somewhere else,
> but it's not clear to me what actual attack this prevents, so I
> think I prefer #1.  I just think that your variant is asking for
> trouble down the road with incompatible userspace.

As described above, user-space can easily make the signature randomization
work by having all users patch code within constructors.

Thanks,

Mathieu


> 
> --Andy

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


Re: [RFC PATCH for 4.15 00/24] Restartable sequences and CPU op vector v11

2017-11-14 Thread Mathieu Desnoyers
- On Nov 14, 2017, at 11:12 PM, Andy Lutomirski l...@amacapital.net wrote:

>> On Nov 14, 2017, at 1:32 PM, Mathieu Desnoyers 
>> 
>> wrote:
>>
>> - On Nov 14, 2017, at 4:15 PM, Andy Lutomirski l...@amacapital.net wrote:
>>
>>
>> One thing I kept however that diverges from your recommendation is the
>> "sign" parameter to the rseq syscall. I prefer this flexible
>> approach to a hardcoded signature value. We never know when we may
>> need to randomize or change this in the future.
>>
>> Regarding the abort target signature vs. x86 disassemblers, I used a
>> 5-byte no-op on x86 32/64:
>>
>>  x86-32: nopl 
>>  x86-64: nopl (%rip)
> 
> I still don't see how this can possibly work well with libraries.  If
> glibc or whatever issues the syscall and registers some signature,
> that signature *must* match the expectation of all libraries used in
> that thread or it's not going to work.

Here is how I envision this signature can eventually be randomized:

A librseq.so provided by glibc manages rseq thread registration. That
library could generate a random uint32_t value as signature for each
process within a constructor, as well as lazily upon first call to
signature query function (whichever comes first).

The constructors of every program/library using rseq would invoke
a signature getter function to query the random value, and iterate over
a section of pointers to signatures, and update those as part of the
constructors (temporarily mprotecting the pages as writeable).

Given that this would prevent page sharing across processes due to
CoW, I would not advise going for this randomized signature solution
unless necessary, but I think it's good to keep the door open to this
by keeping a uint32_t sig argument to sys_rseq.


> I can see two reasonable ways
> to handle it:
> 
> 1. The signature is just a well-known constant.  If you have an rseq
> abort landing site, you end up with something like:
> 
> nopl $11223344(%rip)
> landing_site:
> 
> or whatever the constant is.

If librseq.so passes a hardcoded constant to sys_rseq, then my solution
is very similar to this one, except that mine can allow randomized
signatures in the future from a kernel ABI perspective.


> 
> 2. The signature varies depending on the rseq_cs in use.  So you get:
> 
> static struct rseq_cs this_cs = {
>  .signature = 0x55667788;
>  ...
> };
> 
> and then the abort landing site has:
> 
> nopl $11223344(%rip)
> nopl $55667788(%rax)
> landing_site:

AFAIU, this solution defeats the purpose of having code signatures in the
first place. An attacker simply has to:

1) Craft a dummy struct rseq_cs on the stack, with:

struct rseq_cs {
  .signature = ,
  .start_ip = 0x0,
  .len = -1UL,
  .abort_ip = ,
}

2) Store the address of this dummy struct rseq_cs into __rseq_abi.rseq_cs.

3) Profit.

You should _never_ compare the signature in the code with an integer
value which can end up being controlled by the attacker.

Passing the signature to the system call upon registration leaves to the
kernel the job of keeping that signature around. An attacker would need
to first invoke sys_rseq to unregister the current __rseq_abi and re-register
with another signature in order to make this work. If an attacker has that
much access to control program execution and issue system calls at will,
then the game is already lost: they already control the execution flow,
so what's the point in trying to prevent branching to a specific address ?

> 
> The former is a bit easier to deal with.  The latter has the nice
> property that you can't subvert one rseq_cs to land somewhere else,
> but it's not clear to me what actual attack this prevents, so I
> think I prefer #1.  I just think that your variant is asking for
> trouble down the road with incompatible userspace.

As described above, user-space can easily make the signature randomization
work by having all users patch code within constructors.

Thanks,

Mathieu


> 
> --Andy

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com


Re: [PATCH] samples: replace FSF address with web source in license notices

2017-11-14 Thread Greg KH
On Tue, Nov 14, 2017 at 10:50:37AM +0100, Martin Kepplinger wrote:
> A few years ago the FSF moved and "59 Temple Place" is wrong. Having this
> still in our source files feels old and unmaintained.
> 
> Let's take the license statement seriously and not confuse users.
> 
> As https://www.gnu.org/licenses/gpl-howto.html suggests, we replace the
> postal address with "" in the samples
> directory.

What would be best is to just put the SPDX single line at the top of the
files, and then remove this license "boilerplate" entirely.  I've
started to do that with some subsystems already (drivers/usb/ and
drivers/tty/ are almost finished, see Linus's tree for details), and
I've sent out a patch series for drivers/s390/ yesterday if you want to
see an example of how to do it.

Could you do that here instead of this patch as well?

thanks,

greg k-h


Re: [PATCH] samples: replace FSF address with web source in license notices

2017-11-14 Thread Greg KH
On Tue, Nov 14, 2017 at 10:50:37AM +0100, Martin Kepplinger wrote:
> A few years ago the FSF moved and "59 Temple Place" is wrong. Having this
> still in our source files feels old and unmaintained.
> 
> Let's take the license statement seriously and not confuse users.
> 
> As https://www.gnu.org/licenses/gpl-howto.html suggests, we replace the
> postal address with "" in the samples
> directory.

What would be best is to just put the SPDX single line at the top of the
files, and then remove this license "boilerplate" entirely.  I've
started to do that with some subsystems already (drivers/usb/ and
drivers/tty/ are almost finished, see Linus's tree for details), and
I've sent out a patch series for drivers/s390/ yesterday if you want to
see an example of how to do it.

Could you do that here instead of this patch as well?

thanks,

greg k-h


Re: [PATCH 1/2] mm,vmscan: Kill global shrinker lock.

2017-11-14 Thread Shakeel Butt
On Tue, Nov 14, 2017 at 4:56 PM, Minchan Kim  wrote:
> On Tue, Nov 14, 2017 at 06:37:42AM +0900, Tetsuo Handa wrote:
>> When shrinker_rwsem was introduced, it was assumed that
>> register_shrinker()/unregister_shrinker() are really unlikely paths
>> which are called during initialization and tear down. But nowadays,
>> register_shrinker()/unregister_shrinker() might be called regularly.
>> This patch prepares for allowing parallel registration/unregistration
>> of shrinkers.
>>
>> Since do_shrink_slab() can reschedule, we cannot protect shrinker_list
>> using one RCU section. But using atomic_inc()/atomic_dec() for each
>> do_shrink_slab() call will not impact so much.
>>
>> This patch uses polling loop with short sleep for unregister_shrinker()
>> rather than wait_on_atomic_t(), for we can save reader's cost (plain
>> atomic_dec() compared to atomic_dec_and_test()), we can expect that
>> do_shrink_slab() of unregistering shrinker likely returns shortly, and
>> we can avoid khungtaskd warnings when do_shrink_slab() of unregistering
>> shrinker unexpectedly took so long.
>>
>> Signed-off-by: Tetsuo Handa 
>
> Before reviewing this patch, can't we solve the problem in a
> simpler way? Like this.
>
> Shakeel, What do you think?
>

Seems simple enough. I will run my test (running fork bomb in one
memcg and separately time a mount operation) and update if numbers
differ significantly.

> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 13d711dd8776..cbb624cb9baa 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -498,6 +498,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> sc.nid = 0;
>
> freed += do_shrink_slab(, shrinker, nr_scanned, 
> nr_eligible);
> +   /*
> +* bail out if someone want to register a new shrinker to 
> prevent
> +* long time stall by parallel ongoing shrinking.
> +*/
> +   if (rwsem_is_contended(_rwsem)) {
> +   freed = 1;

freed = freed ?: 1;

> +   break;
> +   }
> }
>
> up_read(_rwsem);


Re: [PATCH 1/2] mm,vmscan: Kill global shrinker lock.

2017-11-14 Thread Shakeel Butt
On Tue, Nov 14, 2017 at 4:56 PM, Minchan Kim  wrote:
> On Tue, Nov 14, 2017 at 06:37:42AM +0900, Tetsuo Handa wrote:
>> When shrinker_rwsem was introduced, it was assumed that
>> register_shrinker()/unregister_shrinker() are really unlikely paths
>> which are called during initialization and tear down. But nowadays,
>> register_shrinker()/unregister_shrinker() might be called regularly.
>> This patch prepares for allowing parallel registration/unregistration
>> of shrinkers.
>>
>> Since do_shrink_slab() can reschedule, we cannot protect shrinker_list
>> using one RCU section. But using atomic_inc()/atomic_dec() for each
>> do_shrink_slab() call will not impact so much.
>>
>> This patch uses polling loop with short sleep for unregister_shrinker()
>> rather than wait_on_atomic_t(), for we can save reader's cost (plain
>> atomic_dec() compared to atomic_dec_and_test()), we can expect that
>> do_shrink_slab() of unregistering shrinker likely returns shortly, and
>> we can avoid khungtaskd warnings when do_shrink_slab() of unregistering
>> shrinker unexpectedly took so long.
>>
>> Signed-off-by: Tetsuo Handa 
>
> Before reviewing this patch, can't we solve the problem in a
> simpler way? Like this.
>
> Shakeel, What do you think?
>

Seems simple enough. I will run my test (running fork bomb in one
memcg and separately time a mount operation) and update if numbers
differ significantly.

> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 13d711dd8776..cbb624cb9baa 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -498,6 +498,14 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
> sc.nid = 0;
>
> freed += do_shrink_slab(, shrinker, nr_scanned, 
> nr_eligible);
> +   /*
> +* bail out if someone want to register a new shrinker to 
> prevent
> +* long time stall by parallel ongoing shrinking.
> +*/
> +   if (rwsem_is_contended(_rwsem)) {
> +   freed = 1;

freed = freed ?: 1;

> +   break;
> +   }
> }
>
> up_read(_rwsem);


Re: [PATCH] net: Convert net_mutex into rw_semaphore and down read it on net->init/->exit

2017-11-14 Thread Eric W. Biederman
Kirill Tkhai  writes:

> Currently a mutex is used to protect the pernet operations list. It makes
> cleanup_net() to execute ->exit methods of the same operations set,
> which was used on the time of ->init, even after net namespace is
> unlinked from net_namespace_list.
>
> But the problem is that it needs to synchronize_rcu() after net is removed
> from net_namespace_list():
>
> Destroy net_ns:
> cleanup_net()
>   mutex_lock(_mutex)
>   list_del_rcu(>list)
>   synchronize_rcu()  <--- Sleep there for ages
>   list_for_each_entry_reverse(ops, _list, list)
> ops_exit_list(ops, _exit_list)
>   list_for_each_entry_reverse(ops, _list, list)
> ops_free_list(ops, _exit_list)
>   mutex_unlock(_mutex)
>
> This primitive is not fast, especially on the systems with many processors
> and/or when preemptible RCU is enabled in config. So, all the time, while
> cleanup_net() is waiting for RCU grace period, creation of new net namespaces
> is not possible, the tasks, who makes it, are sleeping on the same mutex:
>
> Create net_ns:
> copy_net_ns()
>   mutex_lock_killable(_mutex)<--- Sleep there for ages
>
> The solution is to convert net_mutex to the rw_semaphore. Then,
> pernet_operations::init/::exit methods, modifying the net-related data,
> will require down_read() locking only, while down_write() will be used
> for changing pernet_list.
>
> This gives a significant performance increase, as you may see below. There
> is measured sequential net namespace creation in a cycle, in single
> thread, without other tasks (single user mode):
>
> 1)int main(int argc, char *argv[])
> {
> unsigned nr;
> if (argc < 2) {
> fprintf(stderr, "Provide nr iterations arg\n");
> return 1;
> }
> nr = atoi(argv[1]);
> while (nr-- > 0) {
> if (unshare(CLONE_NEWNET)) {
> perror("Can't unshare");
> return 1;
> }
> }
> return 0;
> }
>
> Origin, 10 unshare():
> 0.03user 23.14system 1:39.85elapsed 23%CPU
>
> Patched, 10 unshare():
> 0.03user 67.49system 1:08.34elapsed 98%CPU
>
> 2)for i in {1..1}; do unshare -n bash -c exit; done
>
> Origin:
> real 1m24,190s
> user 0m6,225s
> sys 0m15,132s
>
> Patched:
> real 0m18,235s   (4.6 times faster)
> user 0m4,544s
> sys 0m13,796s
>
> This patch requires commit 76f8507f7a64 "locking/rwsem: Add 
> down_read_killable()"
> from Linus tree (not in net-next yet).

Using a rwsem to protect the list of operations makes sense.

That should allow removing the sing

I am not wild about taking the rwsem down_write in
rtnl_link_unregister, and net_ns_barrier.  I think that works but it
goes from being a mild hack to being a pretty bad hack and something
else that can kill the parallelism you are seeking to add.

There are about 204 instances of struct pernet_operations.  That is a
lot of code to have carefully audited to ensure it can run in parallel all
at once.  The existence of the exit_batch method, net_ns_barrier,
for_each_net and taking of net_mutex in rtnl_link_unregister all testify
to the fact that there are data structures accessed by multiple network
namespaces.

My preference would be to:

- Add the net_sem in addition to net_mutex with down_write only held in
  register and unregister, and maybe net_ns_barrier and
  rtnl_link_unregister.

- Factor out struct pernet_ops out of struct pernet_operations.  With
  struct pernet_ops not having the exit_batch method.  With pernet_ops
  being embedded as an anonymous member of the old struct pernet_operations.

- Add [un]register_pernet_{sys,dev} functions that take a struct
  pernet_ops, that don't take net_mutex.  Have them order the
  pernet_list as:

  pernet_sys
  pernet_subsys
  pernet_device
  pernet_dev

  With the chunk in the middle taking the net_mutex.

  I think I would enforce the ordering with a failure to register
  if a subsystem or a device tried to register out of order.  

- Disable use of the single threaded workqueue if nothing needs the
  net_mutex.

- Add a test mode that deliberately spawns threads on multiple
  processors and deliberately creates multiple network namespaces
  at the same time.

- Add a test mode that deliberately spawns threads on multiple
  processors and deliberately destroys multiple network namespaces
  at the same time.
  
- Convert the code to unlocked operation one pernet_operations at a
  time.  Being careful with the loopback device, as its order in the list
  strongly matters.

- Finally remove the unnecessary code.


At the end of the day because all of the operations for one network
namespace will run in parallel with all of the operations for another
network namespace all of the sophistication that goes into batching the
cleanup of multiple network namespaces can be removed.  As different
tasks (not sharing a lock) can wait in synchronize_rcu at the same time
without 

Re: [PATCH] net: Convert net_mutex into rw_semaphore and down read it on net->init/->exit

2017-11-14 Thread Eric W. Biederman
Kirill Tkhai  writes:

> Currently a mutex is used to protect the pernet operations list. It makes
> cleanup_net() to execute ->exit methods of the same operations set,
> which was used on the time of ->init, even after net namespace is
> unlinked from net_namespace_list.
>
> But the problem is that it needs to synchronize_rcu() after net is removed
> from net_namespace_list():
>
> Destroy net_ns:
> cleanup_net()
>   mutex_lock(_mutex)
>   list_del_rcu(>list)
>   synchronize_rcu()  <--- Sleep there for ages
>   list_for_each_entry_reverse(ops, _list, list)
> ops_exit_list(ops, _exit_list)
>   list_for_each_entry_reverse(ops, _list, list)
> ops_free_list(ops, _exit_list)
>   mutex_unlock(_mutex)
>
> This primitive is not fast, especially on the systems with many processors
> and/or when preemptible RCU is enabled in config. So, all the time, while
> cleanup_net() is waiting for RCU grace period, creation of new net namespaces
> is not possible, the tasks, who makes it, are sleeping on the same mutex:
>
> Create net_ns:
> copy_net_ns()
>   mutex_lock_killable(_mutex)<--- Sleep there for ages
>
> The solution is to convert net_mutex to the rw_semaphore. Then,
> pernet_operations::init/::exit methods, modifying the net-related data,
> will require down_read() locking only, while down_write() will be used
> for changing pernet_list.
>
> This gives a significant performance increase, as you may see below. There
> is measured sequential net namespace creation in a cycle, in single
> thread, without other tasks (single user mode):
>
> 1)int main(int argc, char *argv[])
> {
> unsigned nr;
> if (argc < 2) {
> fprintf(stderr, "Provide nr iterations arg\n");
> return 1;
> }
> nr = atoi(argv[1]);
> while (nr-- > 0) {
> if (unshare(CLONE_NEWNET)) {
> perror("Can't unshare");
> return 1;
> }
> }
> return 0;
> }
>
> Origin, 10 unshare():
> 0.03user 23.14system 1:39.85elapsed 23%CPU
>
> Patched, 10 unshare():
> 0.03user 67.49system 1:08.34elapsed 98%CPU
>
> 2)for i in {1..1}; do unshare -n bash -c exit; done
>
> Origin:
> real 1m24,190s
> user 0m6,225s
> sys 0m15,132s
>
> Patched:
> real 0m18,235s   (4.6 times faster)
> user 0m4,544s
> sys 0m13,796s
>
> This patch requires commit 76f8507f7a64 "locking/rwsem: Add 
> down_read_killable()"
> from Linus tree (not in net-next yet).

Using a rwsem to protect the list of operations makes sense.

That should allow removing the sing

I am not wild about taking the rwsem down_write in
rtnl_link_unregister, and net_ns_barrier.  I think that works but it
goes from being a mild hack to being a pretty bad hack and something
else that can kill the parallelism you are seeking to add.

There are about 204 instances of struct pernet_operations.  That is a
lot of code to have carefully audited to ensure it can run in parallel all
at once.  The existence of the exit_batch method, net_ns_barrier,
for_each_net and taking of net_mutex in rtnl_link_unregister all testify
to the fact that there are data structures accessed by multiple network
namespaces.

My preference would be to:

- Add the net_sem in addition to net_mutex with down_write only held in
  register and unregister, and maybe net_ns_barrier and
  rtnl_link_unregister.

- Factor out struct pernet_ops out of struct pernet_operations.  With
  struct pernet_ops not having the exit_batch method.  With pernet_ops
  being embedded as an anonymous member of the old struct pernet_operations.

- Add [un]register_pernet_{sys,dev} functions that take a struct
  pernet_ops, that don't take net_mutex.  Have them order the
  pernet_list as:

  pernet_sys
  pernet_subsys
  pernet_device
  pernet_dev

  With the chunk in the middle taking the net_mutex.

  I think I would enforce the ordering with a failure to register
  if a subsystem or a device tried to register out of order.  

- Disable use of the single threaded workqueue if nothing needs the
  net_mutex.

- Add a test mode that deliberately spawns threads on multiple
  processors and deliberately creates multiple network namespaces
  at the same time.

- Add a test mode that deliberately spawns threads on multiple
  processors and deliberately destroys multiple network namespaces
  at the same time.
  
- Convert the code to unlocked operation one pernet_operations at a
  time.  Being careful with the loopback device, as its order in the list
  strongly matters.

- Finally remove the unnecessary code.


At the end of the day because all of the operations for one network
namespace will run in parallel with all of the operations for another
network namespace all of the sophistication that goes into batching the
cleanup of multiple network namespaces can be removed.  As different
tasks (not sharing a lock) can wait in synchronize_rcu at the same time
without slowing each other 

[PATCH] usb: dwc3: Enable the USB snooping

2017-11-14 Thread Ran Wang
Add support for USB3 snooping by asserting bits
in register DWC3_GSBUSCFG0 for data and descriptor.

Signed-off-by: Changming Huang 
Signed-off-by: Rajesh Bhagat 
Signed-off-by: Ran Wang 
---
 drivers/usb/dwc3/core.c | 24 
 drivers/usb/dwc3/core.h | 10 ++
 2 files changed, 34 insertions(+)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 07832509584f..ffc078ab4a1c 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -236,6 +236,26 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
return -ETIMEDOUT;
 }
 
+/*
+ * dwc3_enable_snooping - Enable snooping feature
+ * @dwc3: Pointer to our controller context structure
+ */
+static void dwc3_enable_snooping(struct dwc3 *dwc)
+{
+   u32 cfg;
+
+   cfg = dwc3_readl(dwc->regs, DWC3_GSBUSCFG0);
+   if (dwc->dma_coherent) {
+   cfg &= ~DWC3_GSBUSCFG0_SNP_MASK;
+   cfg |= (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DATARD_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DESCRD_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DATAWR_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DESCWR_SHIFT);
+   }
+
+   dwc3_writel(dwc->regs, DWC3_GSBUSCFG0, cfg);
+}
+
 /*
  * dwc3_frame_length_adjustment - Adjusts frame length if required
  * @dwc3: Pointer to our controller context structure
@@ -776,6 +796,8 @@ static int dwc3_core_init(struct dwc3 *dwc)
/* Adjust Frame Length */
dwc3_frame_length_adjustment(dwc);
 
+   dwc3_enable_snooping(dwc);
+
usb_phy_set_suspend(dwc->usb2_phy, 0);
usb_phy_set_suspend(dwc->usb3_phy, 0);
ret = phy_power_on(dwc->usb2_generic_phy);
@@ -1021,6 +1043,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
_threshold);
dwc->usb3_lpm_capable = device_property_read_bool(dev,
"snps,usb3_lpm_capable");
+   dwc->dma_coherent = device_property_read_bool(dev,
+   "dma-coherent");
 
dwc->disable_scramble_quirk = device_property_read_bool(dev,
"snps,disable_scramble_quirk");
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 4a4a4c98508c..6e6a66650e53 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -153,6 +153,14 @@
 
 /* Bit fields */
 
+/* Global SoC Bus Configuration Register 0 */
+#define AXI3_CACHE_TYPE_SNP0x2 /* cacheable */
+#define DWC3_GSBUSCFG0_DATARD_SHIFT28
+#define DWC3_GSBUSCFG0_DESCRD_SHIFT24
+#define DWC3_GSBUSCFG0_DATAWR_SHIFT20
+#define DWC3_GSBUSCFG0_DESCWR_SHIFT16
+#define DWC3_GSBUSCFG0_SNP_MASK0x
+
 /* Global Debug Queue/FIFO Space Available Register */
 #define DWC3_GDBGFIFOSPACE_NUM(n)  ((n) & 0x1f)
 #define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0)
@@ -859,6 +867,7 @@ struct dwc3_scratchpad_array {
  * 3   - Reserved
  * @imod_interval: set the interrupt moderation interval in 250ns
  * increments or 0 to disable.
+ * @dma_coherent: set if enable dma-coherent.
  */
 struct dwc3 {
struct work_struct  drd_work;
@@ -990,6 +999,7 @@ struct dwc3 {
unsignedsetup_packet_pending:1;
unsignedthree_stage_setup:1;
unsignedusb3_lpm_capable:1;
+   unsigneddma_coherent:1;
 
unsigneddisable_scramble_quirk:1;
unsignedu2exit_lfps_quirk:1;
-- 
2.14.1



[PATCH] usb: dwc3: Enable the USB snooping

2017-11-14 Thread Ran Wang
Add support for USB3 snooping by asserting bits
in register DWC3_GSBUSCFG0 for data and descriptor.

Signed-off-by: Changming Huang 
Signed-off-by: Rajesh Bhagat 
Signed-off-by: Ran Wang 
---
 drivers/usb/dwc3/core.c | 24 
 drivers/usb/dwc3/core.h | 10 ++
 2 files changed, 34 insertions(+)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 07832509584f..ffc078ab4a1c 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -236,6 +236,26 @@ static int dwc3_core_soft_reset(struct dwc3 *dwc)
return -ETIMEDOUT;
 }
 
+/*
+ * dwc3_enable_snooping - Enable snooping feature
+ * @dwc3: Pointer to our controller context structure
+ */
+static void dwc3_enable_snooping(struct dwc3 *dwc)
+{
+   u32 cfg;
+
+   cfg = dwc3_readl(dwc->regs, DWC3_GSBUSCFG0);
+   if (dwc->dma_coherent) {
+   cfg &= ~DWC3_GSBUSCFG0_SNP_MASK;
+   cfg |= (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DATARD_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DESCRD_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DATAWR_SHIFT) |
+   (AXI3_CACHE_TYPE_SNP << DWC3_GSBUSCFG0_DESCWR_SHIFT);
+   }
+
+   dwc3_writel(dwc->regs, DWC3_GSBUSCFG0, cfg);
+}
+
 /*
  * dwc3_frame_length_adjustment - Adjusts frame length if required
  * @dwc3: Pointer to our controller context structure
@@ -776,6 +796,8 @@ static int dwc3_core_init(struct dwc3 *dwc)
/* Adjust Frame Length */
dwc3_frame_length_adjustment(dwc);
 
+   dwc3_enable_snooping(dwc);
+
usb_phy_set_suspend(dwc->usb2_phy, 0);
usb_phy_set_suspend(dwc->usb3_phy, 0);
ret = phy_power_on(dwc->usb2_generic_phy);
@@ -1021,6 +1043,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
_threshold);
dwc->usb3_lpm_capable = device_property_read_bool(dev,
"snps,usb3_lpm_capable");
+   dwc->dma_coherent = device_property_read_bool(dev,
+   "dma-coherent");
 
dwc->disable_scramble_quirk = device_property_read_bool(dev,
"snps,disable_scramble_quirk");
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 4a4a4c98508c..6e6a66650e53 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -153,6 +153,14 @@
 
 /* Bit fields */
 
+/* Global SoC Bus Configuration Register 0 */
+#define AXI3_CACHE_TYPE_SNP0x2 /* cacheable */
+#define DWC3_GSBUSCFG0_DATARD_SHIFT28
+#define DWC3_GSBUSCFG0_DESCRD_SHIFT24
+#define DWC3_GSBUSCFG0_DATAWR_SHIFT20
+#define DWC3_GSBUSCFG0_DESCWR_SHIFT16
+#define DWC3_GSBUSCFG0_SNP_MASK0x
+
 /* Global Debug Queue/FIFO Space Available Register */
 #define DWC3_GDBGFIFOSPACE_NUM(n)  ((n) & 0x1f)
 #define DWC3_GDBGFIFOSPACE_TYPE(n) (((n) << 5) & 0x1e0)
@@ -859,6 +867,7 @@ struct dwc3_scratchpad_array {
  * 3   - Reserved
  * @imod_interval: set the interrupt moderation interval in 250ns
  * increments or 0 to disable.
+ * @dma_coherent: set if enable dma-coherent.
  */
 struct dwc3 {
struct work_struct  drd_work;
@@ -990,6 +999,7 @@ struct dwc3 {
unsignedsetup_packet_pending:1;
unsignedthree_stage_setup:1;
unsignedusb3_lpm_capable:1;
+   unsigneddma_coherent:1;
 
unsigneddisable_scramble_quirk:1;
unsignedu2exit_lfps_quirk:1;
-- 
2.14.1



Re: Coccinelle: badzero.cocci failure

2017-11-14 Thread Julia Lawall


On Tue, 14 Nov 2017, Masahiro Yamada wrote:

> Hi Julia,
>
>
> 2017-11-14 18:07 GMT+09:00 Julia Lawall :
> >> coccicheck failed
> >> $ cat cocci-debug.txt
> >> /home/masahiro/bin/spatch -D report --no-show-diff --very-quiet
> >> --cocci-file scripts/coccinelle/null/badzero.cocci --dir . -I
> >> ./arch/x86/include -I ./arch/x86/include/generated -I ./include -I
> >> ./arch/x86/include/uapi -I ./arch/x86/include/generated/uapi -I
> >> ./include/uapi -I ./include/generated/uapi --include
> >> ./include/linux/kconfig.h --jobs 8 --chunksize 1
> >> Fatal error: exception
> >> Yes_prepare_ocamlcocci.LinkFailure("/tmp/ocaml_cocci_18c9f9.cmxs")
> >
> > Does your Coccinelle support OCaml?  I'm not sure what is the proper way to
> > check for this, but in my coccinelle/config.log file I have
> >
> > FEATURE_OCAML='1'
>
>
> Yes.  I also see this line in my config.log
>
>
> > spatch --version gives:
> >
> > spatch version 1.0.6-00147-g19f9421 compiled with OCaml version 4.02.3
> > Flags passed to the configure script: [none]
> > Python scripting support: yes
> > Syntax of regular expresssions: Str
>
> My version output looks as follows:
>
> $ spatch --version
> spatch version 1.0.6-00345-g2ca0bef compiled with OCaml version 4.02.3
> Flags passed to the configure script: --prefix=/home/masahiro
> Python scripting support: yes
> Syntax of regular expresssions: PCRE
>
>
> > I'm not sure why it doesn't give feedback on whether OCaml scripting is
> > supported.  I will check on this.

Can you try the following semantic patch (called eg nothing.cocci):

@script:ocaml@
@@

()

on any .c file, ie

spatch --sp-file nothing.cocci test.c

julia


Re: Coccinelle: badzero.cocci failure

2017-11-14 Thread Julia Lawall


On Tue, 14 Nov 2017, Masahiro Yamada wrote:

> Hi Julia,
>
>
> 2017-11-14 18:07 GMT+09:00 Julia Lawall :
> >> coccicheck failed
> >> $ cat cocci-debug.txt
> >> /home/masahiro/bin/spatch -D report --no-show-diff --very-quiet
> >> --cocci-file scripts/coccinelle/null/badzero.cocci --dir . -I
> >> ./arch/x86/include -I ./arch/x86/include/generated -I ./include -I
> >> ./arch/x86/include/uapi -I ./arch/x86/include/generated/uapi -I
> >> ./include/uapi -I ./include/generated/uapi --include
> >> ./include/linux/kconfig.h --jobs 8 --chunksize 1
> >> Fatal error: exception
> >> Yes_prepare_ocamlcocci.LinkFailure("/tmp/ocaml_cocci_18c9f9.cmxs")
> >
> > Does your Coccinelle support OCaml?  I'm not sure what is the proper way to
> > check for this, but in my coccinelle/config.log file I have
> >
> > FEATURE_OCAML='1'
>
>
> Yes.  I also see this line in my config.log
>
>
> > spatch --version gives:
> >
> > spatch version 1.0.6-00147-g19f9421 compiled with OCaml version 4.02.3
> > Flags passed to the configure script: [none]
> > Python scripting support: yes
> > Syntax of regular expresssions: Str
>
> My version output looks as follows:
>
> $ spatch --version
> spatch version 1.0.6-00345-g2ca0bef compiled with OCaml version 4.02.3
> Flags passed to the configure script: --prefix=/home/masahiro
> Python scripting support: yes
> Syntax of regular expresssions: PCRE
>
>
> > I'm not sure why it doesn't give feedback on whether OCaml scripting is
> > supported.  I will check on this.

Can you try the following semantic patch (called eg nothing.cocci):

@script:ocaml@
@@

()

on any .c file, ie

spatch --sp-file nothing.cocci test.c

julia


Re: 4.14 kernel and acpi INT3400:00: Unsupported event [0x86]

2017-11-14 Thread Zhang Rui
Hi, Brian,

thanks for your quick fix, as it is in merge window right now, I will
queue it for next -rc2.

thanks,
rui

On Tue, 2017-11-14 at 10:50 -0700, Brian Bian wrote:
> I have submitted a patch to suppress such messages. The INT3400
> driver
> currently handles 0x83 thermal-relationship-table-change event
> only, and all other ACPI notification codes are unknown/irrelevant
> to the INT3400 driver.
> 
> Thanks,
> -Brian
> 
> On Mon, 13 Nov 2017, Arkadiusz Miskiewicz wrote:
> 
> > 
> > On Monday 13 of November 2017, Zhang Rui wrote:
> > > 
> > > On Sun, 2017-11-12 at 23:25 +0100, Arkadiusz Miskiewicz wrote:
> > > > 
> > > > Hello.
> > > > 
> > > > On Dell XPS 9530 and 4.14 kernel dmesg is flooded with:
> > > > 
> > > > [  292.580807] acpi INT3400:00: Unsupported event [0x86]
> > > > [  299.284648] acpi INT3400:00: Unsupported event [0x86]
> > > > [  305.648079] acpi INT3400:00: Unsupported event [0x86]
> > > > [  315.444799] acpi INT3400:00: Unsupported event [0x86]
> > > > [  317.432412] acpi INT3400:00: Unsupported event [0x86]
> > > > [  319.420239] acpi INT3400:00: Unsupported event [0x86]
> > > > [  321.408476] acpi INT3400:00: Unsupported event [0x86]
> > > > [  323.400304] acpi INT3400:00: Unsupported event [0x86]
> > > > [  325.388358] acpi INT3400:00: Unsupported event [0x86]
> > > > 
> > > > What 0x86 might mean?
> > > please attach the acpidump output.
> > Attached.
> > 
> > > 
> > > 
> > > thanks,
> > > rui
> > 
> > 
> > -- 
> > Arkadiusz Miśkiewicz, arekm / ( maven.pl | pld-linux.org )


Re: 4.14 kernel and acpi INT3400:00: Unsupported event [0x86]

2017-11-14 Thread Zhang Rui
Hi, Brian,

thanks for your quick fix, as it is in merge window right now, I will
queue it for next -rc2.

thanks,
rui

On Tue, 2017-11-14 at 10:50 -0700, Brian Bian wrote:
> I have submitted a patch to suppress such messages. The INT3400
> driver
> currently handles 0x83 thermal-relationship-table-change event
> only, and all other ACPI notification codes are unknown/irrelevant
> to the INT3400 driver.
> 
> Thanks,
> -Brian
> 
> On Mon, 13 Nov 2017, Arkadiusz Miskiewicz wrote:
> 
> > 
> > On Monday 13 of November 2017, Zhang Rui wrote:
> > > 
> > > On Sun, 2017-11-12 at 23:25 +0100, Arkadiusz Miskiewicz wrote:
> > > > 
> > > > Hello.
> > > > 
> > > > On Dell XPS 9530 and 4.14 kernel dmesg is flooded with:
> > > > 
> > > > [  292.580807] acpi INT3400:00: Unsupported event [0x86]
> > > > [  299.284648] acpi INT3400:00: Unsupported event [0x86]
> > > > [  305.648079] acpi INT3400:00: Unsupported event [0x86]
> > > > [  315.444799] acpi INT3400:00: Unsupported event [0x86]
> > > > [  317.432412] acpi INT3400:00: Unsupported event [0x86]
> > > > [  319.420239] acpi INT3400:00: Unsupported event [0x86]
> > > > [  321.408476] acpi INT3400:00: Unsupported event [0x86]
> > > > [  323.400304] acpi INT3400:00: Unsupported event [0x86]
> > > > [  325.388358] acpi INT3400:00: Unsupported event [0x86]
> > > > 
> > > > What 0x86 might mean?
> > > please attach the acpidump output.
> > Attached.
> > 
> > > 
> > > 
> > > thanks,
> > > rui
> > 
> > 
> > -- 
> > Arkadiusz Miśkiewicz, arekm / ( maven.pl | pld-linux.org )


Re: rpmsg: qcom_glink_native: no module license, taints kernel

2017-11-14 Thread Bjorn Andersson
On Sun 12 Nov 09:17 PST 2017, Randy Dunlap wrote:

> [44098.635339] qcom_glink_native: module license 'unspecified' taints kernel.

Thanks for reporting this.

Regards,
Bjorn


Re: rpmsg: qcom_glink_native: no module license, taints kernel

2017-11-14 Thread Bjorn Andersson
On Sun 12 Nov 09:17 PST 2017, Randy Dunlap wrote:

> [44098.635339] qcom_glink_native: module license 'unspecified' taints kernel.

Thanks for reporting this.

Regards,
Bjorn


Re: [PATCH] lost path_put in perf_fill_ns_link_info

2017-11-14 Thread Vasily Averin
On 2017-11-08 16:04, Vasily Averin wrote:
> On 2017-11-08 15:09, Alexander Shishkin wrote:
>> On Mon, Nov 06, 2017 at 09:22:18AM +0300, Vasily Averin wrote:
>>> Fixes: commit e422267322cd ("perf: Add PERF_RECORD_NAMESPACES to include 
>>> namespaces related info")
>>> Signed-off-by: Vasily Averin 
>>
>> The change description is missing. One needs to open the source code and
>> look for proof of correctness for this patch.
> 
> perf_fill_ns_link_info() calls ns_get_path()
> it returns ns_path with increased mnt and dentry counters.
> 
> Problem is that nobody decrements these counters.
> 
> You can call ./perf record --namespaces unshare -m
> and look how grows mount counter on nsfs_mnt. 

The situation is even worse: the leaked dentry prevents the related
namespaces from being freed.

[root@localhost ~]# uname -a
Linux localhost.localdomain 4.14.0+ #10 SMP Wed Nov 15 00:31:34 MSK 2017 x86_64 
x86_64 x86_64 GNU/Linux

VvS:  without --namespace perf works correctly

[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  0  0   2568   128 : tunables000 : 
slabdata  0  0  0
user_namespace 0  0824   398 : tunables000 : 
slabdata  0  0  0
net_namespace  0  5   627258 : tunables000 : 
slabdata  1  1  0
[root@localhost ~]# perf record  -q  unshare -n -U  -p --fork true 
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  0 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 0 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  0  5   627258 : tunables000 : 
slabdata  1  1  0

VvS: with --namespace perf leaks namespaces

[root@localhost ~]# perf record  -q  --namespace unshare -n -U  -p --fork true
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  1 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 1 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  1  5   627258 : tunables000 : 
slabdata  1  1  0

VvS: ... and once again, to be sure

[root@localhost ~]# perf record  -q  --namespace unshare -n -U  -p --fork true
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  2 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 2 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  2  5   627258 : tunables000 : 
slabdata  1  1  0


kmemleak also detects the leaked dentry, inode, related structures and namespaces

unreferenced object 0x998008e76738 (size 192): <<< dentry
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] __ns_get_path+0xf5/0x160
[] ns_get_path+0x28/0x50
[] perf_fill_ns_link_info+0x20/0x80
[] perf_event_namespaces.part.101+0xd7/0x120
[] copy_process.part.34+0x171d/0x1ae0
[] _do_fork+0xcc/0x390
[] do_syscall_64+0x61/0x170
[] return_from_SYSCALL_64+0x0/0x65
[] 0x

unreferenced object 0x998008e7c928 (size 600): <<< inode
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] new_inode_pseudo+0xe/0x60
[] __ns_get_path+0x42/0x160
[] ns_get_path+0x28/0x50
 ...

unreferenced object 0x99800cacd320 (size 40): <<< inode_security_struct
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] security_inode_alloc+0x36/0x50
[] inode_init_always+0xf5/0x1d0
[] alloc_inode+0x2b/0x80
[] new_inode_pseudo+0xe/0x60
[] __ns_get_path+0x42/0x160
 ...

unreferenced object 0x99800cb88a10 (size 2232): <<< pid_namespace
  comm "unshare", pid 1436, jiffies 4294786439 (age 509.214s)
[] create_new_namespaces+0xd4/0x1b0
[] unshare_nsproxy_namespaces+0x59/0xb0
[] SyS_unshare+0x1e5/0x370
[] entry_SYSCALL_64_fastpath+0x1a/0x7d
[] 0x

I've resent the patch as  "[PATCH] memory leaks triggered by perf --namespace"

Thank you,
Vasily Averin


Re: [PATCH] lost path_put in perf_fill_ns_link_info

2017-11-14 Thread Vasily Averin
On 2017-11-08 16:04, Vasily Averin wrote:
> On 2017-11-08 15:09, Alexander Shishkin wrote:
>> On Mon, Nov 06, 2017 at 09:22:18AM +0300, Vasily Averin wrote:
>>> Fixes: commit e422267322cd ("perf: Add PERF_RECORD_NAMESPACES to include 
>>> namespaces related info")
>>> Signed-off-by: Vasily Averin 
>>
>> The change description is missing. One needs to open the source code and
>> look for proof of correctness for this patch.
> 
> perf_fill_ns_link_info() calls ns_get_path()
> it returns ns_path with increased mnt and dentry counters.
> 
> Problem is that nobody decrements these counters.
> 
> You can call ./perf record --namespaces unshare -m
> and look how grows mount counter on nsfs_mnt. 

The situation is even worse: the leaked dentry prevents the related
namespaces from being freed.

[root@localhost ~]# uname -a
Linux localhost.localdomain 4.14.0+ #10 SMP Wed Nov 15 00:31:34 MSK 2017 x86_64 
x86_64 x86_64 GNU/Linux

VvS:  without --namespace perf works correctly

[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  0  0   2568   128 : tunables000 : 
slabdata  0  0  0
user_namespace 0  0824   398 : tunables000 : 
slabdata  0  0  0
net_namespace  0  5   627258 : tunables000 : 
slabdata  1  1  0
[root@localhost ~]# perf record  -q  unshare -n -U  -p --fork true 
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  0 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 0 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  0  5   627258 : tunables000 : 
slabdata  1  1  0

VvS: with --namespace perf leaks namespaces

[root@localhost ~]# perf record  -q  --namespace unshare -n -U  -p --fork true
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  1 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 1 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  1  5   627258 : tunables000 : 
slabdata  1  1  0

VvS: ... and once again, to be sure

[root@localhost ~]# perf record  -q  --namespace unshare -n -U  -p --fork true
[root@localhost ~]# grep namespace /proc/slabinfo
pid_namespace  2 12   2568   128 : tunables000 : 
slabdata  1  1  0
user_namespace 2 39824   398 : tunables000 : 
slabdata  1  1  0
net_namespace  2  5   627258 : tunables000 : 
slabdata  1  1  0


kmemleak also detects the leaked dentry, inode, related structures and namespaces

unreferenced object 0x998008e76738 (size 192): <<< dentry
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] __ns_get_path+0xf5/0x160
[] ns_get_path+0x28/0x50
[] perf_fill_ns_link_info+0x20/0x80
[] perf_event_namespaces.part.101+0xd7/0x120
[] copy_process.part.34+0x171d/0x1ae0
[] _do_fork+0xcc/0x390
[] do_syscall_64+0x61/0x170
[] return_from_SYSCALL_64+0x0/0x65
[] 0x

unreferenced object 0x998008e7c928 (size 600): <<< inode
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] new_inode_pseudo+0xe/0x60
[] __ns_get_path+0x42/0x160
[] ns_get_path+0x28/0x50
 ...

unreferenced object 0x99800cacd320 (size 40): <<< inode_security_struct
  comm "unshare", pid 1436, jiffies 4294786539 (age 509.114s)
[] security_inode_alloc+0x36/0x50
[] inode_init_always+0xf5/0x1d0
[] alloc_inode+0x2b/0x80
[] new_inode_pseudo+0xe/0x60
[] __ns_get_path+0x42/0x160
 ...

unreferenced object 0x99800cb88a10 (size 2232): <<< pid_namespace
  comm "unshare", pid 1436, jiffies 4294786439 (age 509.214s)
[] create_new_namespaces+0xd4/0x1b0
[] unshare_nsproxy_namespaces+0x59/0xb0
[] SyS_unshare+0x1e5/0x370
[] entry_SYSCALL_64_fastpath+0x1a/0x7d
[] 0x

I've resent the patch as  "[PATCH] memory leaks triggered by perf --namespace"

Thank you,
Vasily Averin


Re: [PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-11-14 Thread Baoquan He
Hi Dave,

Thanks for your effort to push this into upstream. While I have one
concern, please see the inline comments.

On 10/24/17 at 01:31pm, Dave Young wrote:
> Now crashkernel=X will fail if there's not enough memory at low region
> (below 896M) when trying to reserve large memory size.  One can use
> crashkernel=xM,high to reserve it at high region (>4G) but it is more
> convenient to improve crashkernel=X to:
> 
>  - First try to reserve X below 896M (for being compatible with old
>kexec-tools).
>  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
>  - If fails, try to reserve X from MAXMEM top down.
> 
> It's more transparent and user-friendly.
> 
> If crashkernel is large and the reserved is beyond 896M, old kexec-tools
> is not compatible with new kernel because old kexec-tools can not load
> kernel at high memory region, there was an old discussion below:
> https://lkml.org/lkml/2013/10/15/601
> 
> But actually the behavior is consistent during my test. Suppose
> old kernel fail to reserve memory at low areas, kdump does not
> work because no memory is reserved. With this patch, suppose new kernel
> successfully reserved memory at high areas, old kexec-tools still fail
> to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
> worry about the compatibility.
> 
> Here is the test result (kexec-tools 2.0.2, no high memory load
> support):
> Crashkernel over 4G:
> # cat /proc/iomem|grep Crash
>   be00-cdff : Crash kernel
>   21300-21eff : Crash kernel
> # ./kexec  -p /boot/vmlinuz-`uname -r`
> Memory for crashkernel is not reserved
> Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
> Then try loading kdump kernel
> 
> crashkernel: 896M-4G:
> # cat /proc/iomem|grep Crash
>   9600-cdef : Crash kernel
> # ./kexec -p /boot/vmlinuz-4.14.0-rc4+
> ELF core (kcore) parse failed
> Cannot load /boot/vmlinuz-4.14.0-rc4+
> 
> Signed-off-by: Dave Young 
> ---
>  arch/x86/kernel/setup.c |   16 
>  1 file changed, 16 insertions(+)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
>   high ? CRASH_ADDR_HIGH_MAX
>: CRASH_ADDR_LOW_MAX,
>   crash_size, CRASH_ALIGN);
> +#ifdef CONFIG_X86_64
> + /*
> +  * crashkernel=X reserve below 896M fails? Try below 4G
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + (1ULL << 32),
> + crash_size, CRASH_ALIGN);
> + /*
> +  * crashkernel=X reserve below 4G fails? Try MAXMEM
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_HIGH_MAX,
> + crash_size, CRASH_ALIGN);

For kdump, most systems are x86_64. If both Yinghai and Vivek have no
objection to searching for an available region of crash_size above 896M
naturally, why don't we search for it with the function
__memblock_find_range_bottom_up()? It can search from below 896M to
above 4G, almost the same as the change you have made currently. Mainly,
the code will be much simpler.

Searching several times does not look good and is a little confusing.

What do you think?

Thanks
Baoquan

> +#endif
>   if (!crash_base) {
>   pr_info("crashkernel reservation failed - No suitable 
> area found.\n");
>   return;
> 
> 


Re: [PATCH 2/3] X86/kdump: crashkernel=X try to reserve below 896M first then below 4G and MAXMEM

2017-11-14 Thread Baoquan He
Hi Dave,

Thanks for your effort to push this into upstream. While I have one
concern, please see the inline comments.

On 10/24/17 at 01:31pm, Dave Young wrote:
> Now crashkernel=X will fail if there's not enough memory at low region
> (below 896M) when trying to reserve large memory size.  One can use
> crashkernel=xM,high to reserve it at high region (>4G) but it is more
> convenient to improve crashkernel=X to:
> 
>  - First try to reserve X below 896M (for being compatible with old
>kexec-tools).
>  - If fails, try to reserve X below 4G (swiotlb need to stay below 4G).
>  - If fails, try to reserve X from MAXMEM top down.
> 
> It's more transparent and user-friendly.
> 
> If crashkernel is large and the reserved is beyond 896M, old kexec-tools
> is not compatible with new kernel because old kexec-tools can not load
> kernel at high memory region, there was an old discussion below:
> https://lkml.org/lkml/2013/10/15/601
> 
> But actually the behavior is consistent during my test. Suppose
> old kernel fail to reserve memory at low areas, kdump does not
> work because no memory is reserved. With this patch, suppose new kernel
> successfully reserved memory at high areas, old kexec-tools still fail
> to load kdump kernel (tested 2.0.2), so it is acceptable, no need to
> worry about the compatibility.
> 
> Here is the test result (kexec-tools 2.0.2, no high memory load
> support):
> Crashkernel over 4G:
> # cat /proc/iomem|grep Crash
>   be00-cdff : Crash kernel
>   21300-21eff : Crash kernel
> # ./kexec  -p /boot/vmlinuz-`uname -r`
> Memory for crashkernel is not reserved
> Please reserve memory by passing "crashkernel=X@Y" parameter to the kernel
> Then try loading kdump kernel
> 
> crashkernel: 896M-4G:
> # cat /proc/iomem|grep Crash
>   9600-cdef : Crash kernel
> # ./kexec -p /boot/vmlinuz-4.14.0-rc4+
> ELF core (kcore) parse failed
> Cannot load /boot/vmlinuz-4.14.0-rc4+
> 
> Signed-off-by: Dave Young 
> ---
>  arch/x86/kernel/setup.c |   16 
>  1 file changed, 16 insertions(+)
> 
> --- linux-x86.orig/arch/x86/kernel/setup.c
> +++ linux-x86/arch/x86/kernel/setup.c
> @@ -568,6 +568,22 @@ static void __init reserve_crashkernel(v
>   high ? CRASH_ADDR_HIGH_MAX
>: CRASH_ADDR_LOW_MAX,
>   crash_size, CRASH_ALIGN);
> +#ifdef CONFIG_X86_64
> + /*
> +  * crashkernel=X reserve below 896M fails? Try below 4G
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + (1ULL << 32),
> + crash_size, CRASH_ALIGN);
> + /*
> +  * crashkernel=X reserve below 4G fails? Try MAXMEM
> +  */
> + if (!high && !crash_base)
> + crash_base = memblock_find_in_range(CRASH_ALIGN,
> + CRASH_ADDR_HIGH_MAX,
> + crash_size, CRASH_ALIGN);

For kdump, most systems are x86_64. If both Yinghai and Vivek have no
objection to searching for an available region of crash_size above 896M
naturally, why don't we search for it with the function
__memblock_find_range_bottom_up()? It can search from below 896M to
above 4G, almost the same as the change you have made currently. Mainly,
the code will be much simpler.

Searching several times does not look good and is a little confusing.

What do you think?

Thanks
Baoquan

> +#endif
>   if (!crash_base) {
>   pr_info("crashkernel reservation failed - No suitable 
> area found.\n");
>   return;
> 
> 


  1   2   3   4   5   6   7   8   9   10   >