Re: [PATCHv4 2/6] Staging: most: mostcore/core.c. Fix "Using plain integer as NULL pointer" warnings

2015-08-03 Thread Sudip Mukherjee
On Mon, Aug 03, 2015 at 11:22:35PM +0200, Adrian Remonda wrote:
> This patch fixes the warning generated by sparse: "Using plain integer
> as NULL pointer" by replacing the offending 0 with NULL.
> 
> Signed-off-by: Adrian Remonda 
> ---
This patch will not apply because of 7ac5c9f0a022 ("Staging: most: fix
snprintf() is printing too much").

You need to update your tree.

regards
sudip
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface

2015-08-03 Thread yalin wang

> On Jul 28, 2015, at 21:21, Peter Zijlstra  wrote:
> 
> There are various problems and short-comings with the current
> static_key interface:
> 
> - static_key_{true,false}() read like a branch depending on the key
>   value, instead of the actual likely/unlikely branch depending on
>   init value.
> 
> - static_key_{true,false}() are, as stated above, tied to the
>   static_key init values STATIC_KEY_INIT_{TRUE,FALSE}.
> 
> - we're limited to the 2 (out of 4) possible options that compile to
>   a default NOP because that's what our arch_static_branch() assembly
>   emits.
> 
> So provide a new static_key interface:
> 
>  DEFINE_STATIC_KEY_TRUE(name);
>  DEFINE_STATIC_KEY_FALSE(name);
> 
> Which define a key of different types with an initial true/false
> value.
> 
> Then allow:
> 
>   static_branch_likely()
>   static_branch_unlikely()
> 
> to take a key of either type and emit the right instruction for the
> case.
> 
> This means adding a second arch_static_branch_jump() assembly helper
> which emits a JMP per default.
> 
> In order to determine the right instruction for the right state,
> encode the branch type in the LSB of jump_entry::key.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
> 
Does this mean static_key_{true,false}() are deprecated?
Do you need to mark static_key_{true,false}() as deprecated,
like this:
static __always_inline  __deprecated bool static_key_false(struct static_key 
*key)  ?
Thanks


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks

2015-08-03 Thread Lin Huang
pclk_pd_pmu needs to keep running and with the upcoming gpio clock
handling this is not always the case anymore. So add it to the list
of critical clocks for now.

Signed-off-by: Heiko Stuebner 
Signed-off-by: Lin Huang 
---
Changes in v3:
-match the author and Signed-off-by name

 drivers/clk/rockchip/clk-rk3288.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/rockchip/clk-rk3288.c 
b/drivers/clk/rockchip/clk-rk3288.c
index 0df5bae..9040878 100644
--- a/drivers/clk/rockchip/clk-rk3288.c
+++ b/drivers/clk/rockchip/clk-rk3288.c
@@ -780,6 +780,7 @@ static const char *const rk3288_critical_clocks[] 
__initconst = {
"aclk_cpu",
"aclk_peri",
"hclk_peri",
+   "pclk_pd_pmu",
 };
 
 #ifdef CONFIG_PM_SLEEP
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3 2/2] pinctrl: rockchip: only enable gpio clock when it setting

2015-08-03 Thread Lin Huang
The GPIO block keeps its state even when its clock is disabled, so to
save power, only enable the GPIO clock while the GPIO is being accessed.

Signed-off-by: Heiko Stuebner 
Signed-off-by: Lin Huang 
---
Changes in v3:
-match author and Signed-off-by name

 drivers/pinctrl/pinctrl-rockchip.c | 57 +++---
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index cc2843a..70a4539 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -945,6 +945,7 @@ static int _rockchip_pmx_gpio_set_direction(struct 
gpio_chip *chip,
if (ret < 0)
return ret;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -953,9 +954,11 @@ static int _rockchip_pmx_gpio_set_direction(struct 
gpio_chip *chip,
data |= BIT(pin);
else
data &= ~BIT(pin);
+
writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR);
 
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 
return 0;
 }
@@ -1389,6 +1392,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, 
unsigned offset, int value)
unsigned long flags;
u32 data;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl(reg);
@@ -1398,6 +1402,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, 
unsigned offset, int value)
writel(data, reg);
 
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 }
 
 /*
@@ -1409,7 +1414,9 @@ static int rockchip_gpio_get(struct gpio_chip *gc, 
unsigned offset)
struct rockchip_pin_bank *bank = gc_to_pin_bank(gc);
u32 data;
 
+   clk_enable(bank->clk);
data = readl(bank->reg_base + GPIO_EXT_PORT);
+   clk_disable(bank->clk);
data >>= offset;
data &= 1;
return data;
@@ -1546,6 +1553,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
if (ret < 0)
return ret;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -1603,6 +1611,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
default:
irq_gc_unlock(gc);
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
return -EINVAL;
}
 
@@ -1611,6 +1620,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
 
irq_gc_unlock(gc);
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 
return 0;
 }
@@ -1620,8 +1630,10 @@ static void rockchip_irq_suspend(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct rockchip_pin_bank *bank = gc->private;
 
+   clk_enable(bank->clk);
bank->saved_masks = irq_reg_readl(gc, GPIO_INTMASK);
irq_reg_writel(gc, ~gc->wake_active, GPIO_INTMASK);
+   clk_disable(bank->clk);
 }
 
 static void rockchip_irq_resume(struct irq_data *d)
@@ -1629,7 +1641,27 @@ static void rockchip_irq_resume(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct rockchip_pin_bank *bank = gc->private;
 
+   clk_enable(bank->clk);
irq_reg_writel(gc, bank->saved_masks, GPIO_INTMASK);
+   clk_disable(bank->clk);
+}
+
+static void rockchip_irq_gc_mask_clr_bit(struct irq_data *d)
+{
+   struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+   struct rockchip_pin_bank *bank = gc->private;
+
+   clk_enable(bank->clk);
+   irq_gc_mask_clr_bit(d);
+}
+
+void rockchip_irq_gc_mask_set_bit(struct irq_data *d)
+{
+   struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+   struct rockchip_pin_bank *bank = gc->private;
+
+   irq_gc_mask_set_bit(d);
+   clk_disable(bank->clk);
 }
 
 static int rockchip_interrupts_register(struct platform_device *pdev,
@@ -1640,7 +1672,7 @@ static int rockchip_interrupts_register(struct 
platform_device *pdev,
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
struct irq_chip_generic *gc;
int ret;
-   int i;
+   int i, j;
 
for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
if (!bank->valid) {
@@ -1649,11 +1681,19 @@ static int rockchip_interrupts_register(struct 
platform_device *pdev,
continue;
}
 
+   ret = clk_enable(bank->clk);
+   if (ret) {
+   dev_err(&pdev->dev, "failed to enable clock for bank 
%s\n",
+   bank->name);
+   continue;
+   }
+
bank->domain = irq_domain_add_linear(b

Re: [PATCH] megaraid_sas: fix missing { } braces

2015-08-03 Thread Johannes Thumshirn

Hi Colin,

Colin King  writes:

> From: Colin Ian King 
>
> Static analysis by smatch indicated that there was a curly
> braces issue:
>
> drivers/scsi/megaraid/megaraid_sas_base.c:6139
>   megasas_mgmt_fw_ioctl() warn: curly braces intended?
>
> Add braces in the appropriate place so that kbuff_arr[i] gets
> set to NULL only when we need to.  Also, remove whitespace
> between kbuff_arr and [].

I don't quite get where you removed that whitespace. Might it be a
leftover from another version of the patch?

>
> Signed-off-by: Colin Ian King 
> ---
>  drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c 
> b/drivers/scsi/megaraid/megaraid_sas_base.c
> index 71b884d..8face78 100644
> --- a/drivers/scsi/megaraid/megaraid_sas_base.c
> +++ b/drivers/scsi/megaraid/megaraid_sas_base.c
> @@ -6131,12 +6131,13 @@ megasas_mgmt_fw_ioctl(struct megasas_instance 
> *instance,
>   }
>  
>   for (i = 0; i < ioc->sge_count; i++) {
> - if (kbuff_arr[i])
> + if (kbuff_arr[i]) {
>   dma_free_coherent(&instance->pdev->dev,
> le32_to_cpu(kern_sge32[i].length),
> kbuff_arr[i],
> le32_to_cpu(kern_sge32[i].phys_addr));
>   kbuff_arr[i] = NULL;
> + }
>   }
>  
>   megasas_return_cmd(instance, cmd);

For the curly braces part:
Reviewed-by: Johannes Thumshirn 

While you're at it, care to check drivers/scsi/bfa as well? There was
a patch sometime in the past, but it seems it wasn't applied. But I
found this bugzilla entry
https://bugzilla.kernel.org/show_bug.cgi?id=98261
for it.

Thanks,
Johannes

-- 
Johannes Thumshirn   Storage
jthumsh...@suse.de +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600  D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/6] mtd: spi-nor: change return value of read/write

2015-08-03 Thread Michal Suchanek
On 3 August 2015 at 23:46, Marek Vasut  wrote:
> On Monday, August 03, 2015 at 08:39:01 PM, Michal Suchanek wrote:
>> Change the return value of spi-nor device read and write methods to
>> allow returning amount of data transferred and errors as
>> read(2)/write(2) does.
>>
>> Signed-off-by: Michal Suchanek 
>> ---
>>  include/linux/mtd/spi-nor.h | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
>> index e540952..7d782cb 100644
>> --- a/include/linux/mtd/spi-nor.h
>> +++ b/include/linux/mtd/spi-nor.h
>> @@ -185,9 +185,9 @@ struct spi_nor {
>>   int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
>>   int write_enable);
>>
>> - int (*read)(struct spi_nor *nor, loff_t from,
>> + ssize_t (*read)(struct spi_nor *nor, loff_t from,
>>   size_t len, size_t *retlen, u_char *read_buf);
>> - void (*write)(struct spi_nor *nor, loff_t to,
>> + ssize_t (*write)(struct spi_nor *nor, loff_t to,
>>   size_t len, size_t *retlen, const u_char *write_buf);
>>   int (*erase)(struct spi_nor *nor, loff_t offs);
>
> You realize that if someone does bisect and has only this patch applied,
> the compiler will complain loudly about mismatching data types, right ? :)

Yes, the compiler prints a warning. However, only the return value
which is not used changes so it should not cause any real problem. The
data type in the fsl-quadspi and m25p80 drivers is matched in the
following two patches.

Thanks

Michal
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] sched: Trace point sched_stat_sleep should cover iowait case

2015-08-03 Thread Oliver Yang


On 2015/8/4 2:43, Peter Zijlstra wrote:
> On Mon, Aug 03, 2015 at 01:35:28PM -0400, Steven Rostedt wrote:
>> On Mon, 27 Jul 2015 09:11:52 -0400
>> yangoliver  wrote:
>>
>>> Per sched_stat_sleep definition in sched.h, it should include
>>> iowait case. This can also reflect the design of sum_sleep_runtime
>>> statistic, as this counter also includes the io_wait.
>>>
>>> Signed-off-by: Yong Yang 
>>> ---
>>>  kernel/sched/fair.c | 2 ++
>>>  1 file changed, 2 insertions(+)
>>>
>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>> index d113c3b..85677bf 100644
>>> --- a/kernel/sched/fair.c
>>> +++ b/kernel/sched/fair.c
>>> @@ -3018,6 +3018,8 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, 
>>> struct sched_entity *se)
>>> se->statistics.sum_sleep_runtime += delta;
>>>  
>>> if (tsk) {
>>> +   trace_sched_stat_sleep(tsk, delta);
>>> +
>>> if (tsk->in_iowait) {
>>> se->statistics.iowait_sum += delta;
>>> se->statistics.iowait_count++;
> No, that's broken in two ways. Firstly you don't change semantics of
> stuff just because of a comment and secondly iowait has nothing what all
> to do with INTERRUPTIBLE/sleep vs UNINTERRUPTIBLE/blocked.
Peter,

Sorry for leaving key people off this mail thread.

Another reason I think sched_stat_sleep should cover the
UNINTERRUPTIBLE/blocked case is that the sum_sleep_runtime counter is
increased in both the INTERRUPTIBLE and UNINTERRUPTIBLE cases. The
following statement appears for both cases in the code:
   
se->statistics.sum_sleep_runtime += delta;

Given the comment below, I guessed that the sched_stat_sleep tracepoint was
originally designed to cover all kinds of sleep, interruptible and uninterruptible:

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
 TP_PROTO(struct task_struct *tsk, u64 delay),
 TP_ARGS(tsk, delay));

Do you think we should make the meaning of sched_stat_sleep match that of
the sum_sleep_runtime counter?

If not, we may need to fix the comment in sched.h quoted above.
>
> And wtf are you doing sending sched patches and not Cc maintainers.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH net] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread Vivien Didelot
If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
an EDSA frame is prepended with a 802.1q header once queued.

To fix this, push the VLAN tag to the payload if present, before
checking the frame protocol.

[note: we may prefer to access directly VLAN TCI from hwaccel frames,
but this approach is simpler.]

Signed-off-by: Vivien Didelot 
---
 net/dsa/tag_edsa.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 2288c80..3ada4eb 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -9,6 +9,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include "dsa_priv.h"
@@ -21,6 +22,10 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct 
net_device *dev)
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *edsa_header;
 
+   skb = vlan_hwaccel_push_inside(skb);
+   if (unlikely(!skb))
+   return NULL;
+
/*
 * Convert the outermost 802.1q tag to a DSA tag and prepend
 * a DSA ethertype field is the packet is tagged, or insert
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH net-next 4/4] net: dsa: mv88e6xxx: refactor FDB routines

2015-08-03 Thread Vivien Didelot
Refactor mv88e6xxx_port_fdb_{add,del,getnext} to respect the new DSA
switch driver FDB access routines.

The Marvell 88E6xxx switches support up to 4094 FIDs (from 1 to 0xfff;
FID 0 means that multiple address databases are not being used). So
replace the fid_mask with a fid_bitmap of 4096 bits.

FIDs 1 to num_ports will be reserved for non-bridged ports and bridge
groups (a bridge group gets the FID of its first member). The remaining
bits will then be used for VLANs.

Also do not consider an address (yet) if it is trunk mapped.

This change is needed to prepare for future hardware VLAN support.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 +
 drivers/net/dsa/mv88e6352.c |   3 +
 drivers/net/dsa/mv88e6xxx.c | 205 +++-
 drivers/net/dsa/mv88e6xxx.h |  31 +--
 4 files changed, 172 insertions(+), 70 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index cfa21ed..735f04c 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,6 +116,9 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
+   .port_fdb_add   = mv88e6xxx_port_fdb_add,
+   .port_fdb_del   = mv88e6xxx_port_fdb_del,
+   .port_fdb_getnext   = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index eb4630f..191fb25 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,6 +341,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
+   .port_fdb_add   = mv88e6xxx_port_fdb_add,
+   .port_fdb_del   = mv88e6xxx_port_fdb_del,
+   .port_fdb_getnext   = mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 438c73e..f576a39 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -964,7 +965,7 @@ static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int 
fid, u16 cmd)
 {
int ret;
 
-   ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid);
+   ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_FID, fid);
if (ret < 0)
return ret;
 
@@ -1091,7 +1092,7 @@ int mv88e6xxx_join_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
ps->bridge_mask[fid] = br_port_mask;
 
if (fid != ps->fid[port]) {
-   ps->fid_mask |= 1 << ps->fid[port];
+   clear_bit(ps->fid[port], ps->fid_bitmap);
ps->fid[port] = fid;
ret = _mv88e6xxx_update_bridge_config(ds, fid);
}
@@ -1125,9 +1126,16 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
 
mutex_lock(&ps->smi_mutex);
 
-   newfid = __ffs(ps->fid_mask);
+   newfid = find_next_zero_bit(ps->fid_bitmap, VLAN_N_VID, 1);
+   if (unlikely(newfid > ps->num_ports)) {
+   netdev_err(ds->ports[port], "all first %d FIDs are used\n",
+  ps->num_ports);
+   ret = -ENOSPC;
+   goto unlock;
+   }
+
ps->fid[port] = newfid;
-   ps->fid_mask &= ~(1 << newfid);
+   set_bit(newfid, ps->fid_bitmap);
ps->bridge_mask[fid] &= ~(1 << port);
ps->bridge_mask[newfid] = 1 << port;
 
@@ -1135,6 +1143,7 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int 
port, u32 br_port_mask)
if (!ret)
ret = _mv88e6xxx_update_bridge_config(ds, newfid);
 
+unlock:
mutex_unlock(&ps->smi_mutex);
 
return ret;
@@ -1174,8 +1183,8 @@ int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int 
port, u8 state)
return 0;
 }
 
-static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
- const unsigned char *addr)
+static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds,
+   const u8 addr[ETH_ALEN])
 {
int i, ret;
 
@@ -1190,7 +1199,7 @@ static int __mv88e6xxx_write_addr(struct dsa_switch *ds,
return 0;
 }
 
-static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr)
+static int _mv88e6xxx_atu_mac_read(struct dsa_switch *ds, u8 addr[ETH_ALEN])
 {
int i, ret;
 
@@ -1206,109 +1215,184 @@ static int __mv88e6xxx_read_addr(struct dsa_switch 
*ds, unsigned char *addr)
return 0;
 }
 
-static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port,
-   const un

Re: [PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS allocations

2015-08-03 Thread Hugh Dickins
Hi Michal,

On Thu, 2 Jul 2015, Michal Hocko wrote:
> On Thu 02-07-15 10:25:51, Theodore Ts'o wrote:
> > On Wed, Jul 01, 2015 at 03:37:15PM +0200, Michal Hocko wrote:
> From: Michal Hocko 
> Date: Thu, 2 Jul 2015 17:05:05 +0200
> Subject: [PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS
>  allocations
> 
> Nikolay has reported a hang when a memcg reclaim got stuck with the
> following backtrace...

Sorry, I couldn't manage more than to ignore you when you Cc'ed me on
this a month ago.  Dave's perfectly correct, we had ourselves come to
notice that recently: although in an ideal world a filesystem would
only mark PageWriteback once the IO is all ready to go, in the real
world that's not quite so, and a memory allocation may stand between.
Which leaves my v3.6 c3b94f44fcb0 in danger of deadlocking.

And suddenly now, in v4.2-rc or perhaps in v4.1 also, that has started
hitting me too (I don't know which release Nikolay noticed this on).
And it has become urgent to fix: I've added Linus to the Cc because
I believe his comment in the rc5 announcement, "There's also a pending
question about some of the VM changes", reflects this.  Twice when I
was trying to verify fixes to the dcache issue which came up at the
end of last week, I was frustrated by unrelated hangs in my load.
The first time I didn't recognize it, but the second time I did,
and then came to realize that your patch is just what is needed.

But I have modified it a little, I don't think you'll mind.  As you
suggested yourself, I actually prefer to test may_enter_fs there, rather
than __GFP_FS: not a big deal, I certainly wouldn't want to delay the
fix if someone thinks differently; but I tend to feel that may_enter_fs
is what we already use for such decisions there, so better to use it.
(And the SwapCache case is immune to the ext4 or xfs IO submission pattern.)

I've fixed up the patch and updated the comments, since Tejun has
meanwhile introduced sane_reclaim(sc) - I'm staying on in the insane
asylum for now (and sane_reclaim is clearly unaffected by the change).

I've omitted your hunk unindenting Case 3 wait_on_page_writeback(page):
I prefer your style too, but thought it better to minimize the patch,
especially if this is heading to the stables.  (I was tempted to add in
my unlock_page there, that we discussed once before: but again thought
it better to minimize the fix - it is "selfish" not to unlock_page,
but I think that anything heading for deadlock on the locked page would
in other circumstances be heading for deadlock on the writeback page -
I've never found that change critical.)

And I've done quite a bit of testing.  The loads that hung at the
weekend have been running nicely for 24 hours now, no problem with the
writeback hang and no problem with the dcache ENOTDIR issue.  Though
I've no idea of what recent VM change turned this into a hot issue.

And more testing on the history of it, considering your stable 3.6+
designation that I wasn't satisfied with.  Getting out that USB stick
again, I find that 3.6, 3.7 and 3.8 all OOM if their __GFP_IO test
is updated to a may_enter_fs test; but something happened in 3.9
to make it and subsequent releases safe with the may_enter_fs test.
You can certainly argue that the remote chance of a deadlock is
worse than the fair chance of a spurious OOM; but if you insist
on 3.6+, then I think it would have to go back even further,
because we marked that commit for stable itself.  I suggest 3.9+.


[PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS allocations

From: Michal Hocko 

Nikolay has reported a hang when a memcg reclaim got stuck with the
following backtrace:
PID: 18308  TASK: 883d7c9b0a30  CPU: 1   COMMAND: "rsync"
 #0 [88177374ac60] __schedule at 815ab152
 #1 [88177374acb0] schedule at 815ab76e
 #2 [88177374acd0] schedule_timeout at 815ae5e5
 #3 [88177374ad70] io_schedule_timeout at 815aad6a
 #4 [88177374ada0] bit_wait_io at 815abfc6
 #5 [88177374adb0] __wait_on_bit at 815abda5
 #6 [88177374ae00] wait_on_page_bit at 8111fd4f
 #7 [88177374ae50] shrink_page_list at 81135445
 #8 [88177374af50] shrink_inactive_list at 81135845
 #9 [88177374b060] shrink_lruvec at 81135ead
 #10 [88177374b150] shrink_zone at 811360c3
 #11 [88177374b220] shrink_zones at 81136eff
 #12 [88177374b2a0] do_try_to_free_pages at 8113712f
 #13 [88177374b300] try_to_free_mem_cgroup_pages at 811372be
 #14 [88177374b380] try_charge at 81189423
 #15 [88177374b430] mem_cgroup_try_charge at 8118c6f5
 #16 [88177374b470] __add_to_page_cache_locked at 8112137d
 #17 [88177374b4e0] add_to_page_cache_lru at 81121618
 #18 [88177374b510] pagecache_get_page at 8112170b
 #19 [88177374b560] grow_dev_page at 811c8297
 #20 [88177374b5c0] __getblk_slow at 811c91d6
 #21 

[PATCH net-next 2/4] net: switchdev: support static FDB addresses

2015-08-03 Thread Vivien Didelot
This patch adds an is_static boolean to the switchdev_obj_fdb structure,
in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE.

Signed-off-by: Vivien Didelot 
---
 include/net/switchdev.h   | 1 +
 net/switchdev/switchdev.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e90e1a0..0e296b8 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
struct switchdev_obj_fdb {  /* PORT_FDB */
u8 addr[ETH_ALEN];
u16 vid;
+   bool is_static;
} fdb;
} u;
 };
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 28786e8..b75897c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
ndm->ndm_flags   = NTF_SELF;
ndm->ndm_type= 0;
ndm->ndm_ifindex = dev->ifindex;
-   ndm->ndm_state   = NUD_REACHABLE;
+   ndm->ndm_state   = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE;
 
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
goto nla_put_failure;
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH net-next 1/4] net: switchdev: change fdb addr for a byte array

2015-08-03 Thread Vivien Didelot
The address in the switchdev_obj_fdb structure is currently represented
as a pointer. Replacing it with a 6-byte array allows switchdev to carry
addresses directly read from hardware registers, not stored by the
switch chip driver (as in Rocker).

Signed-off-by: Vivien Didelot 
---
 drivers/net/ethernet/rocker/rocker.c | 2 +-
 include/net/switchdev.h  | 2 +-
 net/bridge/br_fdb.c  | 2 +-
 net/switchdev/switchdev.c| 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 4cd5a71..faa5db0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
if (found->key.pport != rocker_port->pport)
continue;
-   fdb->addr = found->key.addr;
+   memcpy(fdb->addr, found->key.addr, ETH_ALEN);
fdb->vid = rocker_port_vlan_to_vid(rocker_port,
   found->key.vlan_id);
err = obj->cb(rocker_port->dev, obj);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da893..e90e1a0 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,7 +70,7 @@ struct switchdev_obj {
u32 tb_id;
} ipv4_fib;
struct switchdev_obj_fdb {  /* PORT_FDB */
-   const unsigned char *addr;
+   u8 addr[ETH_ALEN];
u16 vid;
} fdb;
} u;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..2c64b6a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct 
net_bridge_fdb_entry *f)
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = f->addr.addr,
.vid = f->vlan_id,
},
};
 
+   memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN);
switchdev_port_obj_del(f->dst->dev, &obj);
 }
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 33bafa2..28786e8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_add(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_del(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH net-next 3/4] net: dsa: add support for switchdev FDB objects

2015-08-03 Thread Vivien Didelot
Remove the fdb_{add,del,getnext} function pointer in favor of new
port_fdb_{add,del,getnext}.

Implement the switchdev_port_obj_{add,del,dump} functions in DSA to
support the SWITCHDEV_OBJ_PORT_FDB objects.

These functions are called from switchdev_port_bridge_{get,set,del}link.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 -
 drivers/net/dsa/mv88e6352.c |   3 -
 include/net/dsa.h   |  16 ++--
 net/dsa/slave.c | 221 
 4 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 1c78084..cfa21ed 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index af210ef..eb4630f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63b..a090c8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,16 @@ struct dsa_switch_driver {
 u32 br_port_mask);
int (*port_stp_update)(struct dsa_switch *ds, int port,
   u8 state);
-   int (*fdb_add)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_del)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_getnext)(struct dsa_switch *ds, int port,
-  unsigned char *addr, bool *is_static);
+
+   /*
+* Forwarding database
+*/
+   int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
+   u8 addr[ETH_ALEN], bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0010c69..0f99a17 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***/
@@ -200,105 +201,6 @@ out:
return 0;
 }
 
-static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid, u16 nlm_flags)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_add)
-   ret = ds->drv->fdb_add(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_del)
-   ret = ds->drv->fdb_del(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
-  const unsigned char *addr, u16 vid,
-  bool is_static,
-  u32 portid, u32 seq, int type,
-  unsigned int flags)
-{
-   struct nlmsghdr *nlh;
-   struct ndmsg *ndm;
-
-   nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-   if (!nlh)
-   return -EMSGSIZE;
-
-   ndm = nlmsg_data(nlh);
-   ndm->ndm_family  = AF_BRIDGE;
-   ndm->ndm_pad1= 0;
-   ndm->ndm_pad2= 0;
-   ndm->ndm_flags   = NTF_EXT_LEARNED;

[PATCH net-next 0/4] net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot
This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
objects.

The first two patches add minor but necessary changes to switchdev, the third
one implements the switchdev glue in DSA for FDB routines, and the fourth one
refactors the FDB access functions in the mv88e6xxx code.

Below is an example (ports 0-2 belong to br0, ports 3-4 belong to br1):

# bridge fdb add 3c:97:0e:11:30:6e dev swp2
# bridge fdb add 3c:97:0e:11:40:78 dev swp3
# bridge fdb add 3c:97:0e:11:50:86 dev swp4
# bridge fdb del 3c:97:0e:11:40:78 dev swp3
# bridge fdb
01:00:5e:00:00:01 dev eth0 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
00:50:d2:10:78:15 dev swp0 master br0 permanent
3c:97:0e:11:30:6e dev swp2 self static
00:50:d2:10:78:15 dev swp3 master br1 permanent
3c:97:0e:11:50:86 dev swp4 self static
# cat /sys/kernel/debug/dsa0/atu
# DB   T/P  Vec State Addr
# 001  Port 004   e   3c:97:0e:11:30:6e
# 004  Port 010   e   3c:97:0e:11:50:86

For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged
ports and bridge groups, and the remaining will be later used by VLANs.

This change is necessary to welcome the support for hardware VLANs (which will
follow soon).

Cheers,
-v

Vivien Didelot (4):
  net: switchdev: change fdb addr for a byte array
  net: switchdev: support static FDB addresses
  net: dsa: add support for switchdev FDB objects
  net: dsa: mv88e6xxx: refactor FDB routines

 drivers/net/dsa/mv88e6171.c  |   6 +-
 drivers/net/dsa/mv88e6352.c  |   6 +-
 drivers/net/dsa/mv88e6xxx.c  | 205 ++--
 drivers/net/dsa/mv88e6xxx.h  |  31 +++--
 drivers/net/ethernet/rocker/rocker.c |   2 +-
 include/net/dsa.h|  16 ++-
 include/net/switchdev.h  |   3 +-
 net/bridge/br_fdb.c  |   2 +-
 net/dsa/slave.c  | 221 +++
 net/switchdev/switchdev.c|   6 +-
 10 files changed, 308 insertions(+), 190 deletions(-)

-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 4/5] clk: Hi6220: add stub clock driver

2015-08-03 Thread Leo Yan
Hi Stephen,

On Mon, Aug 03, 2015 at 02:37:52PM -0700, Stephen Boyd wrote:
> On 08/03, Leo Yan wrote:
> > diff --git a/drivers/clk/hisilicon/clk-hi6220-stub.c 
> > b/drivers/clk/hisilicon/clk-hi6220-stub.c
> > new file mode 100644
> > index 000..0931666
> > --- /dev/null
> > +++ b/drivers/clk/hisilicon/clk-hi6220-stub.c
> > @@ -0,0 +1,279 @@
> > +/*
> > + * Hi6220 stub clock driver
> > + *
> > + * Copyright (c) 2015 Hisilicon Limited.
> > + * Copyright (c) 2015 Linaro Limited.
> > + *
> > + * Author: Leo Yan 
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + */
> > +
> > +#include 
> 
> Is this include used?
> 
> > +#include 
> > +#include 
> 
> Is this include used?
> 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include 
> > +
> > +/* Stub clocks id */
> > +#define HI6220_STUB_ACPU0  0
> > +#define HI6220_STUB_ACPU1  1
> > +#define HI6220_STUB_GPU2
> > +#define HI6220_STUB_DDR5
> > +
> > +/* Mailbox message */
> > +#define HI6220_MBOX_MSG_LEN8
> > +
> > +#define HI6220_MBOX_FREQ   (0xA)
> > +#define HI6220_MBOX_CMD_SET(0x3)
> > +#define HI6220_MBOX_OBJ_AP (0x0)
> > +
> > +/* CPU dynamic frequency scaling */
> > +#define ACPU_DFS_FREQ_MAX  (0x1724)
> > +#define ACPU_DFS_CUR_FREQ  (0x17CC)
> > +#define ACPU_DFS_FLAG  (0x1B30)
> > +#define ACPU_DFS_FREQ_REQ  (0x1B34)
> > +#define ACPU_DFS_FREQ_LMT  (0x1B38)
> > +#define ACPU_DFS_LOCK_FLAG (0xAEAEAEAE)
> 
> We don't need parenthesis around single values in these macros.
> 
> > +
> > +#define to_stub_clk(hw) container_of(hw, struct hi6220_stub_clk, hw)
> > +
> > +struct hi6220_stub_clk {
> > +   u32 id;
> > +   u32 rate;
> > +
> > +   struct device *dev;
> > +   struct clk_hw hw;
> > +
> > +   struct regmap *dfs_map;
> > +   struct mbox_client cl;
> > +   struct mbox_chan *mbox;
> > +};
> > +
> > +struct hi6220_mbox_msg {
> > +   unsigned char type;
> > +   unsigned char cmd;
> > +   unsigned char obj;
> > +   unsigned char src;
> > +   unsigned char para[4];
> > +};
> > +
> > +union hi6220_mbox_data {
> > +   unsigned int data[HI6220_MBOX_MSG_LEN];
> > +   struct hi6220_mbox_msg msg;
> > +};
> > +
> > +static unsigned int hi6220_acpu_get_freq(struct hi6220_stub_clk *stub_clk)
> > +{
> > +   unsigned int freq;
> > +
> > +   regmap_read(stub_clk->dfs_map, ACPU_DFS_CUR_FREQ, &freq);
> > +   return freq;
> > +}
> > +
> > +static int hi6220_acpu_set_freq(struct hi6220_stub_clk *stub_clk,
> > +   unsigned int freq)
> > +{
> > +   union hi6220_mbox_data data;
> > +
> > +   stub_clk->mbox = mbox_request_channel(&stub_clk->cl, 0);
> 
> Why not request the channel once in probe?
> 
> > +   if (IS_ERR(stub_clk->mbox)) {
> > +   dev_err(stub_clk->dev, "failed get mailbox channel\n");
> > +   return PTR_ERR(stub_clk->mbox);
> > +   };
> > +
> > +   /* set the frequency in sram */
> > +   regmap_write(stub_clk->dfs_map, ACPU_DFS_FREQ_REQ, freq);
> > +
> > +   /* compound mailbox message */
> > +   data.msg.type = HI6220_MBOX_FREQ;
> > +   data.msg.cmd  = HI6220_MBOX_CMD_SET;
> > +   data.msg.obj  = HI6220_MBOX_OBJ_AP;
> > +   data.msg.src  = HI6220_MBOX_OBJ_AP;
> > +
> > +   mbox_send_message(stub_clk->mbox, &data);
> > +   mbox_free_channel(stub_clk->mbox);
> > +   return 0;
> > +}
> > +
> > +static int hi6220_acpu_round_freq(struct hi6220_stub_clk *stub_clk,
> > + unsigned int freq)
> > +{
> > +   unsigned int limit_flag, limit_freq = UINT_MAX;
> > +   unsigned int max_freq;
> > +
> > +   /* check the constrainted frequency */
> 
> s/constrainted/constrained/ ?
> 
> > +   regmap_read(stub_clk->dfs_map, ACPU_DFS_FLAG, &limit_flag);
> > +   if (limit_flag == ACPU_DFS_LOCK_FLAG)
> > +   regmap_read(stub_clk->dfs_map, ACPU_DFS_FREQ_LMT, &limit_freq);
> > +
> > +   /* check the supported maximum frequency */
> > +   regmap_read(stub_clk->dfs_map, ACPU_DFS_FREQ_MAX, &max_freq);
> > +
> > +   /* calculate the real maximum frequency */
> > +   max_freq = min(max_freq, limit_freq);
> > +
> > +   if (WARN_ON(freq > max_freq))
> > +   freq = max_freq;
> > +
> > +   return freq;
> > +}
> > +
> > +static unsigned long hi6220_stub_clk_recalc_rate(struct clk_hw *hw,
> > +   unsigned long parent_rate)
> > +{
> > +   u32 rate = 0;
> > +   struct hi6220_stub_clk *stub_clk = to_stub_clk(hw);
> > +
> > +   switch (stub_clk->id) {
> > +   case HI6220_STUB_ACPU0:
> > +   rate = hi6220_acpu_get_freq(stub_clk);
> > +
> > +   /* convert from KHz to Hz */
> 
> s/KHz/kHz/ ?
> 
> > +   rate *= 1000;
> > +   break;
> > +
> > +   default:
> > +   de

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread David Miller
From: Vivien Didelot 
Date: Tue, 4 Aug 2015 02:01:18 -0400 (EDT)

> Dully noted. Should I resend it?

Yes, please.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] clk: pxa: pxa3xx: fix CKEN register access

2015-08-03 Thread Robert Jarzmik
Stephen Boyd  writes:

> On 08/03/2015 12:58 PM, Robert Jarzmik wrote:
>> Clocks 0 to 31 are on CKENA, and not CKENB. The clock register names
>> were inadequately inverted. As a consequence, all clock operations were
>> happening on CKENB, because almost all but 2 clocks are on CKENA.
>>
>> As the clocks were activated by the bootloader in the former tests, it
>> escaped the testing that the wrong clock gate was manipulated. The error
>> was revealed by changing the pxa3xx-and driver to a module, where tupon
>> unloading the wrong clock was disabled in CKENB.
>>
>> Signed-off-by: Robert Jarzmik 
>> ---
>
> Did you want a fixes tag to send this back to stable?
Ah yes, good point, v2 on its way.

Stephen and Mike, do you think this can still get in -rc6 ?

Cheers.

-- 
Robert
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] clk: pxa: pxa3xx: fix CKEN register access

2015-08-03 Thread Robert Jarzmik
Clocks 0 to 31 are on CKENA, and not CKENB. The clock register names
were inadequately inverted. As a consequence, all clock operations were
happening on CKENB, because almost all but 2 clocks are on CKENA.

As the clocks were activated by the bootloader in the former tests, it
escaped the testing that the wrong clock gate was manipulated. The error
was revealed by changing the pxa3xx-nand driver to a module, where, upon
unloading, the wrong clock was disabled in CKENB.

Fixes: 9bbb8a338fb2 ("clk: pxa: add pxa3xx clock driver")
Signed-off-by: Robert Jarzmik 
---
Since v1: added Fixes:
---
 drivers/clk/pxa/clk-pxa3xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c
index c677b9ab5367..4af4eed5f89f 100644
--- a/drivers/clk/pxa/clk-pxa3xx.c
+++ b/drivers/clk/pxa/clk-pxa3xx.c
@@ -126,7 +126,7 @@ PARENTS(pxa3xx_ac97_bus) = { "ring_osc_60mhz", "ac97" };
 PARENTS(pxa3xx_sbus) = { "ring_osc_60mhz", "system_bus" };
 PARENTS(pxa3xx_smemcbus) = { "ring_osc_60mhz", "smemc" };
 
-#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENA : &CKENB)
+#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENB : &CKENA)
 #define PXA3XX_CKEN(dev_id, con_id, parents, mult_lp, div_lp, mult_hp, \
div_hp, bit, is_lp, flags)  \
PXA_CKEN(dev_id, con_id, bit, parents, mult_lp, div_lp, \
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] media: atmel-isi: move configure_geometry() to start_streaming()

2015-08-03 Thread Josh Wu

Hi, Laurent

On 8/3/2015 9:27 PM, Laurent Pinchart wrote:

Hi Josh,

On Monday 03 August 2015 11:56:01 Josh Wu wrote:

On 7/31/2015 10:37 PM, Laurent Pinchart wrote:

On Wednesday 17 June 2015 18:39:39 Josh Wu wrote:

As in the set_fmt() function we only need to know which format has been set,
we don't need to access the ISI hardware at this moment.

So moving configure_geometry(), which accesses the ISI hardware, to
start_streaming() will make the code more consistent and simpler.

Signed-off-by: Josh Wu 
---

   drivers/media/platform/soc_camera/atmel-isi.c | 17 +
   1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/media/platform/soc_camera/atmel-isi.c
b/drivers/media/platform/soc_camera/atmel-isi.c index 8bc40ca..b01086d
100644
--- a/drivers/media/platform/soc_camera/atmel-isi.c
+++ b/drivers/media/platform/soc_camera/atmel-isi.c
@@ -390,6 +390,11 @@ static int start_streaming(struct vb2_queue *vq,
unsigned int count) /* Disable all interrupts */

isi_writel(isi, ISI_INTDIS, (u32)~0UL);

+   ret = configure_geometry(isi, icd->user_width, icd->user_height,
+   icd->current_fmt->code);

I would also make configure_geometry a void function, as the only failure
case really can't occur.

I think this case can be reached if the user requires an RGB565 format to
capture and the sensor also supports the RGB565 format,
as the atmel-isi driver will provide RGB565 support via the pass-through
mode (maybe we need to reconsider this part).

So that will cause configure_geometry() to return an error, since it
finds that the bus format is not Y8 or YUV422.

In my opinion, we should not change configure_geometry()'s return type
until we add a format sanity check before we call configure_geometry()
in the future.

It will really confuse the user if S_FMT accepts a format but STREAMON fails
due to the format being unsupported. Could that be fixed by defaulting to a
known supported format in S_FMT if the requested format isn't supported?


yes, it's the right way to go.


You
could then remove the error check in configure_geometry().


So I will send a v2 series, which will add one more patch to add a
sanity check on S_FMT and remove the error check code in
configure_geometry().


And for this patch in v2, I will add your reviewed-by tag. Is that Okay 
for you?


Best Regards,
Josh Wu


Apart from that,

Reviewed-by: Laurent Pinchart 

Thanks for the review.

Best Regards,
Josh Wu


+   if (ret < 0)
+   return ret;
+

spin_lock_irq(&isi->lock);
/* Clear any pending interrupt */
isi_readl(isi, ISI_STATUS);

@@ -477,8 +482,6 @@ static int isi_camera_init_videobuf(struct vb2_queue
*q, static int isi_camera_set_fmt(struct soc_camera_device *icd,

  struct v4l2_format *f)
   
   {


-   struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-   struct atmel_isi *isi = ici->priv;

struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
const struct soc_camera_format_xlate *xlate;
struct v4l2_pix_format *pix = &f->fmt.pix;

@@ -511,16 +514,6 @@ static int isi_camera_set_fmt(struct
soc_camera_device
*icd, if (mf->code != xlate->code)

return -EINVAL;

-   /* Enable PM and peripheral clock before operate isi registers */
-   pm_runtime_get_sync(ici->v4l2_dev.dev);
-
-   ret = configure_geometry(isi, pix->width, pix->height, xlate->code);
-
-   pm_runtime_put(ici->v4l2_dev.dev);
-
-   if (ret < 0)
-   return ret;
-

pix->width   = mf->width;
pix->height  = mf->height;
pix->field   = mf->field;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot
Hi,

On Aug 4, 2015, at 1:54 AM, Vivien Didelot vivien.dide...@savoirfairelinux.com 
wrote:

> This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
> objects.
> 
> The first two patches add minor but necessary changes to switchdev, the third
> one implements the switchdev glue in DSA for FDB routines, and the forth one
> refactors the FDB access functions in the mv88e6xxx code.

For some reason the patch 4/4 didn't follow. I also missed the net-next
prefix, as mentioned by David earlier. Please ignore this series, I will
retry soon.

Thanks,
-v
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] i2c: mediatek: fix transfer error handling

2015-08-03 Thread Eddie Huang
On Fri, 2015-07-31 at 13:00 +0200, Wolfram Sang wrote:
> On Tue, Jul 28, 2015 at 11:38:05AM +0800, Eddie Huang wrote:
> > From: Liguo Zhang 
> > 
> > Reset i2c dma engine in hw init function.
> > When occur i2c ack error, mtk_i2c_irq may is twice,
> > first is the ack error interrupt, then the complete interrupt,
> > so i2c->irq_stat need keep the two interrupt value, and only
> > call complete() for the complete interrupt.
> > 
> > Signed-off-by: Liguo Zhang 
> > Signed-off-by: Eddie Huang 
> 
> Looks to me this patch needs to be split up into one patch per issue?
OK, I can split 

> And doesn't it kill the auto_restart functionality? Sascha?

No. restart_flag is already set in the mtk_i2c_do_transfer() function. It is
not necessary to check restart_flag again in mtk_i2c_irq(). It is simpler to
just read the status bits and write them back to clear the interrupt status.

Eddie
Thanks

> 
> > ---
> >  drivers/i2c/busses/i2c-mt65xx.c |   25 ++---
> >  1 file changed, 18 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/i2c/busses/i2c-mt65xx.c 
> > b/drivers/i2c/busses/i2c-mt65xx.c
> > index 9920eef..57d11b7 100644
> > --- a/drivers/i2c/busses/i2c-mt65xx.c
> > +++ b/drivers/i2c/busses/i2c-mt65xx.c
> > @@ -59,6 +59,7 @@
> >  #define I2C_DMA_START_EN   0x0001
> >  #define I2C_DMA_INT_FLAG_NONE  0x
> >  #define I2C_DMA_CLR_FLAG   0x
> > +#define I2C_DMA_HARD_RST   0x0002
> >  
> >  #define I2C_DEFAULT_SPEED  10  /* hz */
> >  #define MAX_FS_MODE_SPEED  40
> > @@ -81,6 +82,7 @@ enum DMA_REGS_OFFSET {
> > OFFSET_INT_FLAG = 0x0,
> > OFFSET_INT_EN = 0x04,
> > OFFSET_EN = 0x08,
> > +   OFFSET_RST = 0x0c,
> > OFFSET_CON = 0x18,
> > OFFSET_TX_MEM_ADDR = 0x1c,
> > OFFSET_RX_MEM_ADDR = 0x20,
> > @@ -262,6 +264,10 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c)
> >   I2C_CONTROL_CLK_EXT_EN | I2C_CONTROL_DMA_EN;
> > writew(control_reg, i2c->base + OFFSET_CONTROL);
> > writew(I2C_DELAY_LEN, i2c->base + OFFSET_DELAY_LEN);
> > +
> > +   writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST);
> > +   udelay(50);
> > +   writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST);
> >  }
> >  
> >  /*
> > @@ -550,16 +556,20 @@ err_exit:
> >  static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id)
> >  {
> > struct mtk_i2c *i2c = dev_id;
> > -   u16 restart_flag = 0;
> > +   u16 intr_stat = 0;
> >  
> > -   if (i2c->dev_comp->auto_restart)
> > -   restart_flag = I2C_RS_TRANSFER;
> > +   intr_stat = readw(i2c->base + OFFSET_INTR_STAT);
> > +   writew(intr_stat, i2c->base + OFFSET_INTR_STAT);
> >  
> > -   i2c->irq_stat = readw(i2c->base + OFFSET_INTR_STAT);
> > -   writew(restart_flag | I2C_HS_NACKERR | I2C_ACKERR
> > -   | I2C_TRANSAC_COMP, i2c->base + OFFSET_INTR_STAT);
> > +   /*
> > +* when occurs i2c ack error, mtk_i2c_irq is called twice,
> > +* first is the ack error interrupt, then the complete interrupt,
> > +* i2c->irq_stat need keep the two interrupt value.
> > +*/
> > +   i2c->irq_stat |= intr_stat;
> >  
> > -   complete(&i2c->msg_complete);
> > +   if (i2c->irq_stat & I2C_TRANSAC_COMP)
> > +   complete(&i2c->msg_complete);
> >  
> > return IRQ_HANDLED;
> >  }
> > @@ -729,3 +739,4 @@ module_platform_driver(mtk_i2c_driver);
> >  MODULE_LICENSE("GPL v2");
> >  MODULE_DESCRIPTION("MediaTek I2C Bus Driver");
> >  MODULE_AUTHOR("Xudong Chen ");
> > +MODULE_AUTHOR("Liguo Zhang ");
> > -- 
> > 1.7.9.5
> > 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] spi: ppc44x: Remove chipselect from setupxfer

2015-08-03 Thread Nicolas Boichat
The chipselect operation is already done in spi_bitbang_transfer_one,
or in spi_bitbang_setup, so there is no need to do it in setupxfer
as well.

Signed-off-by: Nicolas Boichat 
---
 drivers/spi/spi-ppc4xx.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 54fb984..55947f6 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -210,13 +210,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, 
struct spi_transfer *t)
if (in_8(&hw->regs->cdm) != cdm)
out_8(&hw->regs->cdm, cdm);
 
-   spin_lock(&hw->bitbang.lock);
-   if (!hw->bitbang.busy) {
-   hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE);
-   /* Need to ndelay here? */
-   }
-   spin_unlock(&hw->bitbang.lock);
-
return 0;
 }
 
-- 
2.5.0.rc2.392.g76e840b

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] spi: s3c24xx: Convert spinlock to mutex

2015-08-03 Thread Nicolas Boichat
bitbang->lock is now a mutex: replace spinlock function calls
by mutex functions.

Signed-off-by: Nicolas Boichat 
---
 drivers/spi/spi-s3c24xx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index f36bc32..b1d03e5 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -198,12 +198,11 @@ static int s3c24xx_spi_setup(struct spi_device *spi)
if (ret)
return ret;
 
-   spin_lock(&hw->bitbang.lock);
-   if (!hw->bitbang.busy) {
+   if (mutex_trylock(&hw->bitbang.lock)) {
hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE);
/* need to ndelay for 0.5 clocktick ? */
+   mutex_unlock(&hw->bitbang.lock);
}
-   spin_unlock(&hw->bitbang.lock);
 
return 0;
 }
-- 
2.5.0.rc2.392.g76e840b

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] spi: bitbang: Replace spinlock by mutex when calling chipselect

2015-08-03 Thread Nicolas Boichat
Enabling CONFIG_DEBUG_ATOMIC_SLEEP in kernel configuration, we get
this warning in spi_gpio_setup:
[1.177747] BUG: sleeping function called from invalid context at 
drivers/gpio/gpiolib.c:1431
[1.190182] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0
[1.196922] 3 locks held by swapper/0/1:
[1.200812]  #0:  (&dev->mutex){..}, at: [] 
__driver_attach+0x58/0x98
[1.209147]  #1:  (spi_add_lock){+.+.+.}, at: [] 
spi_add_device+0x80/0x14c
[1.217564]  #2:  (&(&bitbang->lock)->rlock){..}, at: 
[] spi_bitbang_setup+0x84/0xc4
[1.227185] irq event stamp: 279856
[1.230645] hardirqs last  enabled at (279855): [] 
__mutex_unlock_slowpath+0x158/0x16c
[1.240070] hardirqs last disabled at (279856): [] 
_raw_spin_lock_irqsave+0x20/0x6c
[1.249233] softirqs last  enabled at (262072): [] 
bdi_register+0x124/0x1d0
[1.257707] softirqs last disabled at (262070): [] 
bdi_register+0x100/0x1d0
[1.266185] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 3.18.0 #608
[1.277419] Call trace:
[1.279848] [] dump_backtrace+0x0/0x12c
[1.285209] [] show_stack+0x10/0x1c
[1.290223] [] dump_stack+0x80/0xb4
[1.295238] [] __might_sleep+0x110/0x11c
[1.300687] [] gpiod_set_raw_value_cansleep+0x24/0x4c
[1.307255] [] spi_gpio_chipselect+0x74/0x88
[1.313045] [] spi_bitbang_setup+0x98/0xc4
[1.318664] [] spi_gpio_setup+0x50/0xc8
[1.324022] [] spi_setup+0xe4/0xf8
[1.328950] [] spi_add_device+0xd0/0x14c
[1.334396] [] spi_register_master+0x6a8/0x718
[1.340359] [] spi_bitbang_start+0xe8/0x108
[1.346064] [] spi_gpio_probe+0x3b4/0x448
[1.351595] [] platform_drv_probe+0x4c/0x9c
[1.357301] [] driver_probe_device+0xd4/0x23c
[1.363180] [] __driver_attach+0x68/0x98
[1.368627] [] bus_for_each_dev+0x7c/0xb0
[1.374160] [] driver_attach+0x1c/0x28
[1.379434] [] bus_add_driver+0xd8/0x1e0
[1.384881] [] driver_register+0xbc/0x10c
[1.390412] [] __platform_driver_register+0x5c/0x68
[1.396808] [] spi_gpio_driver_init+0x14/0x20
[1.402685] [] do_one_initcall+0x18c/0x1ac
[1.408306] [] kernel_init_freeable+0x228/0x2e0
[1.414356] [] kernel_init+0x10/0xd8

chipselect (in this case, spi_gpio_chipselect, which calls
gpiod_set_raw_value_cansleep), can sleep, so we should not hold
a spinlock while calling it.

This issue was introduced by this commit, which converted spi-gpio
to cansleep variants:
d9dda5a191 "spi: spi-gpio: Use 'cansleep' variants to access GPIO"

Replace spinlock + busy variable by a mutex, and get rid of
spi_bitbang_prepare_hardware and spi_bitbang_unprepare_hardware,
which are not useful anymore.

Signed-off-by: Nicolas Boichat 
---

Actually, I'm not sure if I understand the existing code: why are we not
waiting for busy to go down to 0, then call chipselect, instead of not calling
it at all if the bus happens to be busy when we setup the device? With the
current approach, it would be easy to just use an unconditional mutex_lock.

Also, is it harmful to deactivate the newly setup device in spi_bitbang_setup,
even if the bus is busy with another device? chipselect should be independent
for each device (or is it not?). So I'm not clear why we need any locking at
all...

Hopefully someone can shed some light on this...

Anyway, this patch series does not change the existing behaviour, applies on
top of broonie-sound/for-next, and, along with the 2 follow-up patches, was
compile-tested on x86-64/arm (allyesconfig) and ppc44x (defconfig+SPI driver),
and runtime-tested on an arm platform.

 drivers/spi/spi-bitbang.c   | 42 +++--
 include/linux/spi/spi_bitbang.h |  3 +--
 2 files changed, 8 insertions(+), 37 deletions(-)

diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 840a498..931c37e 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -180,7 +180,6 @@ int spi_bitbang_setup(struct spi_device *spi)
 {
struct spi_bitbang_cs   *cs = spi->controller_state;
struct spi_bitbang  *bitbang;
-   unsigned long   flags;
 
bitbang = spi_master_get_devdata(spi->master);
 
@@ -210,12 +209,11 @@ int spi_bitbang_setup(struct spi_device *spi)
 */
 
/* deselect chip (low or high) */
-   spin_lock_irqsave(&bitbang->lock, flags);
-   if (!bitbang->busy) {
+   if (mutex_trylock(&bitbang->lock)) {
bitbang->chipselect(spi, BITBANG_CS_INACTIVE);
ndelay(cs->nsecs);
+   mutex_unlock(&bitbang->lock);
}
-   spin_unlock_irqrestore(&bitbang->lock, flags);
 
return 0;
 }
@@ -252,20 +250,6 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct 
spi_transfer *t)
  * transfer-at-a-time ones to leverage dma or fifo hardware.
  */
 
-static int spi_bitbang_prepare_hardware(struct spi_master *spi)
-{
-   struct spi_bitbang  *bitbang;
-   unsigned long   flags;
-
-   bitbang = spi_master_get_devdata

RE: [RFC 0/2] VFIO: Add virtual MSI doorbell support.

2015-08-03 Thread Bhushan Bharat


> -Original Message-
> From: Pranavkumar Sawargaonkar [mailto:pranavku...@linaro.org]
> Sent: Tuesday, August 04, 2015 11:18 AM
> To: Bhushan Bharat-R65777
> Cc: k...@vger.kernel.org; Alex Williamson; kvm...@lists.cs.columbia.edu;
> linux-arm-ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> christoffer.d...@linaro.org; marc.zyng...@arm.com; will.dea...@arm.com;
> bhelg...@google.com; a...@arndb.de; rob.herr...@linaro.org;
> eric.au...@linaro.org; patc...@apm.com; Yoder Stuart-B08248
> Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support.
> 
> Hi Bharat,
> 
> On 28 July 2015 at 23:28, Alex Williamson 
> wrote:
> > On Tue, 2015-07-28 at 17:23 +, Bhushan Bharat wrote:
> >> Hi Alex,
> >>
> >> > -Original Message-
> >> > From: Alex Williamson [mailto:alex.william...@redhat.com]
> >> > Sent: Tuesday, July 28, 2015 9:52 PM
> >> > To: Pranavkumar Sawargaonkar
> >> > Cc: k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; linux-arm-
> >> > ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> >> > christoffer.d...@linaro.org; marc.zyng...@arm.com;
> >> > will.dea...@arm.com; bhelg...@google.com; a...@arndb.de;
> >> > rob.herr...@linaro.org; eric.au...@linaro.org; patc...@apm.com;
> >> > Bhushan Bharat-R65777; Yoder
> >> > Stuart-B08248
> >> > Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support.
> >> >
> >> > On Fri, 2015-07-24 at 14:33 +0530, Pranavkumar Sawargaonkar wrote:
> >> > > In current VFIO MSI/MSI-X implementation, linux host kernel
> >> > > allocates MSI/MSI-X vectors when userspace requests through vfio
> ioctls.
> >> > > Vfio creates irqfd mappings to notify MSI/MSI-X interrupts to the
> >> > > userspace when raised.
> >> > > Guest OS will see emulated MSI/MSI-X controller and receives an
> >> > > interrupt when kernel notifies the same via irqfd.
> >> > >
> >> > > Host kernel allocates MSI/MSI-X using standard linux routines
> >> > > like
> >> > > pci_enable_msix_range() and pci_enable_msi_range().
> >> > > These routines along with requset_irq() in host kernel sets up
> >> > > MSI/MSI-X vectors with Physical MSI/MSI-X addresses provided by
> >> > > interrupt controller driver in host kernel.
> >> > >
> >> > > This means when a device is assigned with the guest OS, MSI/MSI-X
> >> > > addresses present in PCIe EP are the PAs programmed by the host
> >> > > linux
> >> > kernel.
> >> > >
> >> > > In x86 MSI/MSI-X physical address range is reserved and iommu is
> >> > > aware about these addreses and transalation is bypassed for these
> address range.
> >> > >
> >> > > Unlike x86, ARM/ARM64 does not reserve MSI/MSI-X Physical address
> >> > > range and all the transactions including MSI go through
> >> > > iommu/smmu
> >> > without bypass.
> >> > > This requires extending current vfio MSI layer with additional
> >> > > functionality for ARM/ARM64 by 1. Programing IOVA (referred as a
> >> > > MSI virtual doorbell address)
> >> > >in device's MSI vector as a MSI address.
> >> > >This IOVA will be provided by the userspace based on the
> >> > >MSI/MSI-X addresses reserved for the guest.
> >> > > 2. Create an IOMMU mapping between this IOVA and
> >> > >Physical address (PA) assigned to the MSI vector.
> >> > >
> >> > > This RFC is proposing a solution for MSI/MSI-X passthrough for
> >> > ARM/ARM64.
> >> >
> >> >
> >> > Hi Pranavkumar,
> >> >
> >> > Freescale has the same, or very similar, need, so any solution in
> >> > this space will need to work for both ARM and powerpc.  I'm not a
> >> > big fan of this approach as it seems to require the user to
> >> > configure MSI/X via ioctl and then call a separate ioctl mapping
> >> > the doorbells.  That's more code for the user, more code to get
> >> > wrong and potentially a gap between configuring MSI/X and enabling
> mappings where we could see IOMMU faults.
> >> >
> >> > If we know that doorbell mappings are required, why can't we set
> >> > aside a bank of IOVA space and have them mapped automatically as
> >> > MSI/X is being configured?  Then the user's need for special
> >> > knowledge and handling of this case is limited to setup.  The IOVA
> >> > space will be mapped and used as needed, we only need the user to
> >> > specify the IOVA space reserved for this.  Thanks,
> >>
> >> We probably need a mix of both to support Freescale PowerPC and ARM
> >> based machines.
> >> In this mix mode kernel vfio driver will reserve some IOVA for
> >> mapping MSI page/s.
> >
> > If vfio is reserving pages independently from the user, this becomes
> > what Marc called "shaping" the VM and what x86 effectively does.  An
> > interface extension should expose these implicit regions so the user
> > can avoid them for DMA memory mapping.
> >
> >>  If any other iova mapping will overlap with this then it will return
> >> error and user-space. Ideally this should be choosen in such a way
> >> that it never overlap, which is easy on some systems but can be
> >> tricky on some other system like Freescale Pow

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread Vivien Didelot
Hi David,

On Aug 4, 2015, at 1:21 AM, David da...@davemloft.net wrote:

> From: Vivien Didelot 
> Date: Sun,  2 Aug 2015 21:46:02 -0400
> 
>> If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
>> an EDSA frame is prepended with a 802.1q header once queued.
>> 
>> To fix this, push the VLAN tag to the payload if present, before
>> checking the frame protocol.
>> 
>> [note: we may prefer to access directly VLAN TCI from hwaccel frames,
>> but this approach is simpler.]
>> 
>> Signed-off-by: Vivien Didelot 
> 
> This is a bug fix so should target 'net', but you generated the patch
> against 'net-next'.
> 
> In any event, you should be explicit about the tree you are targetting
> in order to not waste my time like this, by simply specifying the
> tree in your "[PATCH xxx]" text in your subject line.   Either
> "[PATCH net]" or "[PATCH net-next]".

Duly noted. Should I resend it?

Thanks,
-v
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime

2015-08-03 Thread Willy Tarreau
On Tue, Aug 04, 2015 at 05:54:51AM +0200, Borislav Petkov wrote:
> On Mon, Aug 03, 2015 at 11:45:24AM -0700, Andy Lutomirski wrote:
> > P.P.P.S.  Who thought that IRET faults unmasking NMIs made any sense
> > whatsoever when NMIs run on an IST stack?  Seriously, people?
> 
> What happened with asking Intel for a sane IRET-NG?
> 
> Should be relatively easy - take the current IRET microcode, get rid
> of the nasty crap, allocate a new opcode and done. Validation should
> actually have *less* to do and can reuse all current test cases.

Even easier, just add a few flags (probably 2 or 3 only) that IRET can
check to adjust its behaviour. Basically "don't re-enable NMIs yet",
maybe something to adjust the behaviour on bad CS/SS/SP/IP and a few
such things could possibly help. Maybe all of this could be summarized
as a single flag "I'm in a fault handler".

Willy

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4] net: switchdev: change fdb addr for a byte array

2015-08-03 Thread Vivien Didelot
The address in the switchdev_obj_fdb structure is currently represented
as a pointer. Replacing it with a 6-byte array allows switchdev to carry
addresses directly read from hardware registers, not stored by the
switch chip driver (as in Rocker).

Signed-off-by: Vivien Didelot 
---
 drivers/net/ethernet/rocker/rocker.c | 2 +-
 include/net/switchdev.h  | 2 +-
 net/bridge/br_fdb.c  | 2 +-
 net/switchdev/switchdev.c| 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index 4cd5a71..faa5db0 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port 
*rocker_port,
hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) {
if (found->key.pport != rocker_port->pport)
continue;
-   fdb->addr = found->key.addr;
+   memcpy(fdb->addr, found->key.addr, ETH_ALEN);
fdb->vid = rocker_port_vlan_to_vid(rocker_port,
   found->key.vlan_id);
err = obj->cb(rocker_port->dev, obj);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 89da893..e90e1a0 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -70,7 +70,7 @@ struct switchdev_obj {
u32 tb_id;
} ipv4_fib;
struct switchdev_obj_fdb {  /* PORT_FDB */
-   const unsigned char *addr;
+   u8 addr[ETH_ALEN];
u16 vid;
} fdb;
} u;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875d..2c64b6a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct 
net_bridge_fdb_entry *f)
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = f->addr.addr,
.vid = f->vlan_id,
},
};
 
+   memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN);
switchdev_port_obj_del(f->dst->dev, &obj);
 }
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 33bafa2..28786e8 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_add(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct 
nlattr *tb[],
struct switchdev_obj obj = {
.id = SWITCHDEV_OBJ_PORT_FDB,
.u.fdb = {
-   .addr = addr,
.vid = vid,
},
};
 
+   memcpy(obj.u.fdb.addr, addr, ETH_ALEN);
return switchdev_port_obj_del(dev, &obj);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] net: dsa: add support for switchdev FDB objects

2015-08-03 Thread Vivien Didelot
Remove the fdb_{add,del,getnext} function pointer in favor of new
port_fdb_{add,del,getnext}.

Implement the switchdev_port_obj_{add,del,dump} functions in DSA to
support the SWITCHDEV_OBJ_PORT_FDB objects.

These functions are called from switchdev_port_bridge_{get,set,del}link.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6171.c |   3 -
 drivers/net/dsa/mv88e6352.c |   3 -
 include/net/dsa.h   |  16 ++--
 net/dsa/slave.c | 221 
 4 files changed, 129 insertions(+), 114 deletions(-)

diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c
index 1c78084..cfa21ed 100644
--- a/drivers/net/dsa/mv88e6171.c
+++ b/drivers/net/dsa/mv88e6171.c
@@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6171");
diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index af210ef..eb4630f 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
.port_join_bridge   = mv88e6xxx_join_bridge,
.port_leave_bridge  = mv88e6xxx_leave_bridge,
.port_stp_update= mv88e6xxx_port_stp_update,
-   .fdb_add= mv88e6xxx_port_fdb_add,
-   .fdb_del= mv88e6xxx_port_fdb_del,
-   .fdb_getnext= mv88e6xxx_port_fdb_getnext,
 };
 
 MODULE_ALIAS("platform:mv88e6172");
diff --git a/include/net/dsa.h b/include/net/dsa.h
index fbca63b..a090c8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -296,12 +296,16 @@ struct dsa_switch_driver {
 u32 br_port_mask);
int (*port_stp_update)(struct dsa_switch *ds, int port,
   u8 state);
-   int (*fdb_add)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_del)(struct dsa_switch *ds, int port,
-  const unsigned char *addr, u16 vid);
-   int (*fdb_getnext)(struct dsa_switch *ds, int port,
-  unsigned char *addr, bool *is_static);
+
+   /*
+* Forwarding database
+*/
+   int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid,
+   u8 addr[ETH_ALEN]);
+   int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid,
+   u8 addr[ETH_ALEN], bool *is_static);
 };
 
 void register_switch_driver(struct dsa_switch_driver *type);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0010c69..0f99a17 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "dsa_priv.h"
 
 /* slave mii_bus handling ***/
@@ -200,105 +201,6 @@ out:
return 0;
 }
 
-static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid, u16 nlm_flags)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_add)
-   ret = ds->drv->fdb_add(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-struct net_device *dev,
-const unsigned char *addr, u16 vid)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->parent;
-   int ret = -EOPNOTSUPP;
-
-   if (ds->drv->fdb_del)
-   ret = ds->drv->fdb_del(ds, p->port, addr, vid);
-
-   return ret;
-}
-
-static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
-  const unsigned char *addr, u16 vid,
-  bool is_static,
-  u32 portid, u32 seq, int type,
-  unsigned int flags)
-{
-   struct nlmsghdr *nlh;
-   struct ndmsg *ndm;
-
-   nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
-   if (!nlh)
-   return -EMSGSIZE;
-
-   ndm = nlmsg_data(nlh);
-   ndm->ndm_family  = AF_BRIDGE;
-   ndm->ndm_pad1= 0;
-   ndm->ndm_pad2= 0;
-   ndm->ndm_flags   = NTF_EXT_LEARNED;

[PATCH 2/4] net: switchdev: support static FDB addresses

2015-08-03 Thread Vivien Didelot
This patch adds an is_static boolean to the switchdev_obj_fdb structure,
in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE.

Signed-off-by: Vivien Didelot 
---
 include/net/switchdev.h   | 1 +
 net/switchdev/switchdev.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e90e1a0..0e296b8 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -72,6 +72,7 @@ struct switchdev_obj {
struct switchdev_obj_fdb {  /* PORT_FDB */
u8 addr[ETH_ALEN];
u16 vid;
+   bool is_static;
} fdb;
} u;
 };
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 28786e8..b75897c 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
ndm->ndm_flags   = NTF_SELF;
ndm->ndm_type= 0;
ndm->ndm_ifindex = dev->ifindex;
-   ndm->ndm_state   = NUD_REACHABLE;
+   ndm->ndm_state   = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE;
 
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
goto nla_put_failure;
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


net: dsa: support switchdev FDB objects

2015-08-03 Thread Vivien Didelot
This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB
objects.

The first two patches add minor but necessary changes to switchdev, the third
one implements the switchdev glue in DSA for FDB routines, and the fourth one
refactors the FDB access functions in the mv88e6xxx code.

Below is an example (ports 0-2 belong to br0, ports 3-4 belong to br1):

# bridge fdb add 3c:97:0e:11:30:6e dev swp2
# bridge fdb add 3c:97:0e:11:40:78 dev swp3
# bridge fdb add 3c:97:0e:11:50:86 dev swp4
# bridge fdb del 3c:97:0e:11:40:78 dev swp3
# bridge fdb
01:00:5e:00:00:01 dev eth0 self permanent
01:00:5e:00:00:01 dev eth1 self permanent
00:50:d2:10:78:15 dev swp0 master br0 permanent
3c:97:0e:11:30:6e dev swp2 self static
00:50:d2:10:78:15 dev swp3 master br1 permanent
3c:97:0e:11:50:86 dev swp4 self static
# cat /sys/kernel/debug/dsa0/atu
# DB   T/P  Vec State Addr
# 001  Port 004   e   3c:97:0e:11:30:6e
# 004  Port 010   e   3c:97:0e:11:50:86

For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged
ports and bridge groups, and the remaining will be later used by VLANs.

This change is necessary to welcome the support for hardware VLANs (which will
follow soon).

Cheers,
-v

Vivien Didelot (4):
  net: switchdev: change fdb addr for a byte array
  net: switchdev: support static FDB addresses
  net: dsa: add support for switchdev FDB objects
  net: dsa: mv88e6xxx: refactor FDB routines

 drivers/net/dsa/mv88e6171.c  |   6 +-
 drivers/net/dsa/mv88e6352.c  |   6 +-
 drivers/net/dsa/mv88e6xxx.c  | 205 ++--
 drivers/net/dsa/mv88e6xxx.h  |  31 +++--
 drivers/net/ethernet/rocker/rocker.c |   2 +-
 include/net/dsa.h|  16 ++-
 include/net/switchdev.h  |   3 +-
 net/bridge/br_fdb.c  |   2 +-
 net/dsa/slave.c  | 221 +++
 net/switchdev/switchdev.c|   6 +-
 10 files changed, 308 insertions(+), 190 deletions(-)

-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] perf: Clear MSRs on kexec

2015-08-03 Thread Jiri Olsa
On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > hi,
> > I'm getting following message on the kdump kernel start
> > 
> >   Broken BIOS detected, complain to your hardware vendor.\
> >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > 
> > it seems to be caused by NMI watchdog being configured
> > and fixed counter values stays in MSRs, which triggers
> > warning in check_hw_exists and disables perf support
> > in kdump kernel.. which probably does not hurt ;-)
> > 
> > zeroing MSRs during kdump shutdown seems to work (attached)
> > but I'm not sure thats correct place for kdump perf callback
> 
> Right, but why bother? All that kernel needs to do is write a memory
> dump to someplace and reboot, right? The less you do, the less can go
> wrong.

well, I was hunting that 'Broken BIOS..' message which is wrong

I wouldn't think anyone wants to use perf under kdump kernel,
but you never know ;-)

jirka
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 3/3] cpuidle/coupled: Add sanity check for safe_state_index

2015-08-03 Thread Xunlei Pang
From: Xunlei Pang 

Since we're using cpuidle_driver::safe_state_index directly as the
target state index, it's better to add the sanity check at the point
of registering the driver.

Signed-off-by: Xunlei Pang 
---
 drivers/cpuidle/driver.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 5db1478..def299e 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -223,10 +223,23 @@ static void poll_idle_init(struct cpuidle_driver *drv) {}
 static int __cpuidle_register_driver(struct cpuidle_driver *drv)
 {
int ret;
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+   int i;
+#endif
 
if (!drv || !drv->state_count)
return -EINVAL;
 
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+   for (i = drv->state_count - 1; i >= 0; i--) {
+   if (cpuidle_state_is_coupled(drv, i) &&
+   (drv->safe_state_index == i ||
+drv->safe_state_index < 0 ||
+drv->safe_state_index >= drv->state_count))
+   return -EINVAL;
+   }
+#endif
+
if (cpuidle_disabled())
return -ENODEV;
 
-- 
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 1/3] cpuidle/coupled: Remove cpuidle_device::safe_state_index

2015-08-03 Thread Xunlei Pang
From: Xunlei Pang 

cpuidle_device::safe_state_index needs to be initialized before
use; it should be the same as cpuidle_driver::safe_state_index.

We tackled this issue by removing the safe_state_index from the
cpuidle_device structure and use the one in the cpuidle_driver
structure instead.

Suggested-by: Daniel Lezcano 
Signed-off-by: Xunlei Pang 
---
 drivers/cpuidle/coupled.c | 4 ++--
 include/linux/cpuidle.h   | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 7936dce..6493e40 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -473,7 +473,7 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
return entered_state;
}
entered_state = cpuidle_enter_state(dev, drv,
-   dev->safe_state_index);
+   drv->safe_state_index);
local_irq_disable();
}
 
@@ -521,7 +521,7 @@ retry:
}
 
entered_state = cpuidle_enter_state(dev, drv,
-   dev->safe_state_index);
+   drv->safe_state_index);
local_irq_disable();
}
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index d075d34..786ad32 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -84,7 +84,6 @@ struct cpuidle_device {
struct list_headdevice_list;
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
-   int safe_state_index;
cpumask_t   coupled_cpus;
struct cpuidle_coupled  *coupled;
 #endif
-- 
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/3] cpuidle/coupled: Remove redundant 'dev' argument of cpuidle_state_is_coupled()

2015-08-03 Thread Xunlei Pang
From: Xunlei Pang 

For cpuidle_state_is_coupled(), 'dev' is not used, so remove it.

Signed-off-by: Xunlei Pang 
---
 drivers/cpuidle/coupled.c | 4 +---
 drivers/cpuidle/cpuidle.c | 4 ++--
 drivers/cpuidle/cpuidle.h | 7 +++
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 6493e40..1523e2d 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -176,14 +176,12 @@ void cpuidle_coupled_parallel_barrier(struct 
cpuidle_device *dev, atomic_t *a)
 
 /**
  * cpuidle_state_is_coupled - check if a state is part of a coupled set
- * @dev: struct cpuidle_device for the current cpu
  * @drv: struct cpuidle_driver for the platform
  * @state: index of the target state in drv->states
  *
  * Returns true if the target state is coupled with cpus besides this one
  */
-bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
-   struct cpuidle_driver *drv, int state)
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state)
 {
return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
 }
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 3325393..17a6dc0 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -214,7 +214,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct 
cpuidle_driver *drv,
tick_broadcast_exit();
}
 
-   if (!cpuidle_state_is_coupled(dev, drv, entered_state))
+   if (!cpuidle_state_is_coupled(drv, entered_state))
local_irq_enable();
 
diff = ktime_to_us(ktime_sub(time_end, time_start));
@@ -263,7 +263,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct 
cpuidle_device *dev)
 int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  int index)
 {
-   if (cpuidle_state_is_coupled(dev, drv, index))
+   if (cpuidle_state_is_coupled(drv, index))
return cpuidle_enter_state_coupled(dev, drv, index);
return cpuidle_enter_state(dev, drv, index);
 }
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index ee97e96..178c5ad 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -34,15 +34,14 @@ extern int cpuidle_add_sysfs(struct cpuidle_device *dev);
 extern void cpuidle_remove_sysfs(struct cpuidle_device *dev);
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
-bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
-   struct cpuidle_driver *drv, int state);
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state);
 int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int next_state);
 int cpuidle_coupled_register_device(struct cpuidle_device *dev);
 void cpuidle_coupled_unregister_device(struct cpuidle_device *dev);
 #else
-static inline bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
-   struct cpuidle_driver *drv, int state)
+static inline
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state)
 {
return false;
 }
-- 
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support.

2015-08-03 Thread Pranavkumar Sawargaonkar
Hi Bharat,

On 28 July 2015 at 23:28, Alex Williamson  wrote:
> On Tue, 2015-07-28 at 17:23 +, Bhushan Bharat wrote:
>> Hi Alex,
>>
>> > -Original Message-
>> > From: Alex Williamson [mailto:alex.william...@redhat.com]
>> > Sent: Tuesday, July 28, 2015 9:52 PM
>> > To: Pranavkumar Sawargaonkar
>> > Cc: k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; linux-arm-
>> > ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
>> > christoffer.d...@linaro.org; marc.zyng...@arm.com; will.dea...@arm.com;
>> > bhelg...@google.com; a...@arndb.de; rob.herr...@linaro.org;
>> > eric.au...@linaro.org; patc...@apm.com; Bhushan Bharat-R65777; Yoder
>> > Stuart-B08248
>> > Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support.
>> >
>> > On Fri, 2015-07-24 at 14:33 +0530, Pranavkumar Sawargaonkar wrote:
>> > > In current VFIO MSI/MSI-X implementation, linux host kernel allocates
>> > > MSI/MSI-X vectors when userspace requests through vfio ioctls.
>> > > Vfio creates irqfd mappings to notify MSI/MSI-X interrupts to the
>> > > userspace when raised.
>> > > Guest OS will see emulated MSI/MSI-X controller and receives an
>> > > interrupt when kernel notifies the same via irqfd.
>> > >
>> > > Host kernel allocates MSI/MSI-X using standard linux routines like
>> > > pci_enable_msix_range() and pci_enable_msi_range().
>> > > These routines along with requset_irq() in host kernel sets up
>> > > MSI/MSI-X vectors with Physical MSI/MSI-X addresses provided by
>> > > interrupt controller driver in host kernel.
>> > >
>> > > This means when a device is assigned with the guest OS, MSI/MSI-X
>> > > addresses present in PCIe EP are the PAs programmed by the host linux
>> > kernel.
>> > >
>> > > In x86 MSI/MSI-X physical address range is reserved and iommu is aware
>> > > about these addreses and transalation is bypassed for these address 
>> > > range.
>> > >
>> > > Unlike x86, ARM/ARM64 does not reserve MSI/MSI-X Physical address
>> > > range and all the transactions including MSI go through iommu/smmu
>> > without bypass.
>> > > This requires extending current vfio MSI layer with additional
>> > > functionality for ARM/ARM64 by 1. Programing IOVA (referred as a MSI
>> > > virtual doorbell address)
>> > >in device's MSI vector as a MSI address.
>> > >This IOVA will be provided by the userspace based on the
>> > >MSI/MSI-X addresses reserved for the guest.
>> > > 2. Create an IOMMU mapping between this IOVA and
>> > >Physical address (PA) assigned to the MSI vector.
>> > >
>> > > This RFC is proposing a solution for MSI/MSI-X passthrough for
>> > ARM/ARM64.
>> >
>> >
>> > Hi Pranavkumar,
>> >
>> > Freescale has the same, or very similar, need, so any solution in this 
>> > space
>> > will need to work for both ARM and powerpc.  I'm not a big fan of this
>> > approach as it seems to require the user to configure MSI/X via ioctl and 
>> > then
>> > call a separate ioctl mapping the doorbells.  That's more code for the 
>> > user,
>> > more code to get wrong and potentially a gap between configuring MSI/X
>> > and enabling mappings where we could see IOMMU faults.
>> >
>> > If we know that doorbell mappings are required, why can't we set aside a
>> > bank of IOVA space and have them mapped automatically as MSI/X is being
>> > configured?  Then the user's need for special knowledge and handling of 
>> > this
>> > case is limited to setup.  The IOVA space will be mapped and used as 
>> > needed,
>> > we only need the user to specify the IOVA space reserved for this.  Thanks,
>>
>> We probably need a mix of both to support Freescale PowerPC and ARM
>> based machines.
>> In this mix mode kernel vfio driver will reserve some IOVA for mapping
>> MSI page/s.
>
> If vfio is reserving pages independently from the user, this becomes
> what Marc called "shaping" the VM and what x86 effectively does.  An
> interface extension should expose these implicit regions so the user can
> avoid them for DMA memory mapping.
>
>>  If any other iova mapping will overlap with this then it will return
>> error and user-space. Ideally this should be choosen in such a way
>> that it never overlap, which is easy on some systems but can be tricky
>> on some other system like Freescale PowerPC. This is not sufficient
>> for at-least Freescale PowerPC based SOC. This is because of hardware
>> limitation, where we need to fit this reserved iova address within
>> aperture decided by user-space. So if we allow user-space to change
>> this reserved iova address to a value decided by user-spece itself
>> then we can support both ARM/PowerPC based solutions.
>
> Yes, that's my intention, to allow userspace to specify the reserved
> region.  I believe you have some additional restrictions on the number
> of MSI banks available and whether MSI banks can be shared, but I would
> hope that doesn't preclude a shared interface with ARM.
>
>> I have some implementation ready/tested with this approach and if this
>> approach looks good then I ca

Re: [RFC][PATCH] ecryptfs: Allow only one instance per lower path

2015-08-03 Thread Richard Weinberger
Tyler,

Am 04.08.2015 um 01:07 schrieb Tyler Hicks:
>> Okay, then I'd argument to give my patch a try although it is not the 
>> solution
>> to the problem I've reported. :-)
>> If you don't mind I'll resend with a proper changelog.
> 
> That patch isn't correct since it assumes that all eCryptfs super blocks
> are equal if the lower paths (and, ultimately, the lower inode) are
> equal. However, the lower path is only one of many properties of an
> eCryptfs superblock. For example, the second mount may have been
> configured to use a different file encryption key.

How would this work if I mount /foo using AES to /mnt_a
and /foo again using 3DES to /mnt_b?
Wouldn't both eCryptfs instances kill each other's files?

Thanks,
//richard
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3] iio: adc: xilinx-xadc: Push interrupts into threaded context

2015-08-03 Thread Shubhrajyoti Datta
On Fri, Jul 24, 2015 at 6:08 PM, Lars-Peter Clausen  wrote:
> Hi,
>
> Sorry, but I don't think this patch has been sufficiently tested against a
> mainline kernel. The driver won't even probe the way it is right now.
>
> On 07/21/2015 01:14 AM, Xander Huff wrote:
>>
>> The driver currently registers a pair of irq handlers using
>> request_threaded_irq(), however the synchronization mechanism between the
>> hardirq and the threadedirq handler is a regular spinlock.
>
>
> If everything runs in threaded context we don't really need the spinlock
> anymore and can use the mutex throughout.

that should be better from the performance point of view.

>
>>
>> Unfortunately, this breaks PREEMPT_RT builds, where a spinlock can sleep,
>> and is thus not able to be acquired from a hardirq handler. This patch
>> gets
>> rid of the hardirq handler and pushes all interrupt handling into the
>> threaded context.
>
>
> We actually might as well run everything in the hardirq handler (which will
> be threaded in PREEMPT_RT). The reason why we have the threaded handler is
> because xadc_handle_event() used to sleep, but it doesn't do this anymore.

The point is: why have the hardirq at all? If we use a hardirq handler then
a mutex cannot be used, and the spinlock will busy-wait.

Is there something I may be missing?
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: Tree for Aug 4

2015-08-03 Thread Stephen Rothwell
Hi all,

Changes since 20150803:

The security tree gained a conflict against Linus' tree.

Non-merge commits (relative to Linus' tree): 5232
 5240 files changed, 257463 insertions(+), 119966 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig for x86_64,
a multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), it is also built with powerpc allnoconfig
(32 and 64 bit), ppc44x_defconfig and allyesconfig (this fails its final
link) and i386, sparc, sparc64 and arm defconfig.

Below is a summary of the state of the merge.

I am currently merging 224 trees (counting Linus' and 32 trees of patches
pending for Linus' tree).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

$ git checkout master
$ git reset --hard stable
Merging origin/master (7e884479bf50 Merge branch 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client)
Merging fixes/master (c7e9ad7da219 Merge branch 'perf-urgent-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip)
Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on 
module install)
Merging arc-current/for-curr (e4140819dadc ARC: signal handling robustify)
Merging arm-current/fixes (3473f26592c1 ARM: 8405/1: VDSO: fix regression with 
toolchains lacking ld.bfd executable)
Merging m68k-current/for-linus (1214c525484c m68k: Use for_each_sg())
Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached 
build errors)
Merging mips-fixes/mips-fixes (1795cd9b3a91 Linux 3.16-rc5)
Merging powerpc-fixes/fixes (b8d65e9662b1 powerpc/eeh-powernv: Fix unbalanced 
IRQ warning)
Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2)
Merging sparc/master (4a10a91756ef Merge branch 'upstream' of 
git://git.infradead.org/users/pcmoore/audit)
Merging net/master (636dba8e12d7 act_mirred: avoid calling tcf_hash_release() 
when binding)
Merging ipsec/master (158cd4af8ded packet: missing dev_put() in 
packet_do_bind())
Merging sound-current/for-linus (8ec7cfce3762 ALSA: oxygen: Fix 
logical-not-parentheses warning)
Merging pci-current/for-linus (c9ddbac9c891 PCI: Restore PCI_MSIX_FLAGS_BIRMASK 
definition)
Merging wireless-drivers/master (741e3b9902d1 rtlwifi: rtl8723be: Add module 
parameter for MSI interrupts)
Merging driver-core.current/driver-core-linus (cbfe8fa6cd67 Linux 4.2-rc4)
Merging tty.current/tty-linus (cbfe8fa6cd67 Linux 4.2-rc4)
Merging usb.current/usb-linus (0f79fd807a24 Merge tag 'fixes-for-v4.2-rc6' of 
git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus)
Merging usb-gadget-fixes/fixes (c93e64e91248 usb: udc: core: add device_del() 
call to error pathway)
Merging usb-serial-fixes/usb-linus (74472233233f USB: sierra: add 1199:68AB 
device ID)
Merging staging.current/staging-linus (40c3ef9d2f14 staging: comedi: das1800: 
add missing break in switch)
Merging char-misc.current/char-misc-linus (eaf7e98d43c1 Merge tag 
'extcon-fixes-for-4.2-rc5' of 
git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon into 
char-misc-linus)
Merging input-current/for-linus (073e570d7c2c Input: alps - only Dell laptops 
have separate button bits for v2 dualpoint sticks)
Merging crypto-current/master (17fb874dee09 hwrng: core - correct error check 
of kthread_run call)
Merging ide/master (d681f1166919 ide: remove deprecated use of pci api)
Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test 
for PPC_PSERIES)
Merging rr-fixes/fixes (fe0d34d242fa module: weaken locking assertion for oops 
path.)
Merging vfio-fixes/for-linus (4bc94d5dc95d vfio: Fix lockdep issue)
Merging kselftest-fixes/fixes (fee50f3c8427 selftests/futex: Fix 
futex_cmp_requeue_pi() error handling)
Merging backlight-fixes/for-backlight-fixes (68feaca0b13e backlight: pwm: 
H

[PATCH v2 net-next 2/2] RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.

2015-08-03 Thread Sowmini Varadhan
Register pernet subsys init/stop functions that will set up
and tear down per-net RDS-TCP listen endpoints. Unregister
pernet subsys functions on 'modprobe -r' to clean up these
end points.

Enable keepalive on both accept and connect socket endpoints.
The keepalive timer expiration will ensure that client socket
endpoints will be removed as appropriate from the netns when
an interface is removed from a namespace.

Register a device notifier callback that will clean up all
sockets (and thus avoid the need to wait for keepalive timeout)
when the loopback device is unregistered from the netns indicating
that the netns is getting deleted.

Signed-off-by: Sowmini Varadhan 
---
v2: net_device notifier for synchronous cleanup of sockets.

 net/rds/tcp.c |  163 -
 net/rds/tcp.h |7 ++-
 net/rds/tcp_connect.c |6 +-
 net/rds/tcp_listen.c  |   38 +++-
 4 files changed, 164 insertions(+), 50 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 98f5de3..339392b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -35,6 +35,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "rds.h"
 #include "tcp.h"
@@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void)
}
 }
 
-static void rds_tcp_exit(void)
-{
-   rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
-   rds_tcp_listen_stop();
-   rds_tcp_destroy_conns();
-   rds_trans_unregister(&rds_tcp_transport);
-   rds_tcp_recv_exit();
-   kmem_cache_destroy(rds_tcp_conn_slab);
-}
-module_exit(rds_tcp_exit);
+static void rds_tcp_exit(void);
 
 struct rds_transport rds_tcp_transport = {
.laddr_check= rds_tcp_laddr_check,
@@ -281,6 +275,138 @@ struct rds_transport rds_tcp_transport = {
.t_prefer_loopback  = 1,
 };
 
+static int rds_tcp_netid;
+
+/* per-network namespace private data for this module */
+struct rds_tcp_net {
+   struct socket *rds_tcp_listen_sock;
+   struct work_struct rds_tcp_accept_w;
+};
+
+static void rds_tcp_accept_worker(struct work_struct *work)
+{
+   struct rds_tcp_net *rtn = container_of(work,
+  struct rds_tcp_net,
+  rds_tcp_accept_w);
+
+   while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0)
+   cond_resched();
+}
+
+void rds_tcp_accept_work(struct sock *sk)
+{
+   struct net *net = sock_net(sk);
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   queue_work(rds_wq, &rtn->rds_tcp_accept_w);
+}
+
+static __net_init int rds_tcp_init_net(struct net *net)
+{
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+   if (!rtn->rds_tcp_listen_sock) {
+   pr_warn("could not set up listen sock\n");
+   return -EAFNOSUPPORT;
+   }
+   INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
+   return 0;
+}
+
+static void __net_exit rds_tcp_exit_net(struct net *net)
+{
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   /* If rds_tcp_exit_net() is called as a result of netns deletion,
+* the rds_tcp_kill_sock() device notifier would already have cleaned
+* up the listen socket, thus there is no work to do in this function.
+*
+* If rds_tcp_exit_net() is called as a result of module unload,
+* i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
+* we do need to clean up the listen socket here.
+*/
+   if (rtn->rds_tcp_listen_sock) {
+   rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+   rtn->rds_tcp_listen_sock = NULL;
+   flush_work(&rtn->rds_tcp_accept_w);
+   }
+}
+
+static struct pernet_operations rds_tcp_net_ops = {
+   .init = rds_tcp_init_net,
+   .exit = rds_tcp_exit_net,
+   .id = &rds_tcp_netid,
+   .size = sizeof(struct rds_tcp_net),
+};
+
+static void rds_tcp_kill_sock(struct net *net)
+{
+   struct rds_tcp_connection *tc, *_tc;
+   struct sock *sk;
+   struct list_head tmp_list;
+   struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+   rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+   rtn->rds_tcp_listen_sock = NULL;
+   flush_work(&rtn->rds_tcp_accept_w);
+   INIT_LIST_HEAD(&tmp_list);
+   spin_lock_irq(&rds_tcp_conn_lock);
+   list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+   struct net *c_net = read_pnet(&tc->conn->c_net);
+
+   if (net != c_net || !tc->t_sock)
+   continue;
+   list_del(&tc->t_tcp_node);
+   list_add_tail(&tc->t_tcp_node, &tmp_list);
+   }
+   spin_unlock_irq(&rds_tcp_conn_lock);
+   list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
+   sk = tc->t_s

[PATCH v2 net-next 1/2] RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net

2015-08-03 Thread Sowmini Varadhan
Open the sockets calling sock_create_kern() with the correct struct net
pointer, and use that struct net pointer when verifying the
address passed to rds_bind().

Signed-off-by: Sowmini Varadhan 
---
v2: David Ahern comments.

 net/rds/bind.c|3 ++-
 net/rds/connection.c  |   16 ++--
 net/rds/ib.c  |2 +-
 net/rds/ib_cm.c   |5 +++--
 net/rds/iw.c  |2 +-
 net/rds/iw_cm.c   |5 +++--
 net/rds/rds.h |   23 +++
 net/rds/send.c|3 ++-
 net/rds/tcp.c |4 ++--
 net/rds/tcp_connect.c |3 ++-
 net/rds/tcp_listen.c  |   16 
 net/rds/transport.c   |4 ++--
 12 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 4ebd29c..dd666fb 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -185,7 +185,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, 
int addr_len)
ret = 0;
goto out;
}
-   trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+   trans = rds_trans_get_preferred(sock_net(sock->sk),
+   sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index da6da57..d4fecb2 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -117,7 +117,8 @@ static void rds_conn_reset(struct rds_connection *conn)
  * For now they are not garbage collected once they're created.  They
  * are torn down as the module is removed, if ever.
  */
-static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
+static struct rds_connection *__rds_conn_create(struct net *net,
+   __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp,
   int is_outgoing)
 {
@@ -157,6 +158,7 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
conn->c_faddr = faddr;
spin_lock_init(&conn->c_lock);
conn->c_next_tx_seq = 1;
+   rds_conn_net_set(conn, net);
 
init_waitqueue_head(&conn->c_waitq);
INIT_LIST_HEAD(&conn->c_send_queue);
@@ -174,7 +176,7 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
 * can bind to the destination address then we'd rather the messages
 * flow through loopback rather than either transport.
 */
-   loop_trans = rds_trans_get_preferred(faddr);
+   loop_trans = rds_trans_get_preferred(net, faddr);
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
@@ -260,17 +262,19 @@ static struct rds_connection *__rds_conn_create(__be32 
laddr, __be32 faddr,
return conn;
 }
 
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+  __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp)
 {
-   return __rds_conn_create(laddr, faddr, trans, gfp, 0);
+   return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+   __be32 laddr, __be32 faddr,
   struct rds_transport *trans, gfp_t gfp)
 {
-   return __rds_conn_create(laddr, faddr, trans, gfp, 1);
+   return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ba2dffe..1381422 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -317,7 +317,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned 
int len,
  * allowed to influence which paths have priority.  We could call userspace
  * asserting this policy "routing".
  */
-static int rds_ib_laddr_check(__be32 addr)
+static int rds_ib_laddr_check(struct net *net, __be32 addr)
 {
int ret;
struct rdma_cm_id *cm_id;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0da2a45..f40d8f5 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -448,8 +448,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 (unsigned long long)be64_to_cpu(lguid),
 (unsigned long long)be64_to_cpu(fguid));
 
-   conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
-  GFP_KERNEL);
+   /* RDS/IB is not currently netns aware, thus init_net */
+   conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+  &rds_ib_transport, GFP_KERNEL);
if (IS_ERR(conn)) {

[PATCH v2 net-next 0/2] RDS-TCP: Network namespace support

2015-08-03 Thread Sowmini Varadhan
This patch series contains the set of changes to correctly set up 
the infra for PF_RDS sockets that use TCP as the transport in multiple
network namespaces.

Patch 1 in the series is the minimal set of changes to allow
a single instance of RDS-TCP to run in any (i.e init_net or other) net
namespace.  The changes in this patch set ensure that the execution of 
'modprobe [-r] rds_tcp' sets up the kernel TCP sockets 
relative to the current netns, so that RDS applications can send/recv
packets from that netns, and the netns can later be deleted cleanly.

Patch 2 of the series further allows multiple RDS-TCP instances,
one per network namespace. The changes in this patch allow dynamic
creation/tear-down of RDS-TCP client and server sockets  across all
current and future namespaces. 

v2 changes from RFC sent out earlier:
David Ahern comments in patch 1, net_device notifier in patch 2, 
patch 3 broken off and submitted separately.

Sowmini Varadhan (2):
  Make RDS-TCP work correctly when it is set up in a netns other than
init_net
  Support multiple RDS-TCP listen endpoints, one per netns.

 net/rds/bind.c|3 +-
 net/rds/connection.c  |   16 +++--
 net/rds/ib.c  |2 +-
 net/rds/ib_cm.c   |5 +-
 net/rds/iw.c  |2 +-
 net/rds/iw_cm.c   |5 +-
 net/rds/rds.h |   23 ++-
 net/rds/send.c|3 +-
 net/rds/tcp.c |  167 +++-
 net/rds/tcp.h |7 ++-
 net/rds/tcp_connect.c |9 ++-
 net/rds/tcp_listen.c  |   40 
 net/rds/transport.c   |4 +-
 13 files changed, 216 insertions(+), 70 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: perf eBPF patch ordering. was: Re: perf test LLVM was: Re: [GIT PULL 00/39] perf tools: filtering events using eBPF programs

2015-08-03 Thread Wangnan (F)

Hi Arnaldo,

The following changes since commit 922cc21746202956acb41c89a6190bb50805fa31:

  perf tools: Introduce llvm config options (2015-07-31 12:17:50 -0300)

are available in the git repository at:

  https://github.com/WangNan0/linux.git ebpf

for you to fetch changes up to d85bf4b6470b8d860bbae25418e5ae3ccd9711e8:

  perf tools: Support attach BPF program on uprobe events (2015-08-04 
04:59:20 +)




The new cset has following improvements:

1. Improve error message: now don't dump LLVM environment setting messages
   if clang is found. Also, describe how to pre-compile .c file into .o.

   See: perf tools: Call clang to compile C source to object code
https://github.com/WangNan0/linux/commit/264676a5b922aaf1e9be3800fe06d5b67b06cd12

2. Reorder patches, so that once 'perf record' is able to accept
   '--event file.c', the BPF filter should work. Also, an example BPF script
   file is provided, and the compilation method is described in the commit
   message.

   See:
perf tools: Infrastructure for compiling scriptlets when 
passing '.c' to --event

https://github.com/WangNan0/linux/commit/eca622f4a88e1a791fc2405c398256ad572eba54

3. Introduce 'perf test BPF', which uses the previously introduced scriptlet,
   forks a 'perf record' to utilise it and uses 'perf report' to check the
   result.

   See: perf tests: Enforce LLVM test for BPF test
https://github.com/WangNan0/linux/commit/a7cdab453863c580446dc2c3a3f3a86f21b770ce

perf test: Enable 'perf test' run as test targets
https://github.com/WangNan0/linux/commit/b14f2627e95d348be5ec19bd24a5117e8c2ffe46
and
perf test: Add 'perf test BPF'
https://github.com/WangNan0/linux/commit/8414217dbfa57df4dbb55642dc26205e1c7cbdf1

4. Fix a bug where a filename that doesn't contain '/' is recognised as an
   event name and then fails to be applied, by adjusting the rule order in
   parse-events.l: bring {bpf_object} and {bpf_source} ahead.

You need to pop 9 patches from your perf/ebpf tree and rebase my tree.
However, until patch "perf tools: Enable passing bpf object file to --event"
the changes are tiny. Please check.

Thank you.

On 2015/8/4 3:49, Arnaldo Carvalho de Melo wrote:

Em Mon, Aug 03, 2015 at 01:11:16PM -0300, Arnaldo Carvalho de Melo escreveu:

  ERROR:unable to compile ./foo.c
  Hint:Check error message shown above.
 LLVM 3.7 or newer is required. Which can be found from http://llvm.org
 You may want to try git trunk:
 git clone http://llvm.org/git/llvm.git
  and



  or: perf record [<options>] -- <command> [<options>]
  -e, --event <event>    event selector. use 'perf list' to list available
events
  [root@felicio ~]#
Now to find a hello.c BPF scriptlet...

So, we do not need to provide all this LLVM environment installation
hints when we get to any error, i.e. the one above was just because
"./foo.c" doesn't exist, clang ran successfully, so no need for telling
the user how to install it.

The following error also shouldn't emit those hints:

   [root@felicio ~]# perf record -e ./lock_page.bpf.c sleep 1
   /root/./lock_page.bpf.c:1:5: error: expected parameter declarator
   SEC("lock_page=__lock_page page->flags")
   ^
   /root/./lock_page.bpf.c:1:5: error: expected ')'
   /root/./lock_page.bpf.c:1:4: note: to match this '('
   SEC("lock_page=__lock_page page->flags")
  ^
   /root/./lock_page.bpf.c:1:1: warning: type specifier missing, defaults to 
'int' [-Wimplicit-int]
   SEC("lock_page=__lock_page page->flags")
   ^
   /root/./lock_page.bpf.c:1:41: error: expected ';' after top level declarator
   SEC("lock_page=__lock_page page->flags")
   ^
   ;
   /root/./lock_page.bpf.c:2:22: warning: declaration of 'struct pt_regs' will 
not be visible outside of this function [-Wvisibility]
   int lock_page(struct pt_regs *ctx, int err, unsigned long flags)
  ^
   2 warnings and 3 errors generated.
   ERROR:   unable to compile ./lock_page.bpf.c
   Hint:Check error message shown above.
LLVM 3.7 or newer is required. Which can be found from http://llvm.org
You may want to try git trunk:
git clone http://llvm.org/git/llvm.git
 and
git clone http://llvm.org/git/clang.git

Or fetch the latest clang/llvm 3.7 from pre-built llvm packages for
debian/ubuntu:
http://llvm.org/apt

If you are using old version of clang, change 'clang-bpf-cmd-template'
option in [llvm] section of ~/.perfconfig to:

  "$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \
 -working-directory $WORKING_DIR -c $CLANG_SOURCE \
 -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -"
(Replace /path/to/llc with path to your llc)

   Hint:You can also pre-compile it into .o
   invalid or unsupported event: './

Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame

2015-08-03 Thread David Miller
From: Vivien Didelot 
Date: Sun,  2 Aug 2015 21:46:02 -0400

> If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX,
> an EDSA frame is prepended with a 802.1q header once queued.
> 
> To fix this, push the VLAN tag to the payload if present, before
> checking the frame protocol.
> 
> [note: we may prefer to access directly VLAN TCI from hwaccel frames,
> but this approach is simpler.]
> 
> Signed-off-by: Vivien Didelot 

This is a bug fix so should target 'net', but you generated the patch
against 'net-next'.

In any event, you should be explicit about the tree you are targeting
in order to not waste my time like this, by simply specifying the
tree in your "[PATCH xxx]" text in your subject line.   Either
"[PATCH net]" or "[PATCH net-next]".

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] cpuidle/coupled: Init cpuidle_device::safe_state_index

2015-08-03 Thread pang . xunlei
Hi Daniel,

Daniel Lezcano  wrote 2015-08-04 AM 12:22:54:
> Re: [PATCH] cpuidle/coupled: Init cpuidle_device::safe_state_index
> 
> On 07/23/2015 02:31 PM, Xunlei Pang wrote:
> > From: Xunlei Pang 
> >
> > cpuidle_device::safe_state_index needs to be initialized before use,
> > so assign the driver's safe_state_index to it.
> >
> > Signed-off-by: Xunlei Pang 
> > ---
> >   drivers/cpuidle/cpuidle.c | 2 ++
> >   1 file changed, 2 insertions(+)
> >
> > diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> > index e8e2775..ed5c8efe 100644
> > --- a/drivers/cpuidle/cpuidle.c
> > +++ b/drivers/cpuidle/cpuidle.c
> > @@ -585,6 +585,8 @@ int cpuidle_register(struct cpuidle_driver *drv,
> >  */
> > if (coupled_cpus)
> >device->coupled_cpus = *coupled_cpus;
> > +
> > +  device->safe_state_index = drv->safe_state_index;
> 
> Hey, good catch. We are lucky the safe_state_index is always zero.
> 
> I think we can simplify the code by removing the safe_state_index from 
> the cpuidle_device structure and use the one in the cpuidle_driver 
> structure in coupled.c

Will do, thanks!

Regards,
-Xunlei


ZTE Information Security Notice: The information contained in this mail (and 
any attachment transmitted herewith) is privileged and confidential and is 
intended for the exclusive use of the addressee(s).  If you are not an intended 
recipient, any disclosure, reproduction, distribution or other dissemination or 
use of the information contained is strictly prohibited.  If you have received 
this mail in error, please delete it and notify us immediately.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] smaps: fill missing fields for vma(VM_HUGETLB)

2015-08-03 Thread Naoya Horiguchi
On Tue, Aug 04, 2015 at 02:55:30AM +, Naoya Horiguchi wrote:
> On Wed, Jul 29, 2015 at 04:20:59PM -0700, Mike Kravetz wrote:
> > On 07/29/2015 12:08 PM, David Rientjes wrote:
> > >On Tue, 28 Jul 2015, Jörn Engel wrote:
> > >
> > >>Well, we definitely need something.  Having a 100GB process show 3GB of
> > >>rss is not very useful.  How would we notice a memory leak if it only
> > >>affects hugepages, for example?
> > >>
> > >
> > >Since the hugetlb pool is a global resource, it would also be helpful to
> > >determine if a process is mapping more than expected.  You can't do that
> > >just by adding a huge rss metric, however: if you have 2MB and 1GB
> > >hugepages configured you wouldn't know if a process was mapping 512 2MB
> > >hugepages or 1 1GB hugepage.
> > >
> > >That's the purpose of hugetlb_cgroup, after all, and it supports usage
> > >counters for all hstates.  The test could be converted to use that to
> > >measure usage if configured in the kernel.
> > >
> > >Beyond that, I'm not sure how a per-hstate rss metric would be exported to
> > >userspace in a clean way and other ways of obtaining the same data are
> > >possible with hugetlb_cgroup.  I'm not sure how successful you'd be in
> > >arguing that we need separate rss counters for it.
> >
> > If I want to track hugetlb usage on a per-task basis, do I then need to
> > create one cgroup per task?
> >
> > For example, suppose I have many tasks using hugetlb and the global pool
> > is getting low on free pages.  It might be useful to know which tasks are
> > using hugetlb pages, and how many they are using.
> >
> > I don't actually have this need (I think), but it appears to be what
> > Jörn is asking for.
> 
> One possible way to get a hugetlb metric on a per-task basis is to walk the
> page table via /proc/pid/pagemap and count page flags for each mapped page
> (we can easily do this with tools/vm/page-types.c, like "page-types -p <pid>
> -b huge"). This is obviously slower than just storing the counter as
> in-kernel data and just exporting it, but might be useful in some situations.

BTW, currently smaps doesn't report any meaningful info for vma(VM_HUGETLB).
I wrote the following patch, which hopefully is helpful for your purpose.

Thanks,
Naoya Horiguchi

---
From: Naoya Horiguchi 
Subject: [PATCH] smaps: fill missing fields for vma(VM_HUGETLB)

Currently smaps reports many zero fields for vma(VM_HUGETLB), which is
inconvenient when we want to know hugetlb usage on a per-task or per-vma basis.
This patch enables these fields by introducing smaps_hugetlb_range().

before patch:

  Size:  20480 kB
  Rss:   0 kB
  Pss:   0 kB
  Shared_Clean:  0 kB
  Shared_Dirty:  0 kB
  Private_Clean: 0 kB
  Private_Dirty: 0 kB
  Referenced:0 kB
  Anonymous: 0 kB
  AnonHugePages: 0 kB
  Swap:  0 kB
  KernelPageSize: 2048 kB
  MMUPageSize:2048 kB
  Locked:0 kB
  VmFlags: rd wr mr mw me de ht

after patch:

  Size:  20480 kB
  Rss:   18432 kB
  Pss:   18432 kB
  Shared_Clean:  0 kB
  Shared_Dirty:  0 kB
  Private_Clean: 0 kB
  Private_Dirty: 18432 kB
  Referenced:18432 kB
  Anonymous: 18432 kB
  AnonHugePages: 0 kB
  Swap:  0 kB
  KernelPageSize: 2048 kB
  MMUPageSize:2048 kB
  Locked:0 kB
  VmFlags: rd wr mr mw me de ht

Signed-off-by: Naoya Horiguchi 
---
 fs/proc/task_mmu.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ca1e091881d4..c7218603306d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -610,12 +610,39 @@ static void show_smap_vma_flags(struct seq_file *m, 
struct vm_area_struct *vma)
seq_putc(m, '\n');
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
+unsigned long addr, unsigned long end,
+struct mm_walk *walk)
+{
+   struct mem_size_stats *mss = walk->private;
+   struct vm_area_struct *vma = walk->vma;
+   struct page *page = NULL;
+
+   if (pte_present(*pte)) {
+   page = vm_normal_page(vma, addr, *pte);
+   } else if (is_swap_pte(*pte)) {
+   swp_entry_t swpent = pte_to_swp_entry(*pte);
+
+   if (is_migration_entry(swpent))
+   page = migration_entry_to_page(swpent);
+   }
+   if (page)
+   smaps_account(mss, page, huge_page_size(hstate_vma(vma)),
+ pte_young(*pte), pte_dirty(*pte));
+   return 0;
+}
+#endif /* HUGETLB_PAGE */
+
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
struct vm_area_struct *vma = v;
struct mem_size_stats mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_ran

Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread David Miller
From: Joe Perches 
Date: Mon, 03 Aug 2015 21:02:21 -0700

> On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote:
>> On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
>> > This patch replaces calls to net_dbg_ratelimited when !DEBUG with
>> > no_printk, keeping with the idiom of all the other debug print helpers.
>> 
>> Makes sense, thanks Jason.
> 
> Perhaps better still would be to use if (0) no_printk so that
> the call and whatever argument calls the net_dbg_ratelimited
> makes are completely eliminated.

Agreed. Jason please respin your patch to work this way.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: 4.2-rc5 rcu stalls.

2015-08-03 Thread Sasha Levin
On 08/03/2015 06:03 PM, Paul E. McKenney wrote:
>> > Ugh, that doesn't revert cleanly.  Got something handy ?
> I do not, but perhaps either Sasha or Frederic do.

I've attached a revert courtesy of Peter.


Thanks,
Sasha

 include/linux/preempt.h | 12 
 kernel/sched/core.c | 34 +++---
 2 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 84991f185173..3a93d4cdcce9 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -137,18 +137,6 @@ extern void preempt_count_sub(int val);
 #define preempt_count_inc() preempt_count_add(1)
 #define preempt_count_dec() preempt_count_sub(1)
 
-#define preempt_active_enter() \
-do { \
-	preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
-	barrier(); \
-} while (0)
-
-#define preempt_active_exit() \
-do { \
-	barrier(); \
-	preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \
-} while (0)
-
 #ifdef CONFIG_PREEMPT_COUNT
 
 #define preempt_disable() \
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78b4bad10081..bd378bd21a0e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2983,7 +2983,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
  *  - return from syscall or exception to user-space
  *  - return from interrupt-handler to user-space
  *
- * WARNING: must be called with preemption disabled!
+ * WARNING: all callers must re-check need_resched() afterward and reschedule
+ * accordingly in case an event triggered the need for rescheduling (such as
+ * an interrupt waking up a task) while preemption was disabled in __schedule().
  */
 static void __sched __schedule(void)
 {
@@ -2992,6 +2994,7 @@ static void __sched __schedule(void)
 	struct rq *rq;
 	int cpu;
 
+	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_note_context_switch();
@@ -3058,6 +3061,8 @@ static void __sched __schedule(void)
 	}
 
 	balance_callback(rq);
+
+	sched_preempt_enable_no_resched();
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -3078,9 +3083,7 @@ asmlinkage __visible void __sched schedule(void)
 
 	sched_submit_work(tsk);
 	do {
-		preempt_disable();
 		__schedule();
-		sched_preempt_enable_no_resched();
 	} while (need_resched());
 }
 EXPORT_SYMBOL(schedule);
@@ -3119,14 +3122,15 @@ void __sched schedule_preempt_disabled(void)
 static void __sched notrace preempt_schedule_common(void)
 {
 	do {
-		preempt_active_enter();
+		__preempt_count_add(PREEMPT_ACTIVE);
 		__schedule();
-		preempt_active_exit();
+		__preempt_count_sub(PREEMPT_ACTIVE);
 
 		/*
 		 * Check again in case we missed a preemption opportunity
 		 * between schedule and now.
 		 */
+		barrier();
 	} while (need_resched());
 }
 
@@ -3172,13 +3176,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		return;
 
 	do {
-		/*
-		 * Use raw __prempt_count() ops that don't call function.
-		 * We can't call functions before disabling preemption which
-		 * disarm preemption tracing recursions.
-		 */
-		__preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
-		barrier();
+		__preempt_count_add(PREEMPT_ACTIVE);
 		/*
 		 * Needs preempt disabled in case user_exit() is traced
 		 * and the tracer calls preempt_enable_notrace() causing
@@ -3188,8 +3186,8 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 		__schedule();
 		exception_exit(prev_ctx);
 
+		__preempt_count_sub(PREEMPT_ACTIVE);
 		barrier();
-		__preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET);
 	} while (need_resched());
 }
 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
@@ -3212,11 +3210,17 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	prev_state = exception_enter();
 
 	do {
-		preempt_active_enter();
+		__preempt_count_add(PREEMPT_ACTIVE);
 		local_irq_enable();
 		__schedule();
 		local_irq_disable();
-		preempt_active_exit();
+		__preempt_count_sub(PREEMPT_ACTIVE);
+
+		/*
+		 * Check again in case we missed a preemption opportunity
+		 * between schedule and now.
+		 */
+		barrier();
 	} while (need_resched());
 
 	exception_exit(prev_state);


[PATCH 1/2] x86/lguest: clean up lguest_setup_irq.

2015-08-03 Thread Rusty Russell
We make it static and hoist it higher in the file for the next patch.
We also give a nice panic if it fails during boot.

Signed-off-by: Rusty Russell 
---
 arch/x86/lguest/boot.c | 43 ++-
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 433e5a7dd37f..f38b7e8a88d2 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -835,6 +835,26 @@ static struct irq_chip lguest_irq_controller = {
.irq_unmask = enable_lguest_irq,
 };
 
+/*
+ * Interrupt descriptors are allocated as-needed, but low-numbered ones are
+ * reserved by the generic x86 code.  So we ignore irq_alloc_desc_at if it
+ * tells us the irq is already used: other errors (ie. ENOMEM) we take
+ * seriously.
+ */
+static int lguest_setup_irq(unsigned int irq)
+{
+   int err;
+
+   /* Returns -ve error or vector number. */
+   err = irq_alloc_desc_at(irq, 0);
+   if (err < 0 && err != -EEXIST)
+   return err;
+
+   irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
+ handle_level_irq, "level");
+   return 0;
+}
+
 static int lguest_enable_irq(struct pci_dev *dev)
 {
u8 line = 0;
@@ -879,26 +899,6 @@ static void __init lguest_init_IRQ(void)
 }
 
 /*
- * Interrupt descriptors are allocated as-needed, but low-numbered ones are
- * reserved by the generic x86 code.  So we ignore irq_alloc_desc_at if it
- * tells us the irq is already used: other errors (ie. ENOMEM) we take
- * seriously.
- */
-int lguest_setup_irq(unsigned int irq)
-{
-   int err;
-
-   /* Returns -ve error or vector number. */
-   err = irq_alloc_desc_at(irq, 0);
-   if (err < 0 && err != -EEXIST)
-   return err;
-
-   irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
- handle_level_irq, "level");
-   return 0;
-}
-
-/*
  * Time.
  *
  * It would be far better for everyone if the Guest had its own clock, but
@@ -1028,7 +1028,8 @@ static void lguest_time_irq(unsigned int irq, struct 
irq_desc *desc)
 static void lguest_time_init(void)
 {
/* Set up the timer interrupt (0) to go to our simple timer routine */
-   lguest_setup_irq(0);
+   if (lguest_setup_irq(0) != 0)
+   panic("Could not set up timer irq");
irq_set_handler(0, lguest_time_irq);
 
clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] x86/lguest: Do not setup unused irq vectors

2015-08-03 Thread Rusty Russell
From: Thomas Gleixner 

No point in assigning the interrupt vectors if there is no interrupt
chip installed. Move it to lguest_setup_irq().

(And call it from lguest_enable_irq).

Signed-off-by: Thomas Gleixner 
Signed-off-by: Rusty Russell  (fixed typo)
Signed-off-by: Rusty Russell 
---
 arch/x86/lguest/boot.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index f38b7e8a88d2..2566c97c01c8 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -850,21 +850,29 @@ static int lguest_setup_irq(unsigned int irq)
if (err < 0 && err != -EEXIST)
return err;
 
+   /*
+* Tell the Linux infrastructure that the interrupt is
+* controlled by our level-based lguest interrupt controller.
+*/
irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
  handle_level_irq, "level");
+
+   /* Some systems map "vectors" to interrupts weirdly.  Not us! */
+   __this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq], irq);
return 0;
 }
 
 static int lguest_enable_irq(struct pci_dev *dev)
 {
+   int err;
u8 line = 0;
 
/* We literally use the PCI interrupt line as the irq number. */
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
-   irq_set_chip_and_handler_name(line, &lguest_irq_controller,
- handle_level_irq, "level");
-   dev->irq = line;
-   return 0;
+   err = lguest_setup_irq(line);
+   if (!err)
+   dev->irq = line;
+   return err;
 }
 
 /* We don't do hotplug PCI, so this shouldn't be called. */
@@ -875,17 +883,13 @@ static void lguest_disable_irq(struct pci_dev *dev)
 
 /*
  * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware
- * interrupt (except 128, which is used for system calls), and then tells the
- * Linux infrastructure that each interrupt is controlled by our level-based
- * lguest interrupt controller.
+ * interrupt (except 128, which is used for system calls).
  */
 static void __init lguest_init_IRQ(void)
 {
unsigned int i;
 
for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
-   /* Some systems map "vectors" to interrupts weirdly.  Not us! */
-   __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != IA32_SYSCALL_VECTOR)
set_intr_gate(i, irq_entries_start +
8 * (i - FIRST_EXTERNAL_VECTOR));
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 2/7] x86/lguest: Do not setup unused irq vectors

2015-08-03 Thread Rusty Russell
Thomas Gleixner  writes:
> On Mon, 3 Aug 2015, Rusty Russell wrote:
>> Thomas Gleixner  writes:
>> > +
>> > +  /* Some systems map "vectors" to interrupts weirdly.  Not us! */
>> > +  __this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq, irq);
>> 
>> Missing ].
>
> Doh.
>
>> [   17.751889] do_IRQ: 0.33 No irq handler for vector (irq -1)
>> 
>> You broke interrupts :(
>
> Right, because I missed the other place which fiddles with
> interrupts. Does the patch below fix the issue?

Yep.  I added error handling.

I reworked it into two patches: one which staticizes lguest_setup_irq()
and moves it up, the other of which applies your changes.

Will post, you can take them...

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V9 0/5] map GHES memory region according to EFI memory map

2015-08-03 Thread Borislav Petkov
On Mon, Aug 03, 2015 at 05:23:54PM +0100, Matt Fleming wrote:
> Rafael, Boris?

The ghes.c change looks fine I guess. The whole patchset makes sense
now, with the arch bits extracted. So

Acked-by: Borislav Petkov 

However, we probably should work towards adhering to EFI memory
attributes on x86, long term, as we talked. But that's a future thing.

Thanks.

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface

2015-08-03 Thread Borislav Petkov
On Mon, Aug 03, 2015 at 09:07:53PM -0700, Andy Lutomirski wrote:
> Except that, with the new interface, static_key_likely is the other
> way around, right?  If the key is true (i.e. enabled), then it doesn't
> branch.
> 
> I think of the key as a boolean thing that happens to work by code
> patching under the hood.  The fancy patching affects the performance
> but doesn't really make it functionally different from a regular
> variable.  How about making it extra explicit:
> 
> static_key_set(&key, value);
> 
> where value is a bool or maybe even an unsigned int?

Let's have an actual example:

+   if (static_branch_likely(&__use_tsc)) {
+   u64 tsc_now = rdtsc();
+
+   /* return the value in ns */
+   return cycles_2_ns(tsc_now);
+   }

Well, I can see how the likely/unlikely things can confuse. They
actually don't have anything to do with where we will branch to but how
the code will be laid out, AFAICT. So I'm reading this as:

if (use_tsc) {
RDTSC;
return;
}

and then it is straightforward.

So in this case, the jump will be disabled and we won't branch anywhere.
It actually becomes:

RDTSC;
return;

which can't get any more optimal than it is.

Hmm, yeah, I see how that can be confusing... But the asm is finally
fine. Hey, at least one thing...

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface

2015-08-03 Thread Andy Lutomirski
On Mon, Aug 3, 2015 at 8:37 PM, Borislav Petkov  wrote:
> On Mon, Aug 03, 2015 at 05:57:57PM -0400, Steven Rostedt wrote:
>> That's implementation details, not a general concept that users will
>> need to know about.
>
> Why?
>
> It is a branch, regardless of which insn is used on which arch - it is
> either active and you *branch* to that code or *inactive* and you don't.
> So now it is actually what it should've been from the beginning...

Except that, with the new interface, static_key_likely is the other
way around, right?  If the key is true (i.e. enabled), then it doesn't
branch.

I think of the key as a boolean thing that happens to work by code
patching under the hood.  The fancy patching affects the performance
but doesn't really make it functionally different from a regular
variable.  How about making it extra explicit:

static_key_set(&key, value);

where value is a bool or maybe even an unsigned int?

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [TRIVIAL PATCH] block: Correct misuses of 0x%

2015-08-03 Thread yalin wang

> On Aug 4, 2015, at 10:27, Joe Perches  wrote:
> 
> On Tue, 2015-08-04 at 10:19 +0800, yalin wang wrote:
>> Ping ? 
>>> On Aug 3, 2015, at 16:56, yalin wang  wrote:
>>> 
>>> 
>>>> On Aug 3, 2015, at 16:03, Joe Perches  wrote:
>>>> 
>>>> On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote:
>>>>>> On Aug 3, 2015, at 04:25, Joe Perches  wrote:
>>>>>> 
>>>>>> Correct misuse of 0x%d in logging messages.
>>>>>> 
>>>> []
>>>>> why not use like this : dev_dbg(&h->pdev->dev, "   Max outstanding 
>>>>> commands = %#x\n”  ? 
>>>>> %#x will add 0x prefix automatically .
>>>> 
>>>> It's generally a consistency thing.
>>>> A 0 value would be emitted as 0 and not 0x0.
>>>> 
 
>>> i try on my ubuntu , 
>>> 
>>> static int __init throtl_init(void) 
>>>   
>>> {   
>>>  
>>>printk("module init test: %#x %p\n", 0, (void *)0x123);  
>>> 
>>> return 0;   
>>> 
>>> 
>>> }   
>>> 
>>> 
>>> module_init(throtl_init); 
>>> 
>>> #uname -a
>>> Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 
>>> UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
>>> 
>>> #dmesg
>>> [259356.375586] module init test: 0x0 0123
>>> 
>>> it seems don’t need 0x%x for 0, just need %#x for all numbers.
>>> there are lots of use like this, i can change them if needed:
>>> 
>>> # egrep -r  -i '0x%\d*x'  .  | wc -l 
>>> 11776
> 
> I suggest not, it's not a standard usage and the 0
> may be unexpected.
> 
OK, printk's handling of %x is really not compatible with glibc's printf
behavior. Another case is %p: printk prints %p as hex without the 0x prefix,
while printf prints %p with the 0x prefix. Does this need to change,
so that we don't need lots of 0x%p in printk calls?









--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Joe Perches
On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote:
> On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
> > This patch replaces calls to net_dbg_ratelimited when !DEBUG with
> > no_printk, keeping with the idiom of all the other debug print helpers.
> 
> Makes sense, thanks Jason.

Perhaps better still would be to use if (0) no_printk so that
the call and whatever argument calls the net_dbg_ratelimited
makes are completely eliminated.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Joe Perches
On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote:
> The pr_debug family of functions turns into a no-op when -DDEBUG is not
> specified, opting instead to call "no_printk", which gets compiled to a
> no-op (but retains gcc's nice warnings about printf-style arguments).
> 
> The problem with net_dbg_ratelimited is that it is defined to be a
> variant of net_ratelimited_function, which expands to essentially:
> 
> if (net_ratelimit())
> pr_debug(fmt, ...);
> 
> When DEBUG is not defined, then this becomes,
> 
> if (net_ratelimit())
> ;
> 
> This seems benign, except it isn't. Firstly, there's the obvious
> overhead of calling net_ratelimit needlessly, which does quite some book
> keeping for the rate limiting. Given that the pr_debug and
> net_dbg_ratelimited family of functions are sprinkled liberally through
> performance critical code, with developers assuming they'll be compiled
> out to a no-op most of the time, we certainly do not want this needless
> book keeping. Secondly, and most visibly, even though no debug message
> is printed when DEBUG is not defined, if there is a flood of
> invocations, dmesg winds up peppered with messages such as
> "net_ratelimit: 320 callbacks suppressed". This is because our
> aforementioned net_ratelimit() function actually prints this text in
> some circumstances. It's especially odd to see this when there isn't any
> other accompanying debug message.
> 
> So, in sum, it doesn't make sense to have this function's current
> behavior, and instead it should match what every other debug family of
> functions in the kernel does with !DEBUG -- nothing.
> 
> This patch replaces calls to net_dbg_ratelimited when !DEBUG with
> no_printk, keeping with the idiom of all the other debug print helpers.

Makes sense, thanks Jason.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/3] Reverted "selftests: add hugetlbfstest"

2015-08-03 Thread Mike Kravetz

Rebased as suggested by Naoya Horiguchi

This manually reverts 7e50533d4b84289e4f01de56d6f98e9c64e2229e

The hugetlbfstest test depends on hugetlb pages being counted
in a task's rss.  This functionality is not in the kernel, so
the test will always fail.  Remove test to avoid confusion.

Signed-off-by: Mike Kravetz 
---
 tools/testing/selftests/vm/Makefile|  1 -
 tools/testing/selftests/vm/hugetlbfstest.c | 86 
--

 tools/testing/selftests/vm/run_vmtests | 11 
 3 files changed, 98 deletions(-)
 delete mode 100644 tools/testing/selftests/vm/hugetlbfstest.c

diff --git a/tools/testing/selftests/vm/Makefile 
b/tools/testing/selftests/vm/Makefile

index 2da6608..bb888c6 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -4,7 +4,6 @@ CFLAGS = -Wall
 BINARIES = compaction_test
 BINARIES += hugepage-mmap
 BINARIES += hugepage-shm
-BINARIES += hugetlbfstest
 BINARIES += map_hugetlb
 BINARIES += mlock2-tests
 BINARIES += on-fault-limit
diff --git a/tools/testing/selftests/vm/hugetlbfstest.c 
b/tools/testing/selftests/vm/hugetlbfstest.c

deleted file mode 100644
index 02e1072..000
--- a/tools/testing/selftests/vm/hugetlbfstest.c
+++ /dev/null
@@ -1,86 +0,0 @@
-#define _GNU_SOURCE
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-typedef unsigned long long u64;
-
-static size_t length = 1 << 24;
-
-static u64 read_rss(void)
-{
-   char buf[4096], *s = buf;
-   int i, fd;
-   u64 rss;
-
-   fd = open("/proc/self/statm", O_RDONLY);
-   assert(fd > 2);
-   memset(buf, 0, sizeof(buf));
-   read(fd, buf, sizeof(buf) - 1);
-   for (i = 0; i < 1; i++)
-   s = strchr(s, ' ') + 1;
-   rss = strtoull(s, NULL, 10);
-   return rss << 12; /* assumes 4k pagesize */
-}
-
-static void do_mmap(int fd, int extra_flags, int unmap)
-{
-   int *p;
-   int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags;
-   u64 before, after;
-   int ret;
-
-   before = read_rss();
-   p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0);
-   assert(p != MAP_FAILED ||
-   !"mmap returned an unexpected error");
-   after = read_rss();
-   assert(llabs(after - before - length) < 0x4 ||
-   !"rss didn't grow as expected");
-   if (!unmap)
-   return;
-   ret = munmap(p, length);
-   assert(!ret || !"munmap returned an unexpected error");
-   after = read_rss();
-   assert(llabs(after - before) < 0x4 ||
-   !"rss didn't shrink as expected");
-}
-
-static int open_file(const char *path)
-{
-   int fd, err;
-
-   unlink(path);
-   fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL
-   | O_LARGEFILE | O_CLOEXEC, 0600);
-   assert(fd > 2);
-   unlink(path);
-   err = ftruncate(fd, length);
-   assert(!err);
-   return fd;
-}
-
-int main(void)
-{
-   int hugefd, fd;
-
-   fd = open_file("/dev/shm/hugetlbhog");
-   hugefd = open_file("/hugepages/hugetlbhog");
-
-   system("echo 100 > /proc/sys/vm/nr_hugepages");
-   do_mmap(-1, MAP_ANONYMOUS, 1);
-   do_mmap(fd, 0, 1);
-   do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1);
-   do_mmap(hugefd, 0, 1);
-   do_mmap(hugefd, MAP_HUGETLB, 1);
-   /* Leak the last one to test do_exit() */
-   do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0);
-   printf("oll korrekt.\n");
-   return 0;
-}
diff --git a/tools/testing/selftests/vm/run_vmtests 
b/tools/testing/selftests/vm/run_vmtests

index 231174a..b7ae2b6 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -76,17 +76,6 @@ else
 fi

 echo ""
-echo "running hugetlbfstest"
-echo ""
-./hugetlbfstest
-if [ $? -ne 0 ]; then
-   echo "[FAIL]"
-   exitcode=1
-else
-   echo "[PASS]"
-fi
-
-echo ""
 echo "running userfaultfd"
 echo ""
 ./userfaultfd 128 32
--
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime

2015-08-03 Thread Borislav Petkov
On Mon, Aug 03, 2015 at 11:45:24AM -0700, Andy Lutomirski wrote:
> P.P.P.S.  Who thought that IRET faults unmasking NMIs made any sense
> whatsoever when NMIs run on an IST stack?  Seriously, people?

What happened with asking Intel for a sane IRET-NG?

Should be relatively easy - take the current IRET microcode, get rid
of the nasty crap, allocate a new opcode and done. Validation should
actually have *less* to do and can reuse all current test cases.

:-)

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/5] x86, gfp: Cache best near node for memory allocation.

2015-08-03 Thread Tang Chen

Hi TJ,

Sorry for the late reply.

On 07/16/2015 05:48 AM, Tejun Heo wrote:

..
so in initialization phase makes no sense any more. The best near online
node for each cpu should be cached somewhere.
I'm not really following.  Is this because the now offline node can
later come online and we'd have to break the constant mapping
invariant if we update the mapping later?  If so, it'd be nice to
spell that out.


Yes. Will document this in the next version.


..
  
+int get_near_online_node(int node)

+{
+   return per_cpu(x86_cpu_to_near_online_node,
+  cpumask_first(&node_to_cpuid_mask_map[node]));
+}
+EXPORT_SYMBOL(get_near_online_node);

Umm... this function is sitting on a fairly hot path and scanning a
cpumask each time.  Why not just build a numa node -> numa node array?


Indeed. Will avoid scanning a cpumask.


..

  
  static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,

unsigned int order)
  {
-   VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));
+   VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+
+#if IS_ENABLED(CONFIG_X86) && IS_ENABLED(CONFIG_NUMA)
+   if (!node_online(nid))
+   nid = get_near_online_node(nid);
+#endif
  
  	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));

  }

Ditto.  Also, what's the synchronization rules for NUMA node
on/offlining.  If you end up updating the mapping later, how would
that be synchronized against the above usages?


I think the near online node map should be updated when node online/offline
happens. But about this, I think the current numa code has a little problem.

As you know, firmware info binds a set of CPUs and memory to a node. But
at boot time, if the node has no memory (a memory-less node) , it won't 
be online.

But the CPUs on that node are available, and bound to the near online node.
(Here, I mean numa_set_node(cpu, node).)

Why does the kernel do this? I think it is used to ensure that we can
allocate memory successfully by calling functions like alloc_pages_node()
and alloc_pages_exact_node(). For these two functions, any CPU should be
bound to a node which has memory so that memory allocation can be
successful.

That means, for a memory-less node at boot time, the CPUs on the node are
online, but the node is not online.

That also means "the node is online" is equivalent to "the node has memory".
Actually, a lot of code in the kernel relies on this rule.


But,
1) in cpu_up(), it will try to online a node, and it doesn't check if 
the node has memory.

2) in try_offline_node(), it offlines CPUs first, and then the memory.

This behavior looks a little weird, or let's say it is ambiguous. It
seems that a NUMA node consists of CPUs and memory. So if the CPUs are
online, the node should be online.


And also,
The main purpose of this patch-set is to make the cpuid <-> nodeid 
mapping persistent.
After this patch-set, alloc_pages_node() and alloc_pages_exact_node() 
won't depend on
cpuid <-> nodeid mapping any more. So the node should be online if the 
CPUs on it are

online. Otherwise, we cannot setup interfaces of CPUs under /sys.


Unfortunately, since I don't have a machine with a memory-less node, I
cannot reproduce the problem right now.

How do you think the node online behavior should be changed ?

Thanks.





































--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] kcore:change kcore_read to make sure the kernel read is safe

2015-08-03 Thread yalin wang
Change read_kcore() to use __copy_from_user_inatomic() to copy data
from kernel addresses, because kern_addr_valid() only guarantees that the
page table is valid at the time it is called. After it returns, the page
table may change (for example, set_fixmap() changes the kernel page
table), which may trigger a kernel crash if we are unlucky enough to hit
this window.

Signed-off-by: yalin wang 
---
 fs/proc/kcore.c | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726..b085fde 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -86,8 +86,8 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
size = try;
*nphdr = *nphdr + 1;
}
-   *elf_buflen =   sizeof(struct elfhdr) + 
-   (*nphdr + 2)*sizeof(struct elf_phdr) + 
+   *elf_buflen =   sizeof(struct elfhdr) +
+   (*nphdr + 2)*sizeof(struct elf_phdr) +
3 * ((sizeof(struct elf_note)) +
 roundup(sizeof(CORE_STR), 4)) +
roundup(sizeof(struct elf_prstatus), 4) +
@@ -435,6 +435,7 @@ read_kcore(struct file *file, char __user *buffer, size_t 
buflen, loff_t *fpos)
size_t elf_buflen;
int nphdr;
unsigned long start;
+   unsigned long page = 0;
 
read_lock(&kclist_lock);
size = get_kcore_size(&nphdr, &elf_buflen);
@@ -485,7 +486,7 @@ read_kcore(struct file *file, char __user *buffer, size_t 
buflen, loff_t *fpos)
start = kc_offset_to_vaddr(*fpos - elf_buflen);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
-   
+
while (buflen) {
struct kcore_list *m;
 
@@ -515,15 +516,32 @@ read_kcore(struct file *file, char __user *buffer, size_t 
buflen, loff_t *fpos)
} else {
if (kern_addr_valid(start)) {
unsigned long n;
+   mm_segment_t old_fs = get_fs();
+
+   if (page == 0) {
+   page = __get_free_page(GFP_KERNEL);
+   if (page == 0)
+   return -ENOMEM;
 
-   n = copy_to_user(buffer, (char *)start, tsz);
+   }
+   set_fs(KERNEL_DS);
+   pagefault_disable();
+   n = __copy_from_user_inatomic((void *)page,
+   (__force const void __user *)start,
+   tsz);
+   pagefault_enable();
+   set_fs(old_fs);
+   if (n)
+   memset((void *)page + tsz - n, 0, n);
+
+   n = copy_to_user(buffer, (char *)page, tsz);
/*
 * We cannot distinguish between fault on source
 * and fault on destination. When this happens
 * we clear too and hope it will trigger the
 * EFAULT again.
 */
-   if (n) { 
+   if (n) {
if (clear_user(buffer + tsz - n,
n))
return -EFAULT;
@@ -540,7 +558,7 @@ read_kcore(struct file *file, char __user *buffer, size_t 
buflen, loff_t *fpos)
start += tsz;
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
}
-
+   free_page(page);
return acc;
 }
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface

2015-08-03 Thread Borislav Petkov
On Mon, Aug 03, 2015 at 05:57:57PM -0400, Steven Rostedt wrote:
> That's implementation details, not a general concept that users will
> need to know about.

Why?

It is a branch, regardless of which insn is used on which arch - it is
either active and you *branch* to that code or *inactive* and you don't.
So now it is actually what it should've been from the beginning...

I realize simplifying the terminology around those jump labels/static
branches things comes kinda unnatural now.

-- 
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the target-updates tree with the libata tree

2015-08-03 Thread Stephen Rothwell
Hi Nicholas,

Today's linux-next merge of the target-updates tree got a conflict in:

  drivers/ata/libata-scsi.c

between commit:

  fe16d4f202c5 ("Revert "libata-eh: Set 'information' field for autosense"")

from the libata tree and commit:

  f5a8b3a796db ("scsi: Protect against buffer possible overflow in 
scsi_set_sense_information")

from the target-updates tree.

I fixed it up (the former removed some code that was updated by the
latter, so I just removed the code) and can carry the fix as necessary
(no action is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the target-updates tree with the libata tree

2015-08-03 Thread Stephen Rothwell
Hi Nicholas,

Today's linux-next merge of the target-updates tree got conflicts in:

  drivers/scsi/scsi_error.c
  include/scsi/scsi_eh.h

between commit:

  fe16d4f202c5 ("Revert "libata-eh: Set 'information' field for autosense"")

from the libata tree and commit:

  7708c1656552 ("scsi: Move sense handling routines to scsi_common")

from the target-updates tree.

I fixed it up (I left scsi_set_sense_information in its new place) and
can carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell s...@canb.auug.org.au
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 1/6] mmc: sdhci-esdhc-imx: add imx7d support and support HS400

2015-08-03 Thread Dong Aisheng
On Wed, Jul 29, 2015 at 05:03:52PM +0800, Haibo Chen wrote:
> The imx7d usdhc is derived from imx6sx, the difference is that
> imx7d support HS400.
> 
> So introduce a new compatible string for imx7d and add HS400
> support for imx7d usdhc.
> 
> Signed-off-by: Haibo Chen 
> ---
>  drivers/mmc/host/sdhci-esdhc-imx.c | 66 
> ++
>  1 file changed, 66 insertions(+)
> 
> diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c 
> b/drivers/mmc/host/sdhci-esdhc-imx.c
> index c6b9f64..b441eed 100644
> --- a/drivers/mmc/host/sdhci-esdhc-imx.c
> +++ b/drivers/mmc/host/sdhci-esdhc-imx.c
> @@ -44,6 +44,7 @@
>  #define  ESDHC_MIX_CTRL_EXE_TUNE (1 << 22)
>  #define  ESDHC_MIX_CTRL_SMPCLK_SEL   (1 << 23)
>  #define  ESDHC_MIX_CTRL_FBCLK_SEL(1 << 25)
> +#define  ESDHC_MIX_CTRL_HS400_EN (1 << 26)
>  /* Bits 3 and 6 are not SDHCI standard definitions */
>  #define  ESDHC_MIX_CTRL_SDHCI_MASK   0xb7
>  /* Tuning bits */
> @@ -60,6 +61,16 @@
>  #define  ESDHC_TUNE_CTRL_MIN 0
>  #define  ESDHC_TUNE_CTRL_MAX ((1 << 7) - 1)
>  
> +/* strobe dll register */
> +#define ESDHC_STROBE_DLL_CTRL0x70
> +#define ESDHC_STROBE_DLL_CTRL_ENABLE (1 << 0)
> +#define ESDHC_STROBE_DLL_CTRL_RESET  (1 << 1)
> +#define ESDHC_STROBE_DLL_CTRL_SLV_DLY_TARGET_SHIFT   3
> +
> +#define ESDHC_STROBE_DLL_STATUS  0x74
> +#define ESDHC_STROBE_DLL_STS_REF_LOCK(1 << 1)
> +#define ESDHC_STROBE_DLL_STS_SLV_LOCK0x1
> +
>  #define ESDHC_TUNING_CTRL0xcc
>  #define ESDHC_STD_TUNING_EN  (1 << 24)
>  /* NOTE: the minimum valid tuning start tap for mx6sl is 1 */
> @@ -120,6 +131,8 @@
>  #define ESDHC_FLAG_ERR004536 BIT(7)
>  /* The IP supports HS200 mode */
>  #define ESDHC_FLAG_HS200 BIT(8)
> +/* The IP supports HS400 mode */
> +#define ESDHC_FLAG_SUP_HS400 BIT(9)
>  
>  struct esdhc_soc_data {
>   u32 flags;
> @@ -156,6 +169,12 @@ static struct esdhc_soc_data usdhc_imx6sx_data = {
>   | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200,
>  };
>  
> +static struct esdhc_soc_data usdhc_imx7d_data = {
> + .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
> + | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
> + | ESDHC_FLAG_SUP_HS400,

Better to use ESDHC_FLAG_HS400 to stay aligned with the existing ESDHC_FLAG_HS200.

Regards
Dong Aisheng
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG

2015-08-03 Thread Jason A. Donenfeld
The pr_debug family of functions turns into a no-op when -DDEBUG is not
specified, opting instead to call "no_printk", which gets compiled to a
no-op (but retains gcc's nice warnings about printf-style arguments).

The problem with net_dbg_ratelimited is that it is defined to be a
variant of net_ratelimited_function, which expands to essentially:

if (net_ratelimit())
pr_debug(fmt, ...);

When DEBUG is not defined, then this becomes,

if (net_ratelimit())
;

This seems benign, except it isn't. Firstly, there's the obvious
overhead of calling net_ratelimit needlessly, which does quite some book
keeping for the rate limiting. Given that the pr_debug and
net_dbg_ratelimited family of functions are sprinkled liberally through
performance critical code, with developers assuming they'll be compiled
out to a no-op most of the time, we certainly do not want this needless
book keeping. Secondly, and most visibly, even though no debug message
is printed when DEBUG is not defined, if there is a flood of
invocations, dmesg winds up peppered with messages such as
"net_ratelimit: 320 callbacks suppressed". This is because our
aforementioned net_ratelimit() function actually prints this text in
some circumstances. It's especially odd to see this when there isn't any
other accompanying debug message.

So, in sum, it doesn't make sense to have this function's current
behavior, and instead it should match what every other debug family of
functions in the kernel does with !DEBUG -- nothing.

This patch replaces calls to net_dbg_ratelimited when !DEBUG with
no_printk, keeping with the idiom of all the other debug print helpers.

Signed-off-by: Jason A. Donenfeld 
---
 include/linux/net.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 04aa068..500fdfe 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -239,8 +239,13 @@ do {   
\
net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
 #define net_info_ratelimited(fmt, ...) \
net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
+#if defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)  \
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
+#else
+#define net_dbg_ratelimited(fmt, ...)  \
+   no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
 
 bool __net_get_random_once(void *buf, int nbytes, bool *done,
   struct static_key *done_key);
-- 
2.4.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 1/7] locking/pvqspinlock: Unconditional PV kick with _Q_SLOW_VAL

2015-08-03 Thread Waiman Long

On 08/01/2015 06:29 PM, Peter Zijlstra wrote:

On Fri, Jul 31, 2015 at 10:21:58PM -0400, Waiman Long wrote:

The smp_store_release() is not a full barrier. In order to avoid a missed
wakeup, we may need to add memory barriers around the locked and cpu state
variables, adding to complexity. As the chance of a spurious wakeup is very
low, it is easier and safer to just do an unconditional kick at unlock
time.

Signed-off-by: Waiman Long
---
  kernel/locking/qspinlock_paravirt.h |   11 ---
  1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h 
b/kernel/locking/qspinlock_paravirt.h
index 15d3733..2dd4b39 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -240,7 +240,6 @@ static void pv_wait_head(struct qspinlock *lock, struct 
mcs_spinlock *node)
cpu_relax();
}

-   WRITE_ONCE(pn->state, vcpu_halted);
if (!lp) { /* ONCE */
lp = pv_hash(lock, pn);
/*
@@ -320,9 +319,15 @@ __visible void __pv_queued_spin_unlock(struct qspinlock 
*lock)
/*
 * At this point the memory pointed at by lock can be freed/reused,
 * however we can still use the pv_node to kick the CPU.
+*
+* As smp_store_release() is not a full barrier, adding a check to
+* the node->state doesn't guarantee the checking is really done
+* after clearing the lock byte

This is true, but _WHY_ is that a problem ?

  since they are in 2 separate

+* cachelines and so hardware can reorder them.

That's just gibberish, even in the same cacheline stuff can get
reordered.

 So either we insert

+* memory barrier here and in the corresponding pv_wait_head()
+* function or we do an unconditional kick which is what is done here.

why, why why ? You've added words, but you've not actually described
what the problem is you're trying to fix.

AFAICT the only thing we really care about here is that the load in
question happens _after_ we observe SLOW, and that is still true.

The order against the unlock is irrelevant.

So we set ->state before we hash and before we set SLOW. Given that
we've seen SLOW, we must therefore also see ->state.

If ->state == halted, this means the CPU in question is blocked and the
pv_node will not get re-used -- if it does get re-used, it wasn't
blocked and we don't care either.

Therefore, ->cpu is stable and we'll kick it into action.

How do you end up not waking a waiting cpu? Explain that.



Yes, it is safe in the current code. In some versions of my pvqspinlock 
patch, I was resetting the state back to running in pv_wait_head(). This 
causes race problem.


The current code, however, will not reset the state back to running and 
so the check is redundant. I will clarify that in the next patch.



*/
-   if (READ_ONCE(node->state) == vcpu_halted)
-   pv_kick(node->cpu);
+   pv_kick(node->cpu);
  }

Also, this patch clearly isn't against my tree.



Yes, I was basing it on the latest tip tree. As some of the files in 
the patch were modified in the latest tip tree, I will rebase my patch 
and update it.


Please let me know if I should be using your tree instead.

Cheers,
Longman
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] iio: adc: vf610: Add IIO buffer support for Vybrid ADC

2015-08-03 Thread Duan Andy
From: Sanchayan Maity  Sent: Monday, August 03, 2015 
11:10 PM
> To: ji...@kernel.org; linux-...@vger.kernel.org
> Cc: ste...@agner.ch; Duan Fugang-B38611; hof...@osadl.org;
> sanjeev_sha...@mentor.com; Estevam Fabio-R49496; knaac...@gmx.de;
> l...@metafoo.de; pme...@pmeerw.net; antoine.ten...@free-electrons.com;
> linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org;
> Sanchayan Maity
> Subject: [PATCH] iio: adc: vf610: Add IIO buffer support for Vybrid ADC
> 
> This patch adds support for IIO buffer to the Vybrid ADC driver.
> IIO triggered buffer infrastructure along with iio sysfs trigger is used
> to leverage continuous sampling support provided by the ADC block.
> 
> Signed-off-by: Sanchayan Maity 
> ---
>  drivers/iio/adc/Kconfig |   4 ++
>  drivers/iio/adc/vf610_adc.c | 122
> +---
>  2 files changed, 120 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index
> 7c55658..4661241 100644
> --- a/drivers/iio/adc/Kconfig
> +++ b/drivers/iio/adc/Kconfig
> @@ -337,6 +337,10 @@ config TWL6030_GPADC  config VF610_ADC
>   tristate "Freescale vf610 ADC driver"
>   depends on OF
> + select IIO_BUFFER
> + select IIO_TRIGGER
> + select IIO_SYSFS_TRIGGER
> + select IIO_TRIGGERED_BUFFER
>   help
> Say yes here to support for Vybrid board analog-to-digital
> converter.
> Since the IP is used for i.MX6SLX, the driver also support
> i.MX6SLX.
> diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
> index 23b8fb9..af72b9a 100644
> --- a/drivers/iio/adc/vf610_adc.c
> +++ b/drivers/iio/adc/vf610_adc.c
> @@ -34,8 +34,11 @@
>  #include 
> 
>  #include 
> +#include 
>  #include 
> -#include 
> +#include 
> +#include  #include
> +
> 
>  /* This will be the driver name the kernel reports */  #define
> DRIVER_NAME "vf610-adc"
> @@ -170,6 +173,7 @@ struct vf610_adc {
>   u32 sample_freq_avail[5];
> 
>   struct completion completion;
> + u16 *buffer;
>  };
> 
>  static const u32 vf610_hw_avgs[] = { 1, 4, 8, 16, 32 }; @@ -505,12
> +509,22 @@ static const struct iio_chan_spec_ext_info vf610_ext_info[] =
> {
>   .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) |  \
>   BIT(IIO_CHAN_INFO_SAMP_FREQ),   \
>   .ext_info = vf610_ext_info, \
> + .address = (_idx),  \
> + .scan_index = (_idx),   \
> + .scan_type.sign = 'u',  \
> + .scan_type.realbits = 12,   \
> + .scan_type.storagebits = 16,\
>  }
> 
>  #define VF610_ADC_TEMPERATURE_CHAN(_idx, _chan_type) {   \
>   .type = (_chan_type),   \
>   .channel = (_idx),  \
>   .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), \
> + .address = (_idx),  \
> + .scan_index = (_idx),   \
> + .scan_type.sign = 'u',  \
> + .scan_type.realbits = 12,   \
> + .scan_type.storagebits = 16,\
>  }
> 
>  static const struct iio_chan_spec vf610_adc_iio_channels[] = { @@ -531,6
> +545,7 @@ static const struct iio_chan_spec vf610_adc_iio_channels[] = {
>   VF610_ADC_CHAN(14, IIO_VOLTAGE),
>   VF610_ADC_CHAN(15, IIO_VOLTAGE),
>   VF610_ADC_TEMPERATURE_CHAN(26, IIO_TEMP),
> + IIO_CHAN_SOFT_TIMESTAMP(32),
>   /* sentinel */
>  };
> 
> @@ -559,13 +574,21 @@ static int vf610_adc_read_data(struct vf610_adc
> *info)
> 
>  static irqreturn_t vf610_adc_isr(int irq, void *dev_id)  {
> - struct vf610_adc *info = (struct vf610_adc *)dev_id;
> + struct iio_dev *indio_dev = (struct iio_dev *)dev_id;
> + struct vf610_adc *info = iio_priv(indio_dev);
>   int coco;
> 
>   coco = readl(info->regs + VF610_REG_ADC_HS);
>   if (coco & VF610_ADC_HS_COCO0) {
>   info->value = vf610_adc_read_data(info);
> - complete(&info->completion);
> + if (iio_buffer_enabled(indio_dev)) {
> + info->buffer[0] = info->value;
> + writel(0, info->regs + VF610_REG_ADC_HS);
The register is read only. After ADC_Rn is read, the coco bit is cleared.

> + iio_push_to_buffers_with_timestamp(indio_dev,
> + info->buffer, iio_get_time_ns());
> + iio_trigger_notify_done(indio_dev->trig);
> + } else
> + complete(&info->completion);
>   }
> 
>   return IRQ_HANDLED;
> @@ -612,6 +635,9 @@ static int vf610_read_raw(struct iio_dev *indio_dev,
>   switch (mask) {
>   case IIO_CHAN_INFO_RAW:
>   case IIO_CHAN_INFO_PROCESSED:
> + if (iio_buffer_enabled(indio_dev))
> + return -EBUSY;
> +
>   mutex_lock(&indio_dev->mlock);
>   rein

Re: [PATCH 0/3] vm hugetlb selftest cleanup

2015-08-03 Thread Naoya Horiguchi
On Thu, Jul 30, 2015 at 05:59:50PM -0700, Mike Kravetz wrote:
> As a followup to discussions of hugetlbfs fallocate, this provides
> cleanup the vm hugetlb selftests.  Remove hugetlbfstest as it tests
> functionality not present in the kernel.  Emphasize that libhugetlbfs
> test suite should be used for hugetlb regression testing.
> 
> Mike Kravetz (3):
>   Reverted "selftests: add hugetlbfstest"
>   selftests:vm: Point to libhugetlbfs for regression testing
>   Documentation: update libhugetlbfs location and use for testing

It seems that patch 1 conflicts with commit bd67d5c15cc1 ("Test compaction
of mlocked memory"), but the resolution is trivial, so for the series ...

Acked-by: Naoya Horiguchi 

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: hugetlb pages not accounted for in rss

2015-08-03 Thread Naoya Horiguchi
On Wed, Jul 29, 2015 at 04:20:59PM -0700, Mike Kravetz wrote:
> On 07/29/2015 12:08 PM, David Rientjes wrote:
> >On Tue, 28 Jul 2015, Jörn Engel wrote:
> >
> >>Well, we definitely need something.  Having a 100GB process show 3GB of
> >>rss is not very useful.  How would we notice a memory leak if it only
> >>affects hugepages, for example?
> >>
> >
> >Since the hugetlb pool is a global resource, it would also be helpful to
> >determine if a process is mapping more than expected.  You can't do that
> >just by adding a huge rss metric, however: if you have 2MB and 1GB
> >hugepages configured you wouldn't know if a process was mapping 512 2MB
> >hugepages or 1 1GB hugepage.
> >
> >That's the purpose of hugetlb_cgroup, after all, and it supports usage
> >counters for all hstates.  The test could be converted to use that to
> >measure usage if configured in the kernel.
> >
> >Beyond that, I'm not sure how a per-hstate rss metric would be exported to
> >userspace in a clean way and other ways of obtaining the same data are
> >possible with hugetlb_cgroup.  I'm not sure how successful you'd be in
> >arguing that we need separate rss counters for it.
>
> If I want to track hugetlb usage on a per-task basis, do I then need to
> create one cgroup per task?
>
> For example, suppose I have many tasks using hugetlb and the global pool
> is getting low on free pages.  It might be useful to know which tasks are
> using hugetlb pages, and how many they are using.
>
> I don't actually have this need (I think), but it appears to be what
> Jörn is asking for.

One possible way to get a hugetlb metric on a per-task basis is to walk the
page table via /proc/pid/pagemap and count the page flags for each mapped page
(we can easily do this with tools/vm/page-types.c, e.g. "page-types -p PID
-b huge"). This is obviously slower than just storing the counter as
in-kernel data and exporting it, but it might be useful in some situations.

Thanks,
Naoya Horiguchi

[PATCH] ARM64: dts: mt6795: enable basic SMP bringup for MT6795

2015-08-03 Thread Scott Shu
This patch adds support SMP on MediaTek MT6795 Cortex-A53 Octa-core SoC.

The patch is based on v4.2-rc1 and following patch series:
(1) Mars Cheng's "Add mt6795 basic chip support" [1]

[1] https://lkml.org/lkml/2015/7/14/63

Signed-off-by: Scott Shu 
---
 arch/arm64/boot/dts/mediatek/mt6795.dtsi |   13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt6795.dtsi 
b/arch/arm64/boot/dts/mediatek/mt6795.dtsi
index da200e7..c85659d 100644
--- a/arch/arm64/boot/dts/mediatek/mt6795.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt6795.dtsi
@@ -20,6 +20,11 @@
#address-cells = <2>;
#size-cells = <2>;
 
+   psci {
+   compatible = "arm,psci-0.2";
+   method = "smc";
+   };
+
cpus {
#address-cells = <1>;
#size-cells = <0>;
@@ -27,48 +32,56 @@
cpu0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x000>;
};
 
cpu1: cpu@1 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x001>;
};
 
cpu2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x002>;
};
 
cpu3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x003>;
};
 
cpu4: cpu@100 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x100>;
};
 
cpu5: cpu@101 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x101>;
};
 
cpu6: cpu@102 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x102>;
};
 
cpu7: cpu@103 {
device_type = "cpu";
compatible = "arm,cortex-a53";
+   enable-method = "psci";
reg = <0x103>;
};
};
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v5] powerpc/rcpm: add RCPM driver

2015-08-03 Thread Chenhui Zhao



On Tue, Aug 4, 2015 at 4:23 AM, Scott Wood wrote:

On Mon, 2015-08-03 at 19:14 +0800, Chenhui Zhao wrote:

 On Sat, Aug 1, 2015 at 8:45 AM, Scott Wood wrote:
 > On Fri, 2015-06-26 at 15:44 +0800, Yuantian.Tang@freescale.com wrote:

 > >  +static void rcpm_v1_set_ip_power(bool enable, u32 *mask)
 > >  +{
 > >  + if (enable)
 > >  + setbits32(&rcpm_v1_regs->ippdexpcr, *mask);
 > >  + else
 > >  + clrbits32(&rcpm_v1_regs->ippdexpcr, *mask);
 > >  +}
 > >  +
 > >  +static void rcpm_v2_set_ip_power(bool enable, u32 *mask)
 > >  +{
 > >  + if (enable)
 > >  + setbits32(&rcpm_v2_regs->ippdexpcr[0], *mask);
 > >  + else
 > >  + clrbits32(&rcpm_v2_regs->ippdexpcr[0], *mask);
 > >  +}
 >
 > Why do these take "u32 *mask" instead of "u32 mask"?
 >
 > -Scott

 I think it can be used in the case where there are several mask 
values.


When would that be?

-Scott


So far, only one register is used, even though the register name is 
"IPPDEXPCRn" (with an "n" suffix) in the T4 RM.


OK. Just change the parameter to "u32 mask".

-Chenhui

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [TRIVIAL PATCH] block: Correct misuses of 0x%

2015-08-03 Thread Joe Perches
On Tue, 2015-08-04 at 10:19 +0800, yalin wang wrote:
> Ping ? 
> > On Aug 3, 2015, at 16:56, yalin wang  wrote:
> > 
> > 
> >> On Aug 3, 2015, at 16:03, Joe Perches  wrote:
> >> 
> >> On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote:
>  On Aug 3, 2015, at 04:25, Joe Perches  wrote:
>  
>  Correct misuse of 0x%d in logging messages.
>  
> >> []
> >>> why not use like this : dev_dbg(&h->pdev->dev, "   Max outstanding 
> >>> commands = %#x\n”  ? 
> >>> %#x will add 0x prefix automatically .
> >> 
> >> It's generally a consistency thing.
> >> A 0 value would be emitted as 0 and not 0x0.
> >> 
> > i try on my ubuntu , 
> > 
> > static int __init throtl_init(void) 
> >   
> >  {  
> >   
> > printk("module init test: %#x %p\n", 0, (void *)0x123); 
> >  
> >  return 0;  
> >  
> > 
> >  }  
> >  
> > 
> >  module_init(throtl_init); 
> > 
> > #uname -a
> > Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 
> > UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
> > 
> > #dmesg
> > [259356.375586] module init test: 0x0 0123
> > 
> > it seems don’t need 0x%x for 0, just need %#x for all numbers.
> > there are lots of use like this, i can change them if needed:
> > 
> > # egrep -r  -i '0x%\d*x'  .  | wc -l 
> > 11776

I suggest not, it's not a standard usage and the 0
may be unexpected.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] usb: gadget: f_printer: fix the bug of deadlock caused by nested spinlock

2015-08-03 Thread fupan

On 08/03/2015 10:47 PM, Felipe Balbi wrote:

Hi,

On Mon, Aug 03, 2015 at 07:19:43PM +0800, fupan...@windriver.com wrote:

From: fli 

Function printer_func_disable() has called spinlock on printer_dev->lock,
and it'll call function chain of

 printer_reset_interface()
 |
+---dwc3_gadget_ep_disable()
|
 +---__dwc3_gadget_ep_disable()
 |
 +---dwc3_remove_requests()
 |
 +---dwc3_gadget_giveback()
 |
 +---rx_complete()

in the protected block.

However, rx_complete() in f_printer.c calls spinlock on printer_dev->lock again,
which will cause system hang.

The following steps can reproduce this hang:

1. Build the test program from Documentation/usb/gadget_printer.txt as g_printer
2. Plug in the USB device to a host(such as Ubuntu).
3. on the USB device system run:
#modprobe g_printer.ko
#./g_printer -read_data

4. Unplug the USB device from the host

The system will hang later.

In order to avoid this deadlock, move the spinlock from 
printer_func_disable() into printer_reset_interface() and exclude the call to 
dwc3_gadget_ep_disable() from the locked region; the critical resource touched 
on that path is protected by the spinlock taken in rx_complete().

This commit will fix the system hang with the following calltrace:

INFO: rcu_preempt detected stalls on CPUs/tasks: { 3} (detected by 0, t=21006 
jiffies, g=524, c=523, q=2)
sending NMI to all CPUs:
NMI backtrace for cpu 3
CPU: 3 PID: 718 Comm: irq/22-dwc3 Not tainted 3.10.38-ltsi-WR6.0.0.11_standard 
#2
Hardware name: Intel Corp. VALLEYVIEW B3 PLATFORM/NOTEBOOK, BIOS 
BYTICRB1.86C.0092.R32.1410021707 10/02/2014
task: f44f4c20 ti: f40f6000 task.ti: f40f6000
EIP: 0060:[] EFLAGS: 0097 CPU: 3
EIP is at _raw_spin_lock_irqsave+0x35/0x40
EAX: 0076 EBX: f80fad00 ECX: 0076 EDX: 0075
ESI: 0096 EDI: ff94 EBP: f40f7e20 ESP: f40f7e18
  DS: 007b ES: 007b FS: 00d8 GS:  SS: 0068
CR0: 8005003b CR2: b77ac000 CR3: 01c3 CR4: 001007f0
DR0:  DR1:  DR2:  DR3: 
DR6: 0ff0 DR7: 0400
Stack:
  f474a720 f80fad00 f40f7e3c f80f93cc c135d486  f474a720 f468fb00
  f4bea894 f40f7e54 f7e35f19 ff00 f468fb00 f468fb24 0086 f40f7e64
  f7e36577 f468fb00 f4bea810 f40f7e74 f7e365a8 f468fb00 f4bea894 f40f7e9c
Call Trace:
  [] rx_complete+0x1c/0xb0 [g_printer]
  [] ? vsnprintf+0x166/0x390
  [] dwc3_gadget_giveback+0xc9/0xf0 [dwc3]
  [] dwc3_remove_requests+0x57/0x70 [dwc3]
  [] __dwc3_gadget_ep_disable+0x18/0x60 [dwc3]
  [] dwc3_gadget_ep_disable+0x89/0xf0 [dwc3]
  [] printer_reset_interface+0x31/0x50 [g_printer]
  [] printer_func_disable+0x20/0x30 [g_printer]
  [] composite_disconnect+0x4b/0x90 [libcomposite]
  [] dwc3_disconnect_gadget+0x38/0x43 [dwc3]
  [] dwc3_gadget_disconnect_interrupt+0x3e/0x5a [dwc3]
  [] dwc3_thread_interrupt+0x5c8/0x610 [dwc3]
  [] irq_thread_fn+0x18/0x30
  [] irq_thread+0x100/0x130
  [] ? irq_finalize_oneshot.part.29+0xb0/0xb0
  [] ? wake_threads_waitq+0x40/0x40
  [] ? irq_thread_dtor+0xb0/0xb0
  [] kthread+0x94/0xa0
  [] ret_from_kernel_thread+0x1b/0x28
  [] ? kthread_create_on_node+0xc0/0xc0

Signed-off-by: fupan li 

Thanks, out of curiosity, do you plan on sending a glue layer for
Windriver's DWC3 ?

No, just this fix patch.

Fupan


cheers



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [TRIVIAL PATCH] block: Correct misuses of 0x%

2015-08-03 Thread yalin wang
Ping ? 
> On Aug 3, 2015, at 16:56, yalin wang  wrote:
> 
> 
>> On Aug 3, 2015, at 16:03, Joe Perches  wrote:
>> 
>> On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote:
 On Aug 3, 2015, at 04:25, Joe Perches  wrote:
 
 Correct misuse of 0x%d in logging messages.
 
>> []
>>> why not use like this : dev_dbg(&h->pdev->dev, "   Max outstanding 
>>> commands = %#x\n”  ? 
>>> %#x will add 0x prefix automatically .
>> 
>> It's generally a consistency thing.
>> A 0 value would be emitted as 0 and not 0x0.
>> 
> i try on my ubuntu , 
> 
> static int __init throtl_init(void)   
> 
>  {
> 
> printk("module init test: %#x %p\n", 0, (void *)0x123);   
>
>  return 0;
>
> 
>  }
>
> 
>  module_init(throtl_init); 
> 
> #uname -a
> Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 UTC 
> 2015 x86_64 x86_64 x86_64 GNU/Linux
> 
> #dmesg
> [259356.375586] module init test: 0x0 0123
> 
> it seems don’t need 0x%x for 0, just need %#x for all numbers.
> there are lots of use like this, i can change them if needed:
> 
> # egrep -r  -i '0x%\d*x'  .  | wc -l 
> 11776
> 
> 
> Thanks
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 00/15] trivial: Drop unlikely before IS_ERR(_OR_NULL)

2015-08-03 Thread Viresh Kumar
On 03-08-15, 17:38, Steven Rostedt wrote:
> On Fri, 31 Jul 2015 13:23:10 +0300
> "Kirill A. Shutemov"  wrote:
> > We have two cases in code:
> > 
> > drivers/rtc/rtc-gemini.c:   if (likely(IS_ERR(rtc->rtc_dev)))
> > drivers/staging/lustre/lustre/obdclass/lu_object.c: if 
> > (likely(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT)) {
> > 
> > The first one is mistake, I think. Or do we expect rtc_device_register()
> > to fail?
> > 
> > The second is redundant. "if (PTR_ERR(shadow) == -ENOENT)" should do the
> > job.
> > 
> 
> Yep, those look like bugs to me.

Yeah, I have fixed both of them :)

-- 
viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/8] watchdog: watchdog_dev: Use single variable name for struct watchdog_device

2015-08-03 Thread Guenter Roeck
The current code uses 'wdd', 'wddev', and 'watchdog' as variable names
for struct watchdog_device. This is confusing and makes it difficult
to enhance the code. Replace it all with 'wdd'.

Cc: Timo Kokkonen 
Cc: Uwe Kleine-König 
Signed-off-by: Guenter Roeck 
---
 drivers/watchdog/watchdog_dev.c | 151 
 1 file changed, 75 insertions(+), 76 deletions(-)

diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 6aaefbad303e..06171c73daf5 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -51,7 +51,7 @@ static struct watchdog_device *old_wdd;
 
 /*
  * watchdog_ping: ping the watchdog.
- * @wddev: the watchdog device to ping
+ * @wdd: the watchdog device to ping
  *
  * If the watchdog has no own ping operation then it needs to be
  * restarted via the start operation. This wrapper function does
@@ -59,65 +59,65 @@ static struct watchdog_device *old_wdd;
  * We only ping when the watchdog device is running.
  */
 
-static int watchdog_ping(struct watchdog_device *wddev)
+static int watchdog_ping(struct watchdog_device *wdd)
 {
int err = 0;
 
-   mutex_lock(&wddev->lock);
+   mutex_lock(&wdd->lock);
 
-   if (test_bit(WDOG_UNREGISTERED, &wddev->status)) {
+   if (test_bit(WDOG_UNREGISTERED, &wdd->status)) {
err = -ENODEV;
goto out_ping;
}
 
-   if (!watchdog_active(wddev))
+   if (!watchdog_active(wdd))
goto out_ping;
 
-   if (wddev->ops->ping)
-   err = wddev->ops->ping(wddev);  /* ping the watchdog */
+   if (wdd->ops->ping)
+   err = wdd->ops->ping(wdd);  /* ping the watchdog */
else
-   err = wddev->ops->start(wddev); /* restart watchdog */
+   err = wdd->ops->start(wdd); /* restart watchdog */
 
 out_ping:
-   mutex_unlock(&wddev->lock);
+   mutex_unlock(&wdd->lock);
return err;
 }
 
 /*
  * watchdog_start: wrapper to start the watchdog.
- * @wddev: the watchdog device to start
+ * @wdd: the watchdog device to start
  *
  * Start the watchdog if it is not active and mark it active.
  * This function returns zero on success or a negative errno code for
  * failure.
  */
 
-static int watchdog_start(struct watchdog_device *wddev)
+static int watchdog_start(struct watchdog_device *wdd)
 {
int err = 0;
 
-   mutex_lock(&wddev->lock);
+   mutex_lock(&wdd->lock);
 
-   if (test_bit(WDOG_UNREGISTERED, &wddev->status)) {
+   if (test_bit(WDOG_UNREGISTERED, &wdd->status)) {
err = -ENODEV;
goto out_start;
}
 
-   if (watchdog_active(wddev))
+   if (watchdog_active(wdd))
goto out_start;
 
-   err = wddev->ops->start(wddev);
+   err = wdd->ops->start(wdd);
if (err == 0)
-   set_bit(WDOG_ACTIVE, &wddev->status);
+   set_bit(WDOG_ACTIVE, &wdd->status);
 
 out_start:
-   mutex_unlock(&wddev->lock);
+   mutex_unlock(&wdd->lock);
return err;
 }
 
 /*
  * watchdog_stop: wrapper to stop the watchdog.
- * @wddev: the watchdog device to stop
+ * @wdd: the watchdog device to stop
  *
  * Stop the watchdog if it is still active and unmark it active.
  * This function returns zero on success or a negative errno code for
@@ -125,155 +125,154 @@ out_start:
  * If the 'nowayout' feature was set, the watchdog cannot be stopped.
  */
 
-static int watchdog_stop(struct watchdog_device *wddev)
+static int watchdog_stop(struct watchdog_device *wdd)
 {
int err = 0;
 
-   mutex_lock(&wddev->lock);
+   mutex_lock(&wdd->lock);
 
-   if (test_bit(WDOG_UNREGISTERED, &wddev->status)) {
+   if (test_bit(WDOG_UNREGISTERED, &wdd->status)) {
err = -ENODEV;
goto out_stop;
}
 
-   if (!watchdog_active(wddev))
+   if (!watchdog_active(wdd))
goto out_stop;
 
-   if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) {
-   dev_info(wddev->dev, "nowayout prevents watchdog being 
stopped!\n");
+   if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) {
+   dev_info(wdd->dev, "nowayout prevents watchdog being 
stopped!\n");
err = -EBUSY;
goto out_stop;
}
 
-   err = wddev->ops->stop(wddev);
+   err = wdd->ops->stop(wdd);
if (err == 0)
-   clear_bit(WDOG_ACTIVE, &wddev->status);
+   clear_bit(WDOG_ACTIVE, &wdd->status);
 
 out_stop:
-   mutex_unlock(&wddev->lock);
+   mutex_unlock(&wdd->lock);
return err;
 }
 
 /*
  * watchdog_get_status: wrapper to get the watchdog status
- * @wddev: the watchdog device to get the status from
+ * @wdd: the watchdog device to get the status from
  * @status: the status of the watchdog device
  *
  * Get the watchdog's stat

[PATCH 3/8] watchdog: Introduce WDOG_RUNNING flag

2015-08-03 Thread Guenter Roeck
The WDOG_RUNNING flag is expected to be set by watchdog drivers if
the hardware watchdog is running. If the flag is set, the watchdog
subsystem will ping the watchdog even if the watchdog device is closed.

The watchdog driver stop function is now optional and may be omitted
if the watchdog can not be stopped. If stopping the watchdog is not
possible but the driver implements a stop function, it is responsible
to set the WDOG_RUNNING flag in its stop function.

Cc: Timo Kokkonen 
Cc: Uwe Kleine-König 
Signed-off-by: Guenter Roeck 
---
 Documentation/watchdog/watchdog-kernel-api.txt | 19 -
 drivers/watchdog/watchdog_core.c   |  2 +-
 drivers/watchdog/watchdog_dev.c| 39 --
 include/linux/watchdog.h   |  7 +
 4 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/Documentation/watchdog/watchdog-kernel-api.txt 
b/Documentation/watchdog/watchdog-kernel-api.txt
index 5fa085276874..7fda3c86cf46 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -144,17 +144,18 @@ are:
   device.
   The routine needs a pointer to the watchdog timer device structure as a
   parameter. It returns zero on success or a negative errno code for failure.
-* stop: with this routine the watchdog timer device is being stopped.
-  The routine needs a pointer to the watchdog timer device structure as a
-  parameter. It returns zero on success or a negative errno code for failure.
-  Some watchdog timer hardware can only be started and not be stopped. The
-  driver supporting this hardware needs to make sure that a start and stop
-  routine is being provided. This can be done by using a timer in the driver
-  that regularly sends a keepalive ping to the watchdog timer hardware.
 
 Not all watchdog timer hardware supports the same functionality. That's why
 all other routines/operations are optional. They only need to be provided if
 they are supported. These optional routines/operations are:
+* stop: with this routine the watchdog timer device is being stopped.
+  The routine needs a pointer to the watchdog timer device structure as a
+  parameter. It returns zero on success or a negative errno code for failure.
+  Some watchdog timer hardware can only be started and not be stopped. A
+  driver supporting such hardware does not have to implement the stop routine.
+  If a driver has no stop function, the watchdog core will set WDOG_RUNNING and
+  start calling the driver's keepalive pings function after the watchdog device
+  is closed.
 * ping: this is the routine that sends a keepalive ping to the watchdog timer
   hardware.
   The routine needs a pointer to the watchdog timer device structure as a
@@ -206,6 +207,10 @@ bit-operations. The status bits that are defined are:
   any watchdog_ops, so that you can be sure that no operations (other then
   unref) will get called after unregister, even if userspace still holds a
   reference to /dev/watchdog
+* WDOG_RUNNING: Set by the watchdog driver if the hardware watchdog is running.
+  The bit must be set if the watchdog timer hardware can not be stopped;
+  otherwise it is optional. If set, the watchdog driver core will send
+  keepalive pings to the watchdog hardware while the watchdog device is closed.
 
   To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
   timer device) you can either:
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 1a8059455413..b38d1b7ae10e 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -145,7 +145,7 @@ static int __watchdog_register_device(struct 
watchdog_device *wdd)
return -EINVAL;
 
/* Mandatory operations need to be supported */
-   if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
+   if (!wdd->ops->start)
return -EINVAL;
 
watchdog_check_min_max_timeout(wdd);
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 25849c1d6dc1..e0fbc4ac9bb7 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -58,8 +58,9 @@ static inline bool watchdog_need_worker(struct 
watchdog_device *wdd)
unsigned int hm = wdd->max_hw_timeout_ms;
unsigned int m = wdd->max_timeout * 1000;
 
-   return watchdog_active(wdd) && hm && hm != m &&
-   wdd->timeout * 500 > hm;
+   return (watchdog_active(wdd) && hm && hm != m &&
+   wdd->timeout * 500 > hm) ||
+  (!watchdog_active(wdd) && watchdog_running(wdd));
 }
 
 static inline void watchdog_update_worker(struct watchdog_device *wdd,
@@ -87,7 +88,7 @@ static int _watchdog_ping(struct watchdog_device *wdd)
if (test_bit(WDOG_UNREGISTERED, &wdd->status))
return -ENODEV;
 
-   if (!watchdog_active(wdd))
+   if (!watchdog_active(wdd) && !watchdog_running(wdd))
retu

[PATCH 8/8] watchdog: at91sam9: Convert to use infrastructure triggered keepalives

2015-08-03 Thread Guenter Roeck
The watchdog infrastructure now supports handling watchdog keepalive
if the watchdog is running while the watchdog device is closed.
The infrastructure now also supports generating additional heartbeats
if the maximum hardware timeout is smaller than or close to the
configured timeout. Convert the driver to use this
infrastructure.

Signed-off-by: Guenter Roeck 
---
 drivers/watchdog/at91sam9_wdt.c | 102 +---
 1 file changed, 11 insertions(+), 91 deletions(-)

diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index e4698f7c5f93..0de39b52962c 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -48,8 +47,8 @@
  * use this to convert a watchdog
  * value from/to milliseconds.
  */
-#define ticks_to_hz_rounddown(t)   t) + 1) * HZ) >> 8)
-#define ticks_to_hz_roundup(t) (t) + 1) * HZ) + 255) >> 8)
+#define ticks_to_ms_rounddown(t)   t) + 1) * 1000) >> 8)
+#define ticks_to_ms_roundup(t) (t) + 1) * 1000) + 255) >> 8)
 #define ticks_to_secs(t)   (((t) + 1) >> 8)
 #define secs_to_ticks(s)   ((s) ? (((s) << 8) - 1) : 0)
 
@@ -64,9 +63,6 @@
 /* Hardware timeout in seconds */
 #define WDT_HW_TIMEOUT 2
 
-/* Timer heartbeat (500ms) */
-#define WDT_TIMEOUT(HZ/2)
-
 /* User land timeout */
 #define WDT_HEARTBEAT 15
 static int heartbeat;
@@ -83,11 +79,8 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once 
started "
 struct at91wdt {
struct watchdog_device wdd;
void __iomem *base;
-   unsigned long next_heartbeat;   /* the next_heartbeat for the timer */
-   struct timer_list timer;/* The timer that pings the watchdog */
u32 mr;
u32 mr_mask;
-   unsigned long heartbeat;/* WDT heartbeat in jiffies */
bool nowayout;
unsigned int irq;
 };
@@ -107,47 +100,13 @@ static irqreturn_t wdt_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
 }
 
-/*
- * Reload the watchdog timer.  (ie, pat the watchdog)
- */
-static inline void at91_wdt_reset(struct at91wdt *wdt)
-{
-   wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
-}
-
-/*
- * Timer tick
- */
-static void at91_ping(unsigned long data)
-{
-   struct at91wdt *wdt = (struct at91wdt *)data;
-   if (time_before(jiffies, wdt->next_heartbeat) ||
-   !watchdog_active(&wdt->wdd)) {
-   at91_wdt_reset(wdt);
-   mod_timer(&wdt->timer, jiffies + wdt->heartbeat);
-   } else {
-   pr_crit("I will reset your machine !\n");
-   }
-}
-
 static int at91_wdt_start(struct watchdog_device *wdd)
 {
struct at91wdt *wdt = to_wdt(wdd);
-   /* calculate when the next userspace timeout will be */
-   wdt->next_heartbeat = jiffies + wdd->timeout * HZ;
-   return 0;
-}
 
-static int at91_wdt_stop(struct watchdog_device *wdd)
-{
-   /* The watchdog timer hardware can not be stopped... */
-   return 0;
-}
+   wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
 
-static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int 
new_timeout)
-{
-   wdd->timeout = new_timeout;
-   return at91_wdt_start(wdd);
+   return 0;
 }
 
 static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt)
@@ -157,8 +116,8 @@ static int at91_wdt_init(struct platform_device *pdev, 
struct at91wdt *wdt)
u32 value;
int err;
u32 mask = wdt->mr_mask;
-   unsigned long min_heartbeat = 1;
-   unsigned long max_heartbeat;
+   unsigned int min_timeout = jiffies_to_msecs(1);
+   unsigned int hw_timeout;
struct device *dev = &pdev->dev;
 
tmp = wdt_read(wdt, AT91_WDT_MR);
@@ -180,31 +139,15 @@ static int at91_wdt_init(struct platform_device *pdev, 
struct at91wdt *wdt)
delta = (tmp & AT91_WDT_WDD) >> 16;
 
if (delta < value)
-   min_heartbeat = ticks_to_hz_roundup(value - delta);
+   min_timeout = ticks_to_ms_roundup(value - delta);
 
-   max_heartbeat = ticks_to_hz_rounddown(value);
-   if (!max_heartbeat) {
+   hw_timeout = ticks_to_ms_rounddown(value);
+   if (hw_timeout < min_timeout * 2) {
dev_err(dev,
"heartbeat is too small for the system to handle it 
correctly\n");
return -EINVAL;
}
-
-   /*
-* Try to reset the watchdog counter 4 or 2 times more often than
-* actually requested, to avoid spurious watchdog reset.
-* If this is not possible because of the min_heartbeat value, reset
-* it at the min_heartbeat period.
-*/
-   if ((max_heartbeat / 4) >= min_heartbeat)
-   wdt->heartbeat = max_heartbeat / 4;
-   else if ((max_heartbeat / 2) >= min_heartbeat)
-   wdt->heartbeat = max_heartb

[PATCH 4/8] watchdog: Make set_timeout function optional

2015-08-03 Thread Guenter Roeck
For some watchdogs, the hardware timeout is fixed, and the
watchdog driver depends on the watchdog core to handle the
actual timeout. In this situation, the watchdog driver might
only set the 'timeout' variable but do nothing else.
This can as well be handled by the infrastructure, so make
the set_timeout callback optional. If WDIOF_SETTIMEOUT is
configured but the .set_timeout callback is not available,
update the timeout variable in the infrastructure code.

Signed-off-by: Guenter Roeck 
---
 Documentation/watchdog/watchdog-kernel-api.txt | 4 
 drivers/watchdog/watchdog_dev.c| 9 ++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/Documentation/watchdog/watchdog-kernel-api.txt 
b/Documentation/watchdog/watchdog-kernel-api.txt
index 7fda3c86cf46..2f1a4ad7e565 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -178,6 +178,10 @@ they are supported. These optional routines/operations are:
   because the watchdog does not necessarily has a 1 second resolution).
   (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
   watchdog's info structure).
+  If the watchdog driver does not have to perform any action but setting the
+  timeout value of the watchdog_device, this callback can be omitted.
+  If set_timeout is not provided but WDIOF_SETTIMEOUT is set, the watchdog
+  infrastructure updates the timeout value of the watchdog_device internally.
 * get_timeleft: this routines returns the time that's left before a reset.
 * ref: the operation that calls kref_get on the kref of a dynamically
   allocated watchdog_device struct.
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index e0fbc4ac9bb7..73bae196a081 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -260,9 +260,9 @@ out_status:
 static int watchdog_set_timeout(struct watchdog_device *wdd,
unsigned int timeout)
 {
-   int err;
+   int err = 0;
 
-   if (!wdd->ops->set_timeout || !(wdd->info->options & WDIOF_SETTIMEOUT))
+   if (!(wdd->info->options & WDIOF_SETTIMEOUT))
return -EOPNOTSUPP;
 
if (watchdog_timeout_invalid(wdd, timeout))
@@ -275,7 +275,10 @@ static int watchdog_set_timeout(struct watchdog_device 
*wdd,
goto out_timeout;
}
 
-   err = wdd->ops->set_timeout(wdd, timeout);
+   if (wdd->ops->set_timeout)
+   err = wdd->ops->set_timeout(wdd, timeout);
+   else
+   wdd->timeout = timeout;
 
watchdog_update_worker(wdd, true, false);
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/8] watchdog: imx2: Convert to use infrastructure triggered keepalives

2015-08-03 Thread Guenter Roeck
The watchdog infrastructure now supports handling watchdog keepalive
if the watchdog is running while the watchdog device is closed.
Convert the driver to use this infrastructure.

Signed-off-by: Guenter Roeck 
---
 drivers/watchdog/imx2_wdt.c | 72 -
 1 file changed, 12 insertions(+), 60 deletions(-)

diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 0bb1a1d1b170..66feef254661 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -25,7 +25,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -34,7 +33,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 #define DRIVER_NAME "imx2-wdt"
@@ -62,7 +60,6 @@
 struct imx2_wdt_device {
struct clk *clk;
struct regmap *regmap;
-   struct timer_list timer;/* Pings the watchdog when closed */
struct watchdog_device wdog;
struct notifier_block restart_handler;
 };
@@ -151,16 +148,6 @@ static int imx2_wdt_ping(struct watchdog_device *wdog)
return 0;
 }
 
-static void imx2_wdt_timer_ping(unsigned long arg)
-{
-   struct watchdog_device *wdog = (struct watchdog_device *)arg;
-   struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
-
-   /* ping it every wdog->timeout / 2 seconds to prevent reboot */
-   imx2_wdt_ping(wdog);
-   mod_timer(&wdev->timer, jiffies + wdog->timeout * HZ / 2);
-}
-
 static int imx2_wdt_set_timeout(struct watchdog_device *wdog,
unsigned int new_timeout)
 {
@@ -177,40 +164,19 @@ static int imx2_wdt_start(struct watchdog_device *wdog)
 {
struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
 
-   if (imx2_wdt_is_running(wdev)) {
-   /* delete the timer that pings the watchdog after close */
-   del_timer_sync(&wdev->timer);
+   if (imx2_wdt_is_running(wdev))
imx2_wdt_set_timeout(wdog, wdog->timeout);
-   } else
+   else
imx2_wdt_setup(wdog);
 
-   return imx2_wdt_ping(wdog);
-}
-
-static int imx2_wdt_stop(struct watchdog_device *wdog)
-{
-   /*
-* We don't need a clk_disable, it cannot be disabled once started.
-* We use a timer to ping the watchdog while /dev/watchdog is closed
-*/
-   imx2_wdt_timer_ping((unsigned long)wdog);
-   return 0;
-}
-
-static inline void imx2_wdt_ping_if_active(struct watchdog_device *wdog)
-{
-   struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+   set_bit(WDOG_RUNNING, &wdog->status);
 
-   if (imx2_wdt_is_running(wdev)) {
-   imx2_wdt_set_timeout(wdog, wdog->timeout);
-   imx2_wdt_timer_ping((unsigned long)wdog);
-   }
+   return imx2_wdt_ping(wdog);
 }
 
 static const struct watchdog_ops imx2_wdt_ops = {
.owner = THIS_MODULE,
.start = imx2_wdt_start,
-   .stop = imx2_wdt_stop,
.ping = imx2_wdt_ping,
.set_timeout = imx2_wdt_set_timeout,
 };
@@ -277,9 +243,10 @@ static int __init imx2_wdt_probe(struct platform_device 
*pdev)
watchdog_set_nowayout(wdog, nowayout);
watchdog_init_timeout(wdog, timeout, &pdev->dev);
 
-   setup_timer(&wdev->timer, imx2_wdt_timer_ping, (unsigned long)wdog);
-
-   imx2_wdt_ping_if_active(wdog);
+   if (imx2_wdt_is_running(wdev)) {
+   imx2_wdt_set_timeout(wdog, wdog->timeout);
+   set_bit(WDOG_RUNNING, &wdog->status);
+   }
 
/*
 * Disable the watchdog power down counter at boot. Otherwise the power
@@ -320,7 +287,6 @@ static int __exit imx2_wdt_remove(struct platform_device 
*pdev)
watchdog_unregister_device(wdog);
 
if (imx2_wdt_is_running(wdev)) {
-   del_timer_sync(&wdev->timer);
imx2_wdt_ping(wdog);
dev_crit(&pdev->dev, "Device removed: Expect reboot!\n");
}
@@ -334,10 +300,9 @@ static void imx2_wdt_shutdown(struct platform_device *pdev)
 
if (imx2_wdt_is_running(wdev)) {
/*
-* We are running, we need to delete the timer but will
-* give max timeout before reboot will take place
+* We are running, configure max timeout before reboot
+* will take place.
 */
-   del_timer_sync(&wdev->timer);
imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
imx2_wdt_ping(wdog);
dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n");
@@ -355,10 +320,6 @@ static int imx2_wdt_suspend(struct device *dev)
if (imx2_wdt_is_running(wdev)) {
imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
imx2_wdt_ping(wdog);
-
-   /* The watchdog is not active */
-   if (!watchdog_active(wdog))
-   del_timer_sync(&wdev->timer);
}
 
clk_disable_unprepare(wdev->clk);
@@ -384,19 +345,1

[PATCH 7/8] watchdog: gpio_wdt: Convert to use infrastructure triggered keepalives

2015-08-03 Thread Guenter Roeck
The watchdog infrastructure now supports handling watchdog keepalive
if the watchdog is running while the watchdog device is closed.
The infrastructure now also supports generating additional heartbeats
if the maximum hardware timeout is smaller than or close to the
configured timeout. Convert the driver to use this infrastructure.

Signed-off-by: Guenter Roeck 
---
 drivers/watchdog/gpio_wdt.c | 65 -
 1 file changed, 11 insertions(+), 54 deletions(-)

diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 1687cc2d7122..cbbdae440bfa 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -32,12 +32,8 @@ struct gpio_wdt_priv {
boolactive_low;
boolstate;
boolalways_running;
-   boolarmed;
unsigned inthw_algo;
-   unsigned inthw_margin;
-   unsigned long   last_jiffies;
struct notifier_block   notifier;
-   struct timer_list   timer;
struct watchdog_device  wdd;
 };
 
@@ -50,20 +46,12 @@ static void gpio_wdt_disable(struct gpio_wdt_priv *priv)
gpio_direction_input(priv->gpio);
 }
 
-static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv)
-{
-   priv->state = priv->active_low;
-   gpio_direction_output(priv->gpio, priv->state);
-   priv->last_jiffies = jiffies;
-   mod_timer(&priv->timer, priv->last_jiffies + priv->hw_margin);
-}
-
 static int gpio_wdt_start(struct watchdog_device *wdd)
 {
struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-   gpio_wdt_start_impl(priv);
-   priv->armed = true;
+   priv->state = priv->active_low;
+   gpio_direction_output(priv->gpio, priv->state);
 
return 0;
 }
@@ -72,10 +60,9 @@ static int gpio_wdt_stop(struct watchdog_device *wdd)
 {
struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-   priv->armed = false;
if (!priv->always_running) {
-   mod_timer(&priv->timer, 0);
gpio_wdt_disable(priv);
+   clear_bit(WDOG_RUNNING, &priv->wdd.status);
}
 
return 0;
@@ -85,32 +72,6 @@ static int gpio_wdt_ping(struct watchdog_device *wdd)
 {
struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-   priv->last_jiffies = jiffies;
-
-   return 0;
-}
-
-static int gpio_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
-{
-   wdd->timeout = t;
-
-   return gpio_wdt_ping(wdd);
-}
-
-static void gpio_wdt_hwping(unsigned long data)
-{
-   struct watchdog_device *wdd = (struct watchdog_device *)data;
-   struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
-
-   if (priv->armed && time_after(jiffies, priv->last_jiffies +
- msecs_to_jiffies(wdd->timeout * 1000))) {
-   dev_crit(wdd->dev, "Timer expired. System will reboot soon!\n");
-   return;
-   }
-
-   /* Restart timer */
-   mod_timer(&priv->timer, jiffies + priv->hw_margin);
-
switch (priv->hw_algo) {
case HW_ALGO_TOGGLE:
/* Toggle output pin */
@@ -124,6 +85,8 @@ static void gpio_wdt_hwping(unsigned long data)
gpio_set_value_cansleep(priv->gpio, priv->active_low);
break;
}
+
+   return 0;
 }
 
 static int gpio_wdt_notify_sys(struct notifier_block *nb, unsigned long code,
@@ -132,12 +95,10 @@ static int gpio_wdt_notify_sys(struct notifier_block *nb, 
unsigned long code,
struct gpio_wdt_priv *priv = container_of(nb, struct gpio_wdt_priv,
  notifier);
 
-   mod_timer(&priv->timer, 0);
-
switch (code) {
case SYS_HALT:
case SYS_POWER_OFF:
-   gpio_wdt_disable(priv);
+   gpio_wdt_stop(&priv->wdd);
break;
default:
break;
@@ -157,7 +118,6 @@ static const struct watchdog_ops gpio_wdt_ops = {
.start  = gpio_wdt_start,
.stop   = gpio_wdt_stop,
.ping   = gpio_wdt_ping,
-   .set_timeout= gpio_wdt_set_timeout,
 };
 
 static int gpio_wdt_probe(struct platform_device *pdev)
@@ -205,9 +165,6 @@ static int gpio_wdt_probe(struct platform_device *pdev)
if (hw_margin < 2 || hw_margin > 65535)
return -EINVAL;
 
-   /* Use safe value (1/2 of real timeout) */
-   priv->hw_margin = msecs_to_jiffies(hw_margin / 2);
-
priv->always_running = of_property_read_bool(pdev->dev.of_node,
 "always-running");
 
@@ -217,11 +174,15 @@ static int gpio_wdt_probe(struct platform_device *pdev)
priv->wdd.ops   = &gpio_wdt_ops;
priv->wdd.min_timeout   = SOFT_TIMEOUT_MIN;
priv->wdd.max_timeout   = SOFT_TIMEOUT_MAX;
+   priv->wdd.max_hw_timeout_m

[PATCH 6/8] watchdog: retu: Convert to use infrastructure triggered keepalives

2015-08-03 Thread Guenter Roeck
The watchdog infrastructure now supports handling watchdog keepalive
if the watchdog is running while the watchdog device is closed.
Convert the driver to use this infrastructure.

Signed-off-by: Guenter Roeck 
---
 drivers/watchdog/retu_wdt.c | 78 -
 1 file changed, 7 insertions(+), 71 deletions(-)

diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c
index b7c68e275aeb..ce2982a7670c 100644
--- a/drivers/watchdog/retu_wdt.c
+++ b/drivers/watchdog/retu_wdt.c
@@ -28,69 +28,22 @@
 /* Watchdog timer values in seconds */
 #define RETU_WDT_MAX_TIMER 63
 
-struct retu_wdt_dev {
-   struct retu_dev *rdev;
-   struct device   *dev;
-   struct delayed_work ping_work;
-};
-
-/*
- * Since Retu watchdog cannot be disabled in hardware, we must kick it
- * with a timer until userspace watchdog software takes over. If
- * CONFIG_WATCHDOG_NOWAYOUT is set, we never start the feeding.
- */
-static void retu_wdt_ping_enable(struct retu_wdt_dev *wdev)
-{
-   retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER);
-   schedule_delayed_work(&wdev->ping_work,
-   round_jiffies_relative(RETU_WDT_MAX_TIMER * HZ / 2));
-}
-
-static void retu_wdt_ping_disable(struct retu_wdt_dev *wdev)
-{
-   retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER);
-   cancel_delayed_work_sync(&wdev->ping_work);
-}
-
-static void retu_wdt_ping_work(struct work_struct *work)
-{
-   struct retu_wdt_dev *wdev = container_of(to_delayed_work(work),
-   struct retu_wdt_dev, ping_work);
-   retu_wdt_ping_enable(wdev);
-}
-
 static int retu_wdt_start(struct watchdog_device *wdog)
 {
-   struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+   struct retu_dev *rdev = watchdog_get_drvdata(wdog);
 
-   retu_wdt_ping_disable(wdev);
+   set_bit(WDOG_RUNNING, &wdog->status);
 
-   return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
-}
-
-static int retu_wdt_stop(struct watchdog_device *wdog)
-{
-   struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
-
-   retu_wdt_ping_enable(wdev);
-
-   return 0;
-}
-
-static int retu_wdt_ping(struct watchdog_device *wdog)
-{
-   struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
-
-   return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
+   return retu_write(rdev, RETU_REG_WATCHDOG, wdog->timeout);
 }
 
 static int retu_wdt_set_timeout(struct watchdog_device *wdog,
unsigned int timeout)
 {
-   struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+   struct retu_dev *rdev = watchdog_get_drvdata(wdog);
 
wdog->timeout = timeout;
-   return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout);
+   return retu_write(rdev, RETU_REG_WATCHDOG, wdog->timeout);
 }
 
 static const struct watchdog_info retu_wdt_info = {
@@ -101,8 +54,6 @@ static const struct watchdog_info retu_wdt_info = {
 static const struct watchdog_ops retu_wdt_ops = {
.owner  = THIS_MODULE,
.start  = retu_wdt_start,
-   .stop   = retu_wdt_stop,
-   .ping   = retu_wdt_ping,
.set_timeout= retu_wdt_set_timeout,
 };
 
@@ -111,39 +62,26 @@ static int retu_wdt_probe(struct platform_device *pdev)
struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent);
bool nowayout = WATCHDOG_NOWAYOUT;
struct watchdog_device *retu_wdt;
-   struct retu_wdt_dev *wdev;
int ret;
 
retu_wdt = devm_kzalloc(&pdev->dev, sizeof(*retu_wdt), GFP_KERNEL);
if (!retu_wdt)
return -ENOMEM;
 
-   wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL);
-   if (!wdev)
-   return -ENOMEM;
-
retu_wdt->info  = &retu_wdt_info;
retu_wdt->ops   = &retu_wdt_ops;
retu_wdt->timeout   = RETU_WDT_MAX_TIMER;
retu_wdt->min_timeout   = 0;
retu_wdt->max_timeout   = RETU_WDT_MAX_TIMER;
 
-   watchdog_set_drvdata(retu_wdt, wdev);
+   watchdog_set_drvdata(retu_wdt, rdev);
watchdog_set_nowayout(retu_wdt, nowayout);
 
-   wdev->rdev  = rdev;
-   wdev->dev   = &pdev->dev;
-
-   INIT_DELAYED_WORK(&wdev->ping_work, retu_wdt_ping_work);
-
ret = watchdog_register_device(retu_wdt);
if (ret < 0)
return ret;
 
-   if (nowayout)
-   retu_wdt_ping(retu_wdt);
-   else
-   retu_wdt_ping_enable(wdev);
+   retu_wdt_start(retu_wdt);
 
platform_set_drvdata(pdev, retu_wdt);
 
@@ -153,10 +91,8 @@ static int retu_wdt_probe(struct platform_device *pdev)
 static int retu_wdt_remove(struct platform_device *pdev)
 {
struct watchdog_device *wdog = platform_get_drvdata(pdev);
-   struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog);
 
watchdog_unregi

[PATCH 2/8] watchdog: Introduce hardware maximum timeout in watchdog core

2015-08-03 Thread Guenter Roeck
Introduce an optional hardware maximum timeout in the watchdog core.
The hardware maximum timeout can be lower than the maximum timeout.

Drivers can set the maximum hardware timeout value in the watchdog data
structure. If the configured timeout exceeds half the value of the
maximum hardware timeout, the watchdog core enables a timer function
to assist sending keepalive requests to the watchdog driver.

Cc: Timo Kokkonen 
Cc: Uwe Kleine-König 
Signed-off-by: Guenter Roeck 
---
 Documentation/watchdog/watchdog-kernel-api.txt |  14 +++
 drivers/watchdog/watchdog_dev.c| 121 +
 include/linux/watchdog.h   |  21 -
 3 files changed, 135 insertions(+), 21 deletions(-)

diff --git a/Documentation/watchdog/watchdog-kernel-api.txt 
b/Documentation/watchdog/watchdog-kernel-api.txt
index d8b0d3367706..5fa085276874 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -53,9 +53,12 @@ struct watchdog_device {
unsigned int timeout;
unsigned int min_timeout;
unsigned int max_timeout;
+   unsigned int max_hw_timeout_ms;
+   unsigned long last_keepalive;
void *driver_data;
struct mutex lock;
unsigned long status;
+   struct delayed_work work;
struct list_head deferred;
 };
 
@@ -73,8 +76,18 @@ It contains following fields:
   additional information about the watchdog timer itself. (Like it's unique 
name)
 * ops: a pointer to the list of watchdog operations that the watchdog supports.
 * timeout: the watchdog timer's timeout value (in seconds).
+  This is the time after which the system will reboot if user space does
+  not send a heartbeat request if the watchdog device is opened.
+  This may or may not be the hardware watchdog timeout. See max_hw_timeout_ms
+  for more details.
 * min_timeout: the watchdog timer's minimum timeout value (in seconds).
 * max_timeout: the watchdog timer's maximum timeout value (in seconds).
+* max_hw_timeout_ms: Maximum hardware timeout, in milli-seconds. May differ
+  from max_timeout. If set, the infrastructure will send a heartbeat to the
+  watchdog driver if 'timeout' is larger than 'max_hw_timeout / 2',
+  unless user space failed to ping the watchdog for 'timeout' seconds.
+* last_keepalive: Time of most recent keepalive triggered from user space,
+  in jiffies.
 * bootstatus: status of the device after booting (reported with watchdog
   WDIOF_* status bits).
 * driver_data: a pointer to the drivers private data of a watchdog device.
@@ -85,6 +98,7 @@ It contains following fields:
   information about the status of the device (Like: is the watchdog timer
   running/active, is the nowayout bit set, is the device opened via
   the /dev/watchdog interface or not, ...).
+* work: Worker data structure for WatchDog Timer Driver Core internal use only.
 * deferred: entry in wtd_deferred_reg_list which is used to
   register early initialized watchdogs.
 
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 06171c73daf5..25849c1d6dc1 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -37,7 +37,9 @@
 #include/* For the -ENODEV/... values */
 #include   /* For printk/panic/... */
 #include   /* For file operations */
+#include  /* For timeout functions */
 #include /* For watchdog specific items */
+#include/* For workqueue */
 #include   /* For handling misc devices */
 #include /* For __init/__exit/... */
 #include  /* For copy_to_user/put_user/... */
@@ -49,6 +51,53 @@ static dev_t watchdog_devt;
 /* the watchdog device behind /dev/watchdog */
 static struct watchdog_device *old_wdd;
 
+static struct workqueue_struct *watchdog_wq;
+
+static inline bool watchdog_need_worker(struct watchdog_device *wdd)
+{
+   unsigned int hm = wdd->max_hw_timeout_ms;
+   unsigned int m = wdd->max_timeout * 1000;
+
+   return watchdog_active(wdd) && hm && hm != m &&
+   wdd->timeout * 500 > hm;
+}
+
+static inline void watchdog_update_worker(struct watchdog_device *wdd,
+ bool cancel, bool sync)
+{
+   if (watchdog_need_worker(wdd)) {
+   unsigned int t = wdd->timeout * 1000;
+
+   if (wdd->max_hw_timeout_ms && t > wdd->max_hw_timeout_ms)
+   t = wdd->max_hw_timeout_ms;
+   queue_delayed_work(watchdog_wq, &wdd->work,
+  msecs_to_jiffies(t / 2));
+   } else if (cancel) {
+   if (sync)
+   cancel_delayed_work_sync(&wdd->work);
+   else
+   cancel_delayed_work(&wdd->work);
+   }
+}
+
+static int _watchdog_ping(struct watchdog_device *wdd)
+{
+   int err;
+
+   if (test_bit(WDOG_UNREGISTERED, &wdd->status))
+   return -ENODEV;
+
+   if (!watchdog_active(wdd))
+

[PATCH 0/8] watchdog: Add support for keepalives triggered by infrastructure

2015-08-03 Thread Guenter Roeck
The watchdog infrastructure is currently purely passive, meaning
it only passes information from user space to drivers and vice versa.

Since watchdog hardware tends to have its own quirks, this can result
in quite complex watchdog drivers. A number of scenarios are especially common.

- A watchdog is always active and can not be disabled, or can not be disabled
  once enabled. To support such hardware, watchdog drivers have to implement
  their own timers and use those timers to trigger watchdog keepalives while
  the watchdog device is not or not yet opened.
- A variant of this is the desire to enable a watchdog as soon as its driver
  has been instantiated, to protect the system while it is still booting up,
  but the watchdog daemon is not yet running.
- Some watchdogs have a very short maximum timeout, in the range of just a few
  seconds. Such low timeouts are difficult if not impossible to support from
  user space. Drivers supporting such watchdog hardware need to implement
  a timer function to augment heartbeats from user space.

This patch set solves the above problems while keeping changes to the
watchdog core minimal.

- A new status flag, WDOG_RUNNING, informs the watchdog subsystem that a
  watchdog is running, and that the watchdog subsystem needs to generate
  heartbeat requests while the associated watchdog device is closed.
- A new parameter in the watchdog data structure, max_hw_timeout_ms, informs
  the watchdog subsystem about a maximum hardware timeout. The watchdog
  subsystem uses this information together with the configured timeout
  and the maximum permitted timeout to determine if it needs to generate
  additional heartbeat requests.

Patch #1 is a preparatory patch.

Patch #2 adds timer functionality to the watchdog core. It solves the problem
of short maximum hardware timeouts by augmenting heartbeats triggered from
user space with internally triggered heartbeats.

Patch #3 adds functionality to generate heartbeats while the watchdog device is
closed. It handles situations where the watchdog is running after
the driver has been instantiated, but the device is not yet opened,
and post-close situations necessary if a watchdog can not be stopped.

Patch #4 makes the set_timeout function optional. This is now possible since
timeout changes can now be completely handled in the watchdog core, for
example if the hardware watchdog timeout is fixed.

Patch #5 to #8 are example conversions of some watchdog drivers.
Those patches will require testing.

This patch set does not solve all limitations of the watchdog subsystem.
Specifically, it does not add support for the following features.

- It is desirable to be able to specify a maximum early timeout,
  from booting the system to opening the watchdog device.
- Some watchdogs may require a minimum period of time between
  heartbeats. Examples are DA9062 and possibly AT91SAM9x.

This and other features will be addressed with subsequent patches.

The patch set is inspired by an earlier patch set from Timo Kokkonen.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] staging/lustre: Properly reference kthread_run instead of cfs_daemonize

2015-08-03 Thread green
From: Oleg Drokin 

cfs_daemonize is long gone and replaced by a proper call to kthread_run,
so update the comment to reflect that fact.

Signed-off-by: Oleg Drokin 
---
 drivers/staging/lustre/lustre/include/lustre_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h 
b/drivers/staging/lustre/lustre/include/lustre_net.h
index 77a7de9..3bb2f8b 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -2183,7 +2183,7 @@ struct ptlrpcd_ctl {
 */
struct ptlrpc_request_set  *pc_set;
/**
-* Thread name used in cfs_daemonize()
+* Thread name used in kthread_run()
 */
charpc_name[16];
/**
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] staging/lustre/ptlrpc: Remove stray cfs_daemonize comment

2015-08-03 Thread green
From: Oleg Drokin 

Ever since daemonize was removed in 3.18, there are no longer
any flags passed to kthread_run.
Most of the comments were deleted, but this one lingered on
until now.

Signed-off-by: Oleg Drokin 
---
 drivers/staging/lustre/lustre/ptlrpc/pinger.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c 
b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
index d05c37c..f8edb79 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -318,8 +318,6 @@ int ptlrpc_start_pinger(void)
 
strcpy(pinger_thread.t_name, "ll_ping");
 
-   /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
-* just drop the VM and FILES in cfs_daemonize_ctxt() right away. */
rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread,
 "%s", pinger_thread.t_name));
if (IS_ERR_VALUE(rc)) {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] Lustre: remove cfs_daemonize from comments

2015-08-03 Thread green
From: Oleg Drokin 

cfs_daemonize was removed long ago, but I just stumbled upon
a couple of instances where it was still referenced in the comments,
so here are the patches to clean it up and not cause any unnecessary
confusion.

Oleg Drokin (2):
  staging/lustre/ptlrpc: Remove stray daemonize comment
  staging/lustre: Properly reference kthread_run instead of
cfs_daemonize

 drivers/staging/lustre/lustre/include/lustre_net.h | 2 +-
 drivers/staging/lustre/lustre/ptlrpc/pinger.c  | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks

2015-08-03 Thread hl

Hi Stephen,

On 04/08/15 09:14, Stephen Boyd wrote:

On 08/03/2015 06:03 PM, Lin Huang wrote:

From: huang lin 

pclk_pd_pmu needs to keep running and with the upcoming gpio clock
handling this is not always the case anymore. So add it to the list
of critical clocks for now.

Signed-off-by: Heiko Stuebner 
Signed-off-by: Lin Huang 


From: says huang lin, first signed-off-by is Heiko Stuebner, and final 
signed-off-by is Lin Huang... who actually authored this patch? Is Lin 
Huang the same person as huang lin ?


This patch was written by Heiko and uploaded by me. I am sorry for confusing you with 
my name; yes, Lin Huang and huang lin are the same person, it is all me.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V3 request from stable 3.10 to 3.14] efi: fix 32bit kernel boot failed problem using efi

2015-08-03 Thread fupan.li
From: Fupan Li 

Commit 35d5134b7d5a
("x86/efi: Correct EFI boot stub use of code32_start")
introduced a bug that causes a 32-bit kernel to fail to boot
when using the EFI method. It should use the label's address instead
of the value stored at the label to calculate the address of
code32_start.

Signed-off-by: Fupan Li 
---
 arch/x86/boot/compressed/head_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/head_32.S 
b/arch/x86/boot/compressed/head_32.S
index abb988a..3b28eff 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -54,7 +54,7 @@ ENTRY(efi_pe_entry)
callreloc
 reloc:
popl%ecx
-   sublreloc, %ecx
+   subl$reloc, %ecx
movl%ecx, BP_code32_start(%eax)
 
sub $0x4, %esp
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] serial: don't announce CIR serial ports

2015-08-03 Thread Peter Hurley
Hi Maciej,

On 08/02/2015 05:09 PM, Maciej S. Szmigiero wrote:
> CIR type serial ports aren't real serial ports.
> This is just a way to prevent legacy serial driver
> from probing and eventually binding some resources
> so don't announce them like normal serial ports.

I'd like to keep some form of reporting so that we know the
port was properly probed; what about extending uart_report_port()
to include CIR + disabled status?

Secondly, good catch! Because we should not be trying to
register a console on this port, nor driving modem signals.

So maybe an early exit after uart_report_port?

Regards,
Peter Hurley

> Signed-off-by: Maciej Szmigiero 
> ---
>  drivers/tty/serial/serial_core.c |2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/tty/serial/serial_core.c 
> b/drivers/tty/serial/serial_core.c
> index f368520..99f944d 100644
> --- a/drivers/tty/serial/serial_core.c
> +++ b/drivers/tty/serial/serial_core.c
> @@ -2237,7 +2237,7 @@ uart_configure_port(struct uart_driver *drv, struct 
> uart_state *state,
>   port->ops->config_port(port, flags);
>   }
>  
> - if (port->type != PORT_UNKNOWN) {
> + if (port->type != PORT_UNKNOWN && port->type != PORT_8250_CIR) {
>   unsigned long flags;
>  
>   uart_report_port(drv, port);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 01/11] user_ns: 3 new LSM hooks for user namespace operations

2015-08-03 Thread Kees Cook
On Mon, Aug 3, 2015 at 4:34 AM, Lukasz Pawelczyk
 wrote:
> On pią, 2015-07-31 at 22:48 -0500, Serge E. Hallyn wrote:
>> On Fri, Jul 31, 2015 at 11:28:56AM +0200, Lukasz Pawelczyk wrote:
>> > On czw, 2015-07-30 at 16:30 -0500, Serge E. Hallyn wrote:
>> > > On Fri, Jul 24, 2015 at 12:04:35PM +0200, Lukasz Pawelczyk wrote:
>> > > > @@ -969,6 +982,7 @@ static int userns_install(struct nsproxy
>> > > > *nsproxy, struct ns_common *ns)
>> > > >  {
>> > > > struct user_namespace *user_ns = to_user_ns(ns);
>> > > > struct cred *cred;
>> > > > +   int err;
>> > > >
>> > > > /* Don't allow gaining capabilities by reentering
>> > > >  * the same user namespace.
>> > > > @@ -986,6 +1000,10 @@ static int userns_install(struct nsproxy
>> > > > *nsproxy, struct ns_common *ns)
>> > > > if (!ns_capable(user_ns, CAP_SYS_ADMIN))
>> > > > return -EPERM;
>> > > >
>> > > > +   err = security_userns_setns(nsproxy, user_ns);
>> > > > +   if (err)
>> > > > +   return err;
>> > >
>> > > So at this point the LSM thinks current is in the new ns.  If
>> > > prepare_creds() fails below, should it be informed of that?
>> > > (Or am I over-thinking this?)
>> > >
>> > > > +
>> > > > cred = prepare_creds();
>> > > > if (!cred)
>> > > > return -ENOMEM;
>> >
>> > Hmm, the use case for this hook I had in mind was just to allow or
>> > disallow the operation based on the information passed in
>> > arguments.
>> > Not to register the current in any way so LSM can think it is or
>> > isn't
>> > in the new namespace.
>> >
>> > I think that any other LSM check that would like to know in what
>> > namespace the current is, would just check that from current's
>> > creds.
>> > Not use some stale and duplicated information the above hook could
>> > have
>> > registered.
>> >
>> > I see no reason for this hook to change the LSM state, only to
>> > answer
>> > the question: allowed/disallowed (eventually return an error cause
>> > it
>> > is unable to give an answer which falls into the disallow
>> > category).
>>
>> How about renaming it "security_userns_may_setns()" for clarity?
>
> I personally have nothing against it. However looking at already
> existing hooks only one of them has "may" in the name (unix_may_send)
> while a lot clearly have exactly this purpose (e.g. most of inode_*
> family, some from file_* and task_*). So it seems the trend is against
> it.
>
> What do you think? Anyone else has an opinion?

Personally, I prefer that hooks be named as closely to their caller,
or calling context, as possible. In this case, it seems like "may" is
implied. It's an LSM like all the others, so it can fail, which would
cause the caller to fail too, so "may" tends to be implicit. I would
leave it as-is, but I could be convinced otherwise.

-Kees

-- 
Kees Cook
Chrome OS Security
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime

2015-08-03 Thread Kees Cook
On Mon, Aug 3, 2015 at 4:19 PM, Willy Tarreau  wrote:
> On Mon, Aug 03, 2015 at 03:35:15PM -0700, Kees Cook wrote:
>> Yay for perm disable! Thank you! :)
>
> Andy would like to see this evolve towards something possibly
> more complete and/or generic. I think this needs more thoughts
> and that we should possibly stick to 0/1 for now and decide how
> we want to make this evolve later to cover permanent disable,
> various ABIs, etc...
>
> What do you think ?

That's probably the best way forward. I still think a generic syscall
disabling feature would be nice. :) I won't have time to work on it
for a little while, though.

-Kees

-- 
Kees Cook
Chrome OS Security
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 2/7] cpufreq: opp: fix handling of turbo modes

2015-08-03 Thread Krzysztof Kozlowski
On 30.07.2015 23:37, Kukjin Kim wrote:
> On 07/27/15 20:47, Bartlomiej Zolnierkiewicz wrote:
>> On Monday, July 27, 2015 05:06:41 PM Viresh Kumar wrote:
>>> On 27-07-15, 13:14, Bartlomiej Zolnierkiewicz wrote:
 Sorry but you don't seem to understand the issue.
>>>
>>> :)
>>>
>>> No, I did. I understand that if someone uses opp bindings today with
>>> some entries as turbo OPPs, cpufreq will use them as normal
>>> frequencies. And that may harm the board.
>>>
>>> BUT, opp-v2 code isn't ready to be used yet. And platforms should see
>>> what all is implemented before trying to use them.
>>
>> OK.
>>
>>> All I was saying is, this isn't a FIX as we haven't introduced the
>>> feature yet. Otherwise I had no issues with the patch.
>>
>> I will update the description for the next patchset revision.
>>
> Hi Bart,
> 
> When will you re-post v3? Because I have a plan to send a pull-request
> to arm-soc until this weekend...

Dear Kukjin,

We are already at 4.2-rc5 and you did not send the pull request before
the weekend as you said. It is really late and there is no special
reason for delaying the request. What happened?

Best regards,
Krzysztof

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the security tree with Linus' tree

2015-08-03 Thread Stephen Rothwell
Hi James,

Today's linux-next merge of the security tree got a conflict in:

  security/yama/yama_lsm.c

between commit:

  5413fcdbe9e7 ("Adding YAMA hooks also when YAMA is not stacked.")

from Linus' tree and commit:

  730daa164e7c ("Yama: remove needless CONFIG_SECURITY_YAMA_STACKED")

from the security tree.

I fixed it up (the latter removed the code updated by the former, so I
just did that) and can carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks

2015-08-03 Thread Stephen Boyd

On 08/03/2015 06:03 PM, Lin Huang wrote:

From: huang lin 

pclk_pd_pmu needs to keep running and with the upcoming gpio clock
handling this is not always the case anymore. So add it to the list
of critical clocks for now.

Signed-off-by: Heiko Stuebner 
Signed-off-by: Lin Huang 


From: says huang lin, first signed-off-by is Heiko Stuebner, and final 
signed-off-by is Lin Huang... who actually authored this patch? Is Lin 
Huang the same person as huang lin ?


--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm: add the block to the tail of the list in expand()

2015-08-03 Thread Xishi Qiu
On 2015/8/3 12:10, Dave Hansen wrote:

> On 08/02/2015 07:05 PM, Xishi Qiu wrote:
 Also, this might not do very much good in practice.  If you are
 splitting a high-order page, you are doing the split because the
 lower-order lists are empty.  So won't that list_add() be to an empty
>>
>> I made a mistake, you are right, all the lower-order lists are empty,
>> so it makes no sense to add to the tail.
> 
> I actually tested this experimentally and the lists are not always
> empty.  It's probably __rmqueue_smallest() vs. __rmqueue_fallback() logic.
> 
> In any case, you might want to double-check.
> 

Hi Dave,

How did you do the experiment?

Thanks,
Xishi Qiu

> .
> 



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: cgroup/loop Bad page state oops in Linux v4.2-rc3-136-g45b4b782e848

2015-08-03 Thread Josh Boyer
On Mon, Aug 3, 2015 at 12:56 PM, Josh Boyer  wrote:
> On Mon, Aug 3, 2015 at 10:28 AM, Mike Snitzer  wrote:
>> On Sun, Aug 02 2015 at 10:01P -0400,
>> Josh Boyer  wrote:
>>
>>> On Fri, Jul 31, 2015 at 2:58 PM, Josh Boyer  
>>> wrote:
>>> > On Thu, Jul 30, 2015 at 8:19 PM, Mike Snitzer  wrote:
>>> >>
>>> >> The only commit that looks even remotely related (given 32bit concerns)
>>> >> would be 1c220c69ce0dcc0f234a9f263ad9c0864f971852
>>> >
>>> > Confirmed.  I built kernels for our tester that started with the
>>> > working snapshot and applied the patches above one at a time.  The
>>> > failing patch was the commit you suspected.
>>> >
>>> > I can try and build a 4.2-rc4 kernel with that reverted, but it would
>>> > be good if someone could start thinking about how that could cause
>>> > this issue.
>>>
>>> A revert on top of 4.2-rc4 booted.  So this is currently causing
>>> issues with upstream as well.
>>
>> Hi Josh,
>>
>> I've staged the following fix in linux-next (for 4.2-rc6 inclusion):
>> https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=76270d574acc897178a5c8be0bd2a743a77e4bac
>>
>> Can you please verify that it works for your 32bit testcase against
>> 4.2-rc4 (or rc5)?
>
> Sure, I'll get a kernel with this included spun up and ask Adam to test.

Adam tested this with success.  If you're still collecting patch
metadata, adding:

Tested-by: Adam Williamson 

would be appreciated.

josh

>> From: Mike Snitzer 
>> Date: Mon, 3 Aug 2015 09:54:58 -0400
>> Subject: [PATCH] dm: fix dm_merge_bvec regression on 32 bit systems
>>
>> A DM regression on 32 bit systems was reported against v4.2-rc3 here:
>> https://lkml.org/lkml/2015/7/29/401
>>
>> Fix this by reverting both commit 1c220c69 ("dm: fix casting bug in
>> dm_merge_bvec()") and 148e51ba ("dm: improve documentation and code
>> clarity in dm_merge_bvec").  This combined revert is done to eliminate
>> the possibility of a partial revert in stable@ kernels.
>>
>> In hindsight the correct fix, at the time 1c220c69 was applied to fix
>> the regression that 148e51ba introduced, should've been to simply revert
>> 148e51ba.
>>
>> Reported-by: Josh Boyer 
>> Acked-by: Joe Thornber 
>> Signed-off-by: Mike Snitzer 
>> Cc: sta...@vger.kernel.org # 3.19+
>> ---
>>  drivers/md/dm.c | 27 ++-
>>  1 file changed, 10 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
>> index ab37ae1..0d7ab20 100644
>> --- a/drivers/md/dm.c
>> +++ b/drivers/md/dm.c
>> @@ -1729,7 +1729,8 @@ static int dm_merge_bvec(struct request_queue *q,
>> struct mapped_device *md = q->queuedata;
>> struct dm_table *map = dm_get_live_table_fast(md);
>> struct dm_target *ti;
>> -   sector_t max_sectors, max_size = 0;
>> +   sector_t max_sectors;
>> +   int max_size = 0;
>>
>> if (unlikely(!map))
>> goto out;
>> @@ -1742,18 +1743,10 @@ static int dm_merge_bvec(struct request_queue *q,
>>  * Find maximum amount of I/O that won't need splitting
>>  */
>> max_sectors = min(max_io_len(bvm->bi_sector, ti),
>> - (sector_t) queue_max_sectors(q));
>> + (sector_t) BIO_MAX_SECTORS);
>> max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
>> -
>> -   /*
>> -* FIXME: this stop-gap fix _must_ be cleaned up (by passing a 
>> sector_t
>> -* to the targets' merge function since it holds sectors not bytes).
>> -* Just doing this as an interim fix for stable@ because the more
>> -* comprehensive cleanup of switching to sector_t will impact every
>> -* DM target that implements a ->merge hook.
>> -*/
>> -   if (max_size > INT_MAX)
>> -   max_size = INT_MAX;
>> +   if (max_size < 0)
>> +   max_size = 0;
>>
>> /*
>>  * merge_bvec_fn() returns number of bytes
>> @@ -1761,13 +1754,13 @@ static int dm_merge_bvec(struct request_queue *q,
>>  * max is precomputed maximal io size
>>  */
>> if (max_size && ti->type->merge)
>> -   max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
>> +   max_size = ti->type->merge(ti, bvm, biovec, max_size);
>> /*
>>  * If the target doesn't support merge method and some of the devices
>> -* provided their merge_bvec method (we know this by looking for the
>> -* max_hw_sectors that dm_set_device_limits may set), then we can't
>> -* allow bios with multiple vector entries.  So always set max_size
>> -* to 0, and the code below allows just one page.
>> +* provided their merge_bvec method (we know this by looking at
>> +* queue_max_hw_sectors), then we can't allow bios with multiple 
>> vector
>> +* entries.  So always set max_size to 0, and the code below allows
>> +* just one page.
>>  */
>> else if (queue_ma

Re: [PATCH] x86: Clean up files of Intel Processor Trace

2015-08-03 Thread Takao Indoh
On 2015/08/03 20:03, Borislav Petkov wrote:
> On Mon, Aug 03, 2015 at 11:08:07AM +0200, Peter Zijlstra wrote:
>> For those of us suffering OCDs and all, its a good change though. The
>> alfabet song does go: A, B, C, D etc.. after all. Not: A, C, D, B ...
> 
> ... except that x86 encoding orders regs like it was originally: AX,
> CX, DX, BX, ... Don't ask me why - looks like someone thought that the
> C (count) and D (double precision - AX extension) registers were more
> important than B (base).
> 
> Or someone was simply illiterate.
> 

I thought this was typo. If it is intentional, I'll keep it intact.

Thanks,
Takao Indoh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 2/2] pinctrl: rockchip: only enable gpio clock when it setting

2015-08-03 Thread Lin Huang
From: huang lin 

A gpio keeps its state even when its clock is disabled, so to save
power, only enable the gpio clock while the gpio is being configured.

Signed-off-by: Heiko Stuebner 
Signed-off-by: Lin Huang 
---
Changes in v2:
Advices by Douglas Anderson
-use readl_relaxed() instead readl()
-fix commit message format error

 drivers/pinctrl/pinctrl-rockchip.c | 57 +++---
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-rockchip.c 
b/drivers/pinctrl/pinctrl-rockchip.c
index cc2843a..70a4539 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -945,6 +945,7 @@ static int _rockchip_pmx_gpio_set_direction(struct 
gpio_chip *chip,
if (ret < 0)
return ret;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -953,9 +954,11 @@ static int _rockchip_pmx_gpio_set_direction(struct 
gpio_chip *chip,
data |= BIT(pin);
else
data &= ~BIT(pin);
+
writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR);
 
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 
return 0;
 }
@@ -1389,6 +1392,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, 
unsigned offset, int value)
unsigned long flags;
u32 data;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl(reg);
@@ -1398,6 +1402,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, 
unsigned offset, int value)
writel(data, reg);
 
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 }
 
 /*
@@ -1409,7 +1414,9 @@ static int rockchip_gpio_get(struct gpio_chip *gc, 
unsigned offset)
struct rockchip_pin_bank *bank = gc_to_pin_bank(gc);
u32 data;
 
+   clk_enable(bank->clk);
data = readl(bank->reg_base + GPIO_EXT_PORT);
+   clk_disable(bank->clk);
data >>= offset;
data &= 1;
return data;
@@ -1546,6 +1553,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
if (ret < 0)
return ret;
 
+   clk_enable(bank->clk);
spin_lock_irqsave(&bank->slock, flags);
 
data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR);
@@ -1603,6 +1611,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
default:
irq_gc_unlock(gc);
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
return -EINVAL;
}
 
@@ -1611,6 +1620,7 @@ static int rockchip_irq_set_type(struct irq_data *d, 
unsigned int type)
 
irq_gc_unlock(gc);
spin_unlock_irqrestore(&bank->slock, flags);
+   clk_disable(bank->clk);
 
return 0;
 }
@@ -1620,8 +1630,10 @@ static void rockchip_irq_suspend(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct rockchip_pin_bank *bank = gc->private;
 
+   clk_enable(bank->clk);
bank->saved_masks = irq_reg_readl(gc, GPIO_INTMASK);
irq_reg_writel(gc, ~gc->wake_active, GPIO_INTMASK);
+   clk_disable(bank->clk);
 }
 
 static void rockchip_irq_resume(struct irq_data *d)
@@ -1629,7 +1641,27 @@ static void rockchip_irq_resume(struct irq_data *d)
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
struct rockchip_pin_bank *bank = gc->private;
 
+   clk_enable(bank->clk);
irq_reg_writel(gc, bank->saved_masks, GPIO_INTMASK);
+   clk_disable(bank->clk);
+}
+
+static void rockchip_irq_gc_mask_clr_bit(struct irq_data *d)
+{
+   struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+   struct rockchip_pin_bank *bank = gc->private;
+
+   clk_enable(bank->clk);
+   irq_gc_mask_clr_bit(d);
+}
+
+void rockchip_irq_gc_mask_set_bit(struct irq_data *d)
+{
+   struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+   struct rockchip_pin_bank *bank = gc->private;
+
+   irq_gc_mask_set_bit(d);
+   clk_disable(bank->clk);
 }
 
 static int rockchip_interrupts_register(struct platform_device *pdev,
@@ -1640,7 +1672,7 @@ static int rockchip_interrupts_register(struct 
platform_device *pdev,
unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
struct irq_chip_generic *gc;
int ret;
-   int i;
+   int i, j;
 
for (i = 0; i < ctrl->nr_banks; ++i, ++bank) {
if (!bank->valid) {
@@ -1649,11 +1681,19 @@ static int rockchip_interrupts_register(struct 
platform_device *pdev,
continue;
}
 
+   ret = clk_enable(bank->clk);
+   if (ret) {
+   dev_err(&pdev->dev, "failed to enable clock for bank 
%s\n",
+   bank->name);
+   conti

  1   2   3   4   5   6   7   8   9   >