Re: [PATCHv4 2/6] Staging: most: mostcore/core.c. Fix "Using plain integer as NULL pointer" warnings
On Mon, Aug 03, 2015 at 11:22:35PM +0200, Adrian Remonda wrote: > This patch fixes the warning generated by sparse: "Using plain integer > as NULL pointer" by replacing the offending 0 with NULL. > > Signed-off-by: Adrian Remonda > --- This patch will not apply because of 7ac5c9f0a022 ("Staging: most: fix snprintf() is printing too much"). You need to update your tree. regards sudip -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface
> On Jul 28, 2015, at 21:21, Peter Zijlstra wrote: > > There are various problems and short-comings with the current > static_key interface: > > - static_key_{true,false}() read like a branch depending on the key > value, instead of the actual likely/unlikely branch depending on > init value. > > - static_key_{true,false}() are, as stated above, tied to the > static_key init values STATIC_KEY_INIT_{TRUE,FALSE}. > > - we're limited to the 2 (out of 4) possible options that compile to > a default NOP because that's what our arch_static_branch() assembly > emits. > > So provide a new static_key interface: > > DEFINE_STATIC_KEY_TRUE(name); > DEFINE_STATIC_KEY_FALSE(name); > > Which define a key of different types with an initial true/false > value. > > Then allow: > > static_branch_likely() > static_branch_unlikely() > > to take a key of either type and emit the right instruction for the > case. > > This means adding a second arch_static_branch_jump() assembly helper > which emits a JMP per default. > > In order to determine the right instruction for the right state, > encode the branch type in the LSB of jump_entry::key. > > Signed-off-by: Peter Zijlstra (Intel) > --- > Does this mean static_key_{true,false}() are deprecated? Do you need to mark static_key_{true,false}() as deprecated, like this: static __always_inline __deprecated bool static_key_false(struct static_key *key) ? Thanks -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks
pclk_pd_pmu needs to keep running and with the upcoming gpio clock handling this is not always the case anymore. So add it to the list of critical clocks for now. Signed-off-by: Heiko Stuebner Signed-off-by: Lin Huang --- Changes in v3: -match the author and Signed-off-by name drivers/clk/rockchip/clk-rk3288.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c index 0df5bae..9040878 100644 --- a/drivers/clk/rockchip/clk-rk3288.c +++ b/drivers/clk/rockchip/clk-rk3288.c @@ -780,6 +780,7 @@ static const char *const rk3288_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", "hclk_peri", + "pclk_pd_pmu", }; #ifdef CONFIG_PM_SLEEP -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v3 2/2] pinctrl: rockchip: only enable gpio clock when it setting
gpio can keep state even the clock disable, for save power consumption, only enable gpio clock when it setting Signed-off-by: Heiko Stuebner Signed-off-by: Lin Huang --- Changes in v3: -match author and Signed-off-by name drivers/pinctrl/pinctrl-rockchip.c | 57 +++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index cc2843a..70a4539 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -945,6 +945,7 @@ static int _rockchip_pmx_gpio_set_direction(struct gpio_chip *chip, if (ret < 0) return ret; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR); @@ -953,9 +954,11 @@ static int _rockchip_pmx_gpio_set_direction(struct gpio_chip *chip, data |= BIT(pin); else data &= ~BIT(pin); + writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return 0; } @@ -1389,6 +1392,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value) unsigned long flags; u32 data; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = readl(reg); @@ -1398,6 +1402,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value) writel(data, reg); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); } /* @@ -1409,7 +1414,9 @@ static int rockchip_gpio_get(struct gpio_chip *gc, unsigned offset) struct rockchip_pin_bank *bank = gc_to_pin_bank(gc); u32 data; + clk_enable(bank->clk); data = readl(bank->reg_base + GPIO_EXT_PORT); + clk_disable(bank->clk); data >>= offset; data &= 1; return data; @@ -1546,6 +1553,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) if (ret < 0) return ret; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR); @@ -1603,6 +1611,7 @@ static int 
rockchip_irq_set_type(struct irq_data *d, unsigned int type) default: irq_gc_unlock(gc); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return -EINVAL; } @@ -1611,6 +1620,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) irq_gc_unlock(gc); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return 0; } @@ -1620,8 +1630,10 @@ static void rockchip_irq_suspend(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct rockchip_pin_bank *bank = gc->private; + clk_enable(bank->clk); bank->saved_masks = irq_reg_readl(gc, GPIO_INTMASK); irq_reg_writel(gc, ~gc->wake_active, GPIO_INTMASK); + clk_disable(bank->clk); } static void rockchip_irq_resume(struct irq_data *d) @@ -1629,7 +1641,27 @@ static void rockchip_irq_resume(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct rockchip_pin_bank *bank = gc->private; + clk_enable(bank->clk); irq_reg_writel(gc, bank->saved_masks, GPIO_INTMASK); + clk_disable(bank->clk); +} + +static void rockchip_irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + clk_enable(bank->clk); + irq_gc_mask_clr_bit(d); +} + +void rockchip_irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + irq_gc_mask_set_bit(d); + clk_disable(bank->clk); } static int rockchip_interrupts_register(struct platform_device *pdev, @@ -1640,7 +1672,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; struct irq_chip_generic *gc; int ret; - int i; + int i, j; for (i = 0; i < ctrl->nr_banks; ++i, ++bank) { if (!bank->valid) { @@ -1649,11 +1681,19 @@ static int rockchip_interrupts_register(struct platform_device *pdev, continue; } + ret = 
clk_enable(bank->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock for bank %s\n", + bank->name); + continue; + } + bank->domain = irq_domain_add_linear(b
Re: [PATCH] megaraid_sas: fix missing { } braces
Hi Colin, Colin King writes: > From: Colin Ian King > > Static analysis by smatch indicated that there was a curly > braces issue: > > drivers/scsi/megaraid/megaraid_sas_base.c:6139 > megasas_mgmt_fw_ioctl() warn: curly braces intended? > > Add braces in the appropriate place so that kbuf_arr[i] gets > set to NULL only when we need to. Also, remove whitespace > between kbuff_arr and []. I don't quite get where you removed that whitespace. Might it be a left over from another version of the patch? > > Signed-off-by: Colin Ian King > --- > drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c > b/drivers/scsi/megaraid/megaraid_sas_base.c > index 71b884d..8face78 100644 > --- a/drivers/scsi/megaraid/megaraid_sas_base.c > +++ b/drivers/scsi/megaraid/megaraid_sas_base.c > @@ -6131,12 +6131,13 @@ megasas_mgmt_fw_ioctl(struct megasas_instance > *instance, > } > > for (i = 0; i < ioc->sge_count; i++) { > - if (kbuff_arr[i]) > + if (kbuff_arr[i]) { > dma_free_coherent(&instance->pdev->dev, > le32_to_cpu(kern_sge32[i].length), > kbuff_arr[i], > le32_to_cpu(kern_sge32[i].phys_addr)); > kbuff_arr[i] = NULL; > + } > } > > megasas_return_cmd(instance, cmd); For the curly braces part: Reviewed-by: Johannes Thumshirn While you're up to it, care to check drivers/scsi/bfa as well? There was a patch somewhen in the past but it seems it wasn't applied. But if found this bugzilla entry https://bugzilla.kernel.org/show_bug.cgi?id=98261 for it. Thanks, Johannes -- Johannes Thumshirn Storage jthumsh...@suse.de +49 911 74053 689 SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. 
Norton HRB 21284 (AG Nürnberg) Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 1/6] mtd: spi-nor: change return value of read/write
On 3 August 2015 at 23:46, Marek Vasut wrote: > On Monday, August 03, 2015 at 08:39:01 PM, Michal Suchanek wrote: >> Change the return value of spi-nor device read and write methods to >> allow returning amount of data transferred and errors as >> read(2)/write(2) does. >> >> Signed-off-by: Michal Suchanek >> --- >> include/linux/mtd/spi-nor.h | 4 ++-- >> 1 file changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h >> index e540952..7d782cb 100644 >> --- a/include/linux/mtd/spi-nor.h >> +++ b/include/linux/mtd/spi-nor.h >> @@ -185,9 +185,9 @@ struct spi_nor { >> int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, >> int write_enable); >> >> - int (*read)(struct spi_nor *nor, loff_t from, >> + ssize_t (*read)(struct spi_nor *nor, loff_t from, >> size_t len, size_t *retlen, u_char *read_buf); >> - void (*write)(struct spi_nor *nor, loff_t to, >> + ssize_t (*write)(struct spi_nor *nor, loff_t to, >> size_t len, size_t *retlen, const u_char *write_buf); >> int (*erase)(struct spi_nor *nor, loff_t offs); > > You realize that if someone does bisect and has only this patch applied, > the compiler will complain loudly about mismatching data types, right ? :) Yes, the compiler prints a warning. However, only the return value which is not used changes so it should not cause any real problem. The data type in the fsl-quadspi and m25p80 drivers is matched in the following two patches. Thanks Michal -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] sched: Trace point sched_stat_sleep should cover iowait case
On 2015/8/4 2:43, Peter Zijlstra wrote: > On Mon, Aug 03, 2015 at 01:35:28PM -0400, Steven Rostedt wrote: >> On Mon, 27 Jul 2015 09:11:52 -0400 >> yangoliver wrote: >> >>> Per sched_stat_sleep definition in sched.h, it should include >>> iowait case. This can also relect the design of sum_sleep_runtime >>> statistic, as this counter also includes the io_wait. >>> >>> Signed-off-by: Yong Yang >>> --- >>> kernel/sched/fair.c | 2 ++ >>> 1 file changed, 2 insertions(+) >>> >>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c >>> index d113c3b..85677bf 100644 >>> --- a/kernel/sched/fair.c >>> +++ b/kernel/sched/fair.c >>> @@ -3018,6 +3018,8 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, >>> struct sched_entity *se) >>> se->statistics.sum_sleep_runtime += delta; >>> >>> if (tsk) { >>> + trace_sched_stat_sleep(tsk, delta); >>> + >>> if (tsk->in_iowait) { >>> se->statistics.iowait_sum += delta; >>> se->statistics.iowait_count++; > No, that's broken in two ways. Firstly you don't change semantics of > stuff just because of a comment and secondly iowait has nothing what all > to do with INTERRUPTIBLE/sleep vs UNINTERRUPTIBLE/blocked. Peter, Sorry for missing key person in this mail thread. Another reason I think sched_stat_sleep should cover UNINTERRUPTIBLE/blocked case is, the sum_sleep_runtime counter get increased for both INTERRUPTIBLE and UNINTERRUPTIBLE cases. We can find below statement for both cases in the code, se->statistics.sum_sleep_runtime += delta; Plus below comments, I guessed the sched_stat_sleep trace point is originally designed for cover all kind of sleep cases: interruptible and uninterruptible, /* * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ DEFINE_EVENT(sched_stat_template, sched_stat_sleep, TP_PROTO(struct task_struct *tsk, u64 delay), TP_ARGS(tsk, delay)); Do you think we should make sched_stat_sleep meaning similar with the sum_sleep_runtime counter? 
If not, we may need to fix the comments in sched.h above. > > And wtf are you doing sending sched patches and not Cc maintainers. > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net] net: dsa: fix EDSA frame from hwaccel frame
If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX, an EDSA frame is prepended with a 802.1q header once queued. To fix this, push the VLAN tag to the payload if present, before checking the frame protocol. [note: we may prefer to access directly VLAN TCI from hwaccel frames, but this approach is simpler.] Signed-off-by: Vivien Didelot --- net/dsa/tag_edsa.c | 5 + 1 file changed, 5 insertions(+) diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 2288c80..3ada4eb 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include "dsa_priv.h" @@ -21,6 +22,10 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; + skb = vlan_hwaccel_push_inside(skb); + if (unlikely(!skb)) + return NULL; + /* * Convert the outermost 802.1q tag to a DSA tag and prepend * a DSA ethertype field is the packet is tagged, or insert -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next 4/4] net: dsa: mv88e6xxx: refactor FDB routines
Refactor mv88e6xxx_port_fdb_{add,del,getnext} to respect the new DSA switch driver FDB access routines. The Marvell 88E6xxx switches support up to 4094 FIDs (from 1 to 0xfff; FID 0 means that multiple address databases are not being used). So change the fid_mask for a fid_bitmap of 4096 bits. FIDs 1 to num_ports will be reserved for non-bridged ports and bridge groups (a bridge group gets the FID of its first member). The remaining bits will then be used for VLANs. Also do not consider an address (yet) if it is trunk mapped. This change is a need to welcome the future support for hardware VLANs. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6171.c | 3 + drivers/net/dsa/mv88e6352.c | 3 + drivers/net/dsa/mv88e6xxx.c | 205 +++- drivers/net/dsa/mv88e6xxx.h | 31 +-- 4 files changed, 172 insertions(+), 70 deletions(-) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index cfa21ed..735f04c 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,6 +116,9 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, + .port_fdb_add = mv88e6xxx_port_fdb_add, + .port_fdb_del = mv88e6xxx_port_fdb_del, + .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index eb4630f..191fb25 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -341,6 +341,9 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, + .port_fdb_add = mv88e6xxx_port_fdb_add, + .port_fdb_del = mv88e6xxx_port_fdb_del, + .port_fdb_getnext = mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c 
index 438c73e..f576a39 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -964,7 +965,7 @@ static int _mv88e6xxx_atu_cmd(struct dsa_switch *ds, int fid, u16 cmd) { int ret; - ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x01, fid); + ret = _mv88e6xxx_reg_write(ds, REG_GLOBAL, GLOBAL_ATU_FID, fid); if (ret < 0) return ret; @@ -1091,7 +1092,7 @@ int mv88e6xxx_join_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) ps->bridge_mask[fid] = br_port_mask; if (fid != ps->fid[port]) { - ps->fid_mask |= 1 << ps->fid[port]; + clear_bit(ps->fid[port], ps->fid_bitmap); ps->fid[port] = fid; ret = _mv88e6xxx_update_bridge_config(ds, fid); } @@ -1125,9 +1126,16 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) mutex_lock(&ps->smi_mutex); - newfid = __ffs(ps->fid_mask); + newfid = find_next_zero_bit(ps->fid_bitmap, VLAN_N_VID, 1); + if (unlikely(newfid > ps->num_ports)) { + netdev_err(ds->ports[port], "all first %d FIDs are used\n", + ps->num_ports); + ret = -ENOSPC; + goto unlock; + } + ps->fid[port] = newfid; - ps->fid_mask &= ~(1 << newfid); + set_bit(newfid, ps->fid_bitmap); ps->bridge_mask[fid] &= ~(1 << port); ps->bridge_mask[newfid] = 1 << port; @@ -1135,6 +1143,7 @@ int mv88e6xxx_leave_bridge(struct dsa_switch *ds, int port, u32 br_port_mask) if (!ret) ret = _mv88e6xxx_update_bridge_config(ds, newfid); +unlock: mutex_unlock(&ps->smi_mutex); return ret; @@ -1174,8 +1183,8 @@ int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state) return 0; } -static int __mv88e6xxx_write_addr(struct dsa_switch *ds, - const unsigned char *addr) +static int _mv88e6xxx_atu_mac_write(struct dsa_switch *ds, + const u8 addr[ETH_ALEN]) { int i, ret; @@ -1190,7 +1199,7 @@ static int __mv88e6xxx_write_addr(struct dsa_switch *ds, return 0; } -static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr) +static int 
_mv88e6xxx_atu_mac_read(struct dsa_switch *ds, u8 addr[ETH_ALEN]) { int i, ret; @@ -1206,109 +1215,184 @@ static int __mv88e6xxx_read_addr(struct dsa_switch *ds, unsigned char *addr) return 0; } -static int __mv88e6xxx_port_fdb_cmd(struct dsa_switch *ds, int port, - const un
Re: [PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS allocations
Hi Michal, On Thu, 2 Jul 2015, Michal Hocko wrote: > On Thu 02-07-15 10:25:51, Theodore Ts'o wrote: > > On Wed, Jul 01, 2015 at 03:37:15PM +0200, Michal Hocko wrote: > From: Michal Hocko > Date: Thu, 2 Jul 2015 17:05:05 +0200 > Subject: [PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS > allocations > > Nikolay has reported a hang when a memcg reclaim got stuck with the > following backtrace... Sorry, I couldn't manage more than to ignore you when you Cc'ed me on this a month ago. Dave's perfectly correct, we had ourselves come to notice that recently: although in an ideal world a filesystem would only mark PageWriteback once the IO is all ready to go, in the real world that's not quite so, and a memory allocation may stand between. Which leaves my v3.6 c3b94f44fcb0 in danger of deadlocking. And suddenly now, in v4.2-rc or perhaps in v4.1 also, that has started hitting me too (I don't know which release Nicolay noticed this on). And it has become urgent to fix: I've added Linus to the Cc because I believe his comment in the rc5 announcement, "There's also a pending question about some of the VM changes", reflects this. Twice when I was trying to verify fixes to the dcache issue which came up at the end of last week, I was frustrated by unrelated hangs in my load. The first time I didn't recognize it, but the second time I did, and then came to realize that your patch is just what is needed. But I have modified it a little, I don't think you'll mind. As you suggested yourself, I actually prefer to test may_enter_fs there, rather than __GFP_FS: not a big deal, I certainly wouldn't want to delay the fix if someone thinks differently; but I tend to feel that may_enter_fs is what we already use for such decisions there, so better to use it. (And the SwapCache case immune to ext4 or xfs IO submission pattern.) 
I've fixed up the patch and updated the comments, since Tejun has meanwhile introduced sane_reclaim(sc) - I'm staying on in the insane asylum for now (and sane_reclaim is clearly unaffected by the change). I've omitted your hunk unindenting Case 3 wait_on_page_writeback(page): I prefer your style too, but thought it better to minimize the patch, especially if this is heading to the stables. (I was tempted to add in my unlock_page there, that we discussed once before: but again thought it better to minimize the fix - it is "selfish" not to unlock_page, but I think that anything heading for deadlock on the locked page would in other circumstances be heading for deadlock on the writeback page - I've never found that change critical.) And I've done quite a bit of testing. The loads that hung at the weekend have been running nicely for 24 hours now, no problem with the writeback hang and no problem with the dcache ENOTDIR issue. Though I've no idea of what recent VM change turned this into a hot issue. And more testing on the history of it, considering your stable 3.6+ designation that I wasn't satisfied with. Getting out that USB stick again, I find that 3.6, 3.7 and 3.8 all OOM if their __GFP_IO test is updated to a may_enter_fs test; but something happened in 3.9 to make it and subsequent releases safe with the may_enter_fs test. You can certainly argue that the remote chance of a deadlock is worse than the fair chance of a spurious OOM; but if you insist on 3.6+, then I think it would have to go back even further, because we marked that commit for stable itself. I suggest 3.9+. 
[PATCH] mm, vmscan: Do not wait for page writeback for GFP_NOFS allocations From: Michal Hocko Nikolay has reported a hang when a memcg reclaim got stuck with the following backtrace: PID: 18308 TASK: 883d7c9b0a30 CPU: 1 COMMAND: "rsync" #0 [88177374ac60] __schedule at 815ab152 #1 [88177374acb0] schedule at 815ab76e #2 [88177374acd0] schedule_timeout at 815ae5e5 #3 [88177374ad70] io_schedule_timeout at 815aad6a #4 [88177374ada0] bit_wait_io at 815abfc6 #5 [88177374adb0] __wait_on_bit at 815abda5 #6 [88177374ae00] wait_on_page_bit at 8111fd4f #7 [88177374ae50] shrink_page_list at 81135445 #8 [88177374af50] shrink_inactive_list at 81135845 #9 [88177374b060] shrink_lruvec at 81135ead #10 [88177374b150] shrink_zone at 811360c3 #11 [88177374b220] shrink_zones at 81136eff #12 [88177374b2a0] do_try_to_free_pages at 8113712f #13 [88177374b300] try_to_free_mem_cgroup_pages at 811372be #14 [88177374b380] try_charge at 81189423 #15 [88177374b430] mem_cgroup_try_charge at 8118c6f5 #16 [88177374b470] __add_to_page_cache_locked at 8112137d #17 [88177374b4e0] add_to_page_cache_lru at 81121618 #18 [88177374b510] pagecache_get_page at 8112170b #19 [88177374b560] grow_dev_page at 811c8297 #20 [88177374b5c0] __getblk_slow at 811c91d6 #21
[PATCH net-next 2/4] net: switchdev: support static FDB addresses
This patch adds an is_static boolean to the switchdev_obj_fdb structure, in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE. Signed-off-by: Vivien Didelot --- include/net/switchdev.h | 1 + net/switchdev/switchdev.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e90e1a0..0e296b8 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj { struct switchdev_obj_fdb { /* PORT_FDB */ u8 addr[ETH_ALEN]; u16 vid; + bool is_static; } fdb; } u; }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 28786e8..b75897c 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_flags = NTF_SELF; ndm->ndm_type= 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; + ndm->ndm_state = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE; if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) goto nla_put_failure; -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next 1/4] net: switchdev: change fdb addr for a byte array
The address in the switchdev_obj_fdb structure is currently represented as a pointer. Replacing it with a 6-byte array allows switchdev to carry addresses directly read from hardware registers, not stored by the switch chip driver (as in Rocker). Signed-off-by: Vivien Didelot --- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/switchdev.h | 2 +- net/bridge/br_fdb.c | 2 +- net/switchdev/switchdev.c| 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 4cd5a71..faa5db0 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { if (found->key.pport != rocker_port->pport) continue; - fdb->addr = found->key.addr; + memcpy(fdb->addr, found->key.addr, ETH_ALEN); fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); err = obj->cb(rocker_port->dev, obj); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89da893..e90e1a0 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -70,7 +70,7 @@ struct switchdev_obj { u32 tb_id; } ipv4_fib; struct switchdev_obj_fdb { /* PORT_FDB */ - const unsigned char *addr; + u8 addr[ETH_ALEN]; u16 vid; } fdb; } u; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875d..2c64b6a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = f->addr.addr, .vid = f->vlan_id, }, }; + memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN); switchdev_port_obj_del(f->dst->dev, &obj); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 33bafa2..28786e8 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ 
-742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + memcpy(obj.u.fdb.addr, addr, ETH_ALEN); return switchdev_port_obj_add(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + memcpy(obj.u.fdb.addr, addr, ETH_ALEN); return switchdev_port_obj_del(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH net-next 3/4] net: dsa: add support for switchdev FDB objects
Remove the fdb_{add,del,getnext} function pointer in favor of new port_fdb_{add,del,getnext}. Implement the switchdev_port_obj_{add,del,dump} functions in DSA to support the SWITCHDEV_OBJ_PORT_FDB objects. These functions are called from switchdev_port_bridge_{get,set,del}link. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6171.c | 3 - drivers/net/dsa/mv88e6352.c | 3 - include/net/dsa.h | 16 ++-- net/dsa/slave.c | 221 4 files changed, 129 insertions(+), 114 deletions(-) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 1c78084..cfa21ed 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, - .fdb_add= mv88e6xxx_port_fdb_add, - .fdb_del= mv88e6xxx_port_fdb_del, - .fdb_getnext= mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index af210ef..eb4630f 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, - .fdb_add= mv88e6xxx_port_fdb_add, - .fdb_del= mv88e6xxx_port_fdb_del, - .fdb_getnext= mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63b..a090c8a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,12 +296,16 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - int (*fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int 
(*fdb_getnext)(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + + /* +* Forwarding database +*/ + int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid, + u8 addr[ETH_ALEN]); + int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid, + u8 addr[ETH_ALEN]); + int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid, + u8 addr[ETH_ALEN], bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0010c69..0f99a17 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dsa_priv.h" /* slave mii_bus handling ***/ @@ -200,105 +201,6 @@ out: return 0; } -static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], -struct net_device *dev, -const unsigned char *addr, u16 vid, u16 nlm_flags) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_add) - ret = ds->drv->fdb_add(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], -struct net_device *dev, -const unsigned char *addr, u16 vid) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_del) - ret = ds->drv->fdb_del(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, - const unsigned char *addr, u16 vid, - bool is_static, - u32 portid, u32 seq, int type, - unsigned int flags) -{ - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1= 0; - ndm->ndm_pad2= 0; - ndm->ndm_flags = NTF_EXT_LEARNED;
[PATCH net-next 0/4] net: dsa: support switchdev FDB objects
This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB objects. The first two patches add minor but necessary changes to switchdev, the third one implements the switchdev glue in DSA for FDB routines, and the fourth one refactors the FDB access functions in the mv88e6xxx code. Below is an example (ports 0-2 belong to br0, ports 3-4 belong to br1): # bridge fdb add 3c:97:0e:11:30:6e dev swp2 # bridge fdb add 3c:97:0e:11:40:78 dev swp3 # bridge fdb add 3c:97:0e:11:50:86 dev swp4 # bridge fdb del 3c:97:0e:11:40:78 dev swp3 # bridge fdb 01:00:5e:00:00:01 dev eth0 self permanent 01:00:5e:00:00:01 dev eth1 self permanent 00:50:d2:10:78:15 dev swp0 master br0 permanent 3c:97:0e:11:30:6e dev swp2 self static 00:50:d2:10:78:15 dev swp3 master br1 permanent 3c:97:0e:11:50:86 dev swp4 self static # cat /sys/kernel/debug/dsa0/atu # DB T/P Vec State Addr # 001 Port 004 e 3c:97:0e:11:30:6e # 004 Port 010 e 3c:97:0e:11:50:86 For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged ports and bridge groups, and the remaining ones will later be used by VLANs. This change is necessary to welcome the support for hardware VLANs (which will follow soon). 
Cheers, -v Vivien Didelot (4): net: switchdev: change fdb addr for a byte array net: switchdev: support static FDB addresses net: dsa: add support for switchdev FDB objects net: dsa: mv88e6xxx: refactor FDB routines drivers/net/dsa/mv88e6171.c | 6 +- drivers/net/dsa/mv88e6352.c | 6 +- drivers/net/dsa/mv88e6xxx.c | 205 ++-- drivers/net/dsa/mv88e6xxx.h | 31 +++-- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/dsa.h| 16 ++- include/net/switchdev.h | 3 +- net/bridge/br_fdb.c | 2 +- net/dsa/slave.c | 221 +++ net/switchdev/switchdev.c| 6 +- 10 files changed, 308 insertions(+), 190 deletions(-) -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 4/5] clk: Hi6220: add stub clock driver
Hi Stephen, On Mon, Aug 03, 2015 at 02:37:52PM -0700, Stephen Boyd wrote: > On 08/03, Leo Yan wrote: > > diff --git a/drivers/clk/hisilicon/clk-hi6220-stub.c > > b/drivers/clk/hisilicon/clk-hi6220-stub.c > > new file mode 100644 > > index 000..0931666 > > --- /dev/null > > +++ b/drivers/clk/hisilicon/clk-hi6220-stub.c > > @@ -0,0 +1,279 @@ > > +/* > > + * Hi6220 stub clock driver > > + * > > + * Copyright (c) 2015 Hisilicon Limited. > > + * Copyright (c) 2015 Linaro Limited. > > + * > > + * Author: Leo Yan > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + * > > + */ > > + > > +#include > > Is this include used? > > > +#include > > +#include > > Is this include used? > > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > + > > +#include > > + > > +/* Stub clocks id */ > > +#define HI6220_STUB_ACPU0 0 > > +#define HI6220_STUB_ACPU1 1 > > +#define HI6220_STUB_GPU2 > > +#define HI6220_STUB_DDR5 > > + > > +/* Mailbox message */ > > +#define HI6220_MBOX_MSG_LEN8 > > + > > +#define HI6220_MBOX_FREQ (0xA) > > +#define HI6220_MBOX_CMD_SET(0x3) > > +#define HI6220_MBOX_OBJ_AP (0x0) > > + > > +/* CPU dynamic frequency scaling */ > > +#define ACPU_DFS_FREQ_MAX (0x1724) > > +#define ACPU_DFS_CUR_FREQ (0x17CC) > > +#define ACPU_DFS_FLAG (0x1B30) > > +#define ACPU_DFS_FREQ_REQ (0x1B34) > > +#define ACPU_DFS_FREQ_LMT (0x1B38) > > +#define ACPU_DFS_LOCK_FLAG (0xAEAEAEAE) > > We don't need parenthesis around single values in these macros. 
> > > + > > +#define to_stub_clk(hw) container_of(hw, struct hi6220_stub_clk, hw) > > + > > +struct hi6220_stub_clk { > > + u32 id; > > + u32 rate; > > + > > + struct device *dev; > > + struct clk_hw hw; > > + > > + struct regmap *dfs_map; > > + struct mbox_client cl; > > + struct mbox_chan *mbox; > > +}; > > + > > +struct hi6220_mbox_msg { > > + unsigned char type; > > + unsigned char cmd; > > + unsigned char obj; > > + unsigned char src; > > + unsigned char para[4]; > > +}; > > + > > +union hi6220_mbox_data { > > + unsigned int data[HI6220_MBOX_MSG_LEN]; > > + struct hi6220_mbox_msg msg; > > +}; > > + > > +static unsigned int hi6220_acpu_get_freq(struct hi6220_stub_clk *stub_clk) > > +{ > > + unsigned int freq; > > + > > + regmap_read(stub_clk->dfs_map, ACPU_DFS_CUR_FREQ, &freq); > > + return freq; > > +} > > + > > +static int hi6220_acpu_set_freq(struct hi6220_stub_clk *stub_clk, > > + unsigned int freq) > > +{ > > + union hi6220_mbox_data data; > > + > > + stub_clk->mbox = mbox_request_channel(&stub_clk->cl, 0); > > Why not request the channel once in probe? > > > + if (IS_ERR(stub_clk->mbox)) { > > + dev_err(stub_clk->dev, "failed get mailbox channel\n"); > > + return PTR_ERR(stub_clk->mbox); > > + }; > > + > > + /* set the frequency in sram */ > > + regmap_write(stub_clk->dfs_map, ACPU_DFS_FREQ_REQ, freq); > > + > > + /* compound mailbox message */ > > + data.msg.type = HI6220_MBOX_FREQ; > > + data.msg.cmd = HI6220_MBOX_CMD_SET; > > + data.msg.obj = HI6220_MBOX_OBJ_AP; > > + data.msg.src = HI6220_MBOX_OBJ_AP; > > + > > + mbox_send_message(stub_clk->mbox, &data); > > + mbox_free_channel(stub_clk->mbox); > > + return 0; > > +} > > + > > +static int hi6220_acpu_round_freq(struct hi6220_stub_clk *stub_clk, > > + unsigned int freq) > > +{ > > + unsigned int limit_flag, limit_freq = UINT_MAX; > > + unsigned int max_freq; > > + > > + /* check the constrainted frequency */ > > s/constrainted/constrained/ ? 
> > > + regmap_read(stub_clk->dfs_map, ACPU_DFS_FLAG, &limit_flag); > > + if (limit_flag == ACPU_DFS_LOCK_FLAG) > > + regmap_read(stub_clk->dfs_map, ACPU_DFS_FREQ_LMT, &limit_freq); > > + > > + /* check the supported maximum frequency */ > > + regmap_read(stub_clk->dfs_map, ACPU_DFS_FREQ_MAX, &max_freq); > > + > > + /* calculate the real maximum frequency */ > > + max_freq = min(max_freq, limit_freq); > > + > > + if (WARN_ON(freq > max_freq)) > > + freq = max_freq; > > + > > + return freq; > > +} > > + > > +static unsigned long hi6220_stub_clk_recalc_rate(struct clk_hw *hw, > > + unsigned long parent_rate) > > +{ > > + u32 rate = 0; > > + struct hi6220_stub_clk *stub_clk = to_stub_clk(hw); > > + > > + switch (stub_clk->id) { > > + case HI6220_STUB_ACPU0: > > + rate = hi6220_acpu_get_freq(stub_clk); > > + > > + /* convert from KHz to Hz */ > > s/KHz/kHz/ ? > > > + rate *= 1000; > > + break; > > + > > + default: > > + de
Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame
From: Vivien Didelot Date: Tue, 4 Aug 2015 02:01:18 -0400 (EDT) > Dully noted. Should I resend it? Yes, please. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] clk: pxa: pxa3xx: fix CKEN register access
Stephen Boyd writes: > On 08/03/2015 12:58 PM, Robert Jarzmik wrote: >> Clocks 0 to 31 are on CKENA, and not CKENB. The clock register names >> were inadequately inverted. As a consequence, all clock operations were >> happening on CKENB, because almost all but 2 clocks are on CKENA. >> >> As the clocks were activated by the bootloader in the former tests, it >> escaped the testing that the wrong clock gate was manipulated. The error >> was revealed by changing the pxa3xx-and driver to a module, where tupon >> unloading the wrong clock was disabled in CKENB. >> >> Signed-off-by: Robert Jarzmik >> --- > > Did you want a fixes tag to send this back to stable? Ah yes, good point, v2 on its way. Stephen and Mike, do you think this can still get in -rc6 ? Cheers. -- Robert -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2] clk: pxa: pxa3xx: fix CKEN register access
Clocks 0 to 31 are on CKENA, and not CKENB. The clock register names were inadequately inverted. As a consequence, all clock operations were happening on CKENB, because almost all but 2 clocks are on CKENA. As the clocks were activated by the bootloader in the former tests, it escaped the testing that the wrong clock gate was manipulated. The error was revealed by changing the pxa3xx-nand driver to a module, where upon unloading the wrong clock was disabled in CKENB. Fixes: 9bbb8a338fb2 ("clk: pxa: add pxa3xx clock driver") Signed-off-by: Robert Jarzmik --- Since v1: added Fixes: --- drivers/clk/pxa/clk-pxa3xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index c677b9ab5367..4af4eed5f89f 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -126,7 +126,7 @@ PARENTS(pxa3xx_ac97_bus) = { "ring_osc_60mhz", "ac97" }; PARENTS(pxa3xx_sbus) = { "ring_osc_60mhz", "system_bus" }; PARENTS(pxa3xx_smemcbus) = { "ring_osc_60mhz", "smemc" }; -#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENA : &CKENB) +#define CKEN_AB(bit) ((CKEN_ ## bit > 31) ? &CKENB : &CKENA) #define PXA3XX_CKEN(dev_id, con_id, parents, mult_lp, div_lp, mult_hp, \ div_hp, bit, is_lp, flags) \ PXA_CKEN(dev_id, con_id, bit, parents, mult_lp, div_lp, \ -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] media: atmel-isi: move configure_geometry() to start_streaming()
Hi, Laurent On 8/3/2015 9:27 PM, Laurent Pinchart wrote: Hi Josh, On Monday 03 August 2015 11:56:01 Josh Wu wrote: On 7/31/2015 10:37 PM, Laurent Pinchart wrote: On Wednesday 17 June 2015 18:39:39 Josh Wu wrote: As in set_fmt() function we only need to know which format is been set, we don't need to access the ISI hardware in this moment. So move the configure_geometry(), which access the ISI hardware, to start_streaming() will make code more consistent and simpler. Signed-off-by: Josh Wu --- drivers/media/platform/soc_camera/atmel-isi.c | 17 + 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/soc_camera/atmel-isi.c b/drivers/media/platform/soc_camera/atmel-isi.c index 8bc40ca..b01086d 100644 --- a/drivers/media/platform/soc_camera/atmel-isi.c +++ b/drivers/media/platform/soc_camera/atmel-isi.c @@ -390,6 +390,11 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count) /* Disable all interrupts */ isi_writel(isi, ISI_INTDIS, (u32)~0UL); + ret = configure_geometry(isi, icd->user_width, icd->user_height, + icd->current_fmt->code); I would also make configure_geometry a void function, as the only failure case really can't occur. I think this case can be reached if user require a RGB565 format to capture and sensor also support RGB565 format. As atmel-isi driver will provide RGB565 support via the pass-through mode (maybe we need re-consider this part). So that will cause the configure_geometry() returns an error since it found the bus format is not Y8 or YUV422. In my opinion, we should not change configure_geometry()'s return type, until we add a insanity format check before we call configure_geometry() in future. It will really confuse the user if S_FMT accepts a format but STREAMON fails due to the format being unsupported. Could that be fixed by defaulting to a known supported format in S_FMT if the requested format isn't support ? yes, it's the right way to go. 
You could then remove the error check in configure_geometry(). So I will send a v2 patches, which will add one more patch to add insanity check on the S_FMT and remove the error check code in configure_geometry(). And for this patch in v2, I will add your reviewed-by tag. Is that Okay for you? Best Regards, Josh Wu Apart from that, Reviewed-by: Laurent Pinchart Thanks for the review. Best Regards, Josh Wu + if (ret < 0) + return ret; + spin_lock_irq(&isi->lock); /* Clear any pending interrupt */ isi_readl(isi, ISI_STATUS); @@ -477,8 +482,6 @@ static int isi_camera_init_videobuf(struct vb2_queue *q, static int isi_camera_set_fmt(struct soc_camera_device *icd, struct v4l2_format *f) { - struct soc_camera_host *ici = to_soc_camera_host(icd->parent); - struct atmel_isi *isi = ici->priv; struct v4l2_subdev *sd = soc_camera_to_subdev(icd); const struct soc_camera_format_xlate *xlate; struct v4l2_pix_format *pix = &f->fmt.pix; @@ -511,16 +514,6 @@ static int isi_camera_set_fmt(struct soc_camera_device *icd, if (mf->code != xlate->code) return -EINVAL; - /* Enable PM and peripheral clock before operate isi registers */ - pm_runtime_get_sync(ici->v4l2_dev.dev); - - ret = configure_geometry(isi, pix->width, pix->height, xlate->code); - - pm_runtime_put(ici->v4l2_dev.dev); - - if (ret < 0) - return ret; - pix->width = mf->width; pix->height = mf->height; pix->field = mf->field; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: net: dsa: support switchdev FDB objects
Hi, On Aug 4, 2015, at 1:54 AM, Vivien Didelot vivien.dide...@savoirfairelinux.com wrote: > This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB > objects. > > The first two patches add minor but necessary changes to switchdev, the third > one implements the switchdev glue in DSA for FDB routines, and the forth one > refactors the FDB access functions in the mv88e6xxx code. For some reason patch 4/4 didn't follow. I also missed the net-next prefix, as mentioned by David earlier. Please ignore this series; I will retry soon. Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] i2c: mediatek: fix transfer error handling
On Fri, 2015-07-31 at 13:00 +0200, Wolfram Sang wrote: > On Tue, Jul 28, 2015 at 11:38:05AM +0800, Eddie Huang wrote: > > From: Liguo Zhang > > > > Reset i2c dma engine in hw init function. > > When occur i2c ack error, mtk_i2c_irq may is twice, > > first is the ack error interrupt, then the complete interrupt, > > so i2c->irq_stat need keep the two interrupt value, and only > > call complete() for the complete interrupt. > > > > Signed-off-by: Liguo Zhang > > Signed-off-by: Eddie Huang > > Looks to me this patch needs to be split up into one patch per issue? OK, I can split > And doesn't it kill the auto_restart functionality? Sascha? No. restart_flag already set in mtk_i2c_do_transfer() function.It is not necessary check restart_flag again in mtk_i2c_irq(). It is simpler to just read status bit and write back to clear interrupt status. Eddie Thanks > > > --- > > drivers/i2c/busses/i2c-mt65xx.c | 25 ++--- > > 1 file changed, 18 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/i2c/busses/i2c-mt65xx.c > > b/drivers/i2c/busses/i2c-mt65xx.c > > index 9920eef..57d11b7 100644 > > --- a/drivers/i2c/busses/i2c-mt65xx.c > > +++ b/drivers/i2c/busses/i2c-mt65xx.c > > @@ -59,6 +59,7 @@ > > #define I2C_DMA_START_EN 0x0001 > > #define I2C_DMA_INT_FLAG_NONE 0x > > #define I2C_DMA_CLR_FLAG 0x > > +#define I2C_DMA_HARD_RST 0x0002 > > > > #define I2C_DEFAULT_SPEED 10 /* hz */ > > #define MAX_FS_MODE_SPEED 40 > > @@ -81,6 +82,7 @@ enum DMA_REGS_OFFSET { > > OFFSET_INT_FLAG = 0x0, > > OFFSET_INT_EN = 0x04, > > OFFSET_EN = 0x08, > > + OFFSET_RST = 0x0c, > > OFFSET_CON = 0x18, > > OFFSET_TX_MEM_ADDR = 0x1c, > > OFFSET_RX_MEM_ADDR = 0x20, > > @@ -262,6 +264,10 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) > > I2C_CONTROL_CLK_EXT_EN | I2C_CONTROL_DMA_EN; > > writew(control_reg, i2c->base + OFFSET_CONTROL); > > writew(I2C_DELAY_LEN, i2c->base + OFFSET_DELAY_LEN); > > + > > + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); > > + udelay(50); > > + 
writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); > > } > > > > /* > > @@ -550,16 +556,20 @@ err_exit: > > static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id) > > { > > struct mtk_i2c *i2c = dev_id; > > - u16 restart_flag = 0; > > + u16 intr_stat = 0; > > > > - if (i2c->dev_comp->auto_restart) > > - restart_flag = I2C_RS_TRANSFER; > > + intr_stat = readw(i2c->base + OFFSET_INTR_STAT); > > + writew(intr_stat, i2c->base + OFFSET_INTR_STAT); > > > > - i2c->irq_stat = readw(i2c->base + OFFSET_INTR_STAT); > > - writew(restart_flag | I2C_HS_NACKERR | I2C_ACKERR > > - | I2C_TRANSAC_COMP, i2c->base + OFFSET_INTR_STAT); > > + /* > > +* when occurs i2c ack error, mtk_i2c_irq is called twice, > > +* first is the ack error interrupt, then the complete interrupt, > > +* i2c->irq_stat need keep the two interrupt value. > > +*/ > > + i2c->irq_stat |= intr_stat; > > > > - complete(&i2c->msg_complete); > > + if (i2c->irq_stat & I2C_TRANSAC_COMP) > > + complete(&i2c->msg_complete); > > > > return IRQ_HANDLED; > > } > > @@ -729,3 +739,4 @@ module_platform_driver(mtk_i2c_driver); > > MODULE_LICENSE("GPL v2"); > > MODULE_DESCRIPTION("MediaTek I2C Bus Driver"); > > MODULE_AUTHOR("Xudong Chen "); > > +MODULE_AUTHOR("Liguo Zhang "); > > -- > > 1.7.9.5 > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/3] spi: ppc44x: Remove chipselect from setupxfer
The chipselect operation is already done in spi_bitbang_transfer_one, or in spi_bitbang_setup, so there is no need to do it in setupxfer as well. Signed-off-by: Nicolas Boichat --- drivers/spi/spi-ppc4xx.c | 7 --- 1 file changed, 7 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 54fb984..55947f6 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -210,13 +210,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) if (in_8(&hw->regs->cdm) != cdm) out_8(&hw->regs->cdm, cdm); - spin_lock(&hw->bitbang.lock); - if (!hw->bitbang.busy) { - hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE); - /* Need to ndelay here? */ - } - spin_unlock(&hw->bitbang.lock); - return 0; } -- 2.5.0.rc2.392.g76e840b -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/3] spi: s3c24xx: Convert spinlock to mutex
bitbang->lock is now a mutex: replace spinlock function calls by mutex functions. Signed-off-by: Nicolas Boichat --- drivers/spi/spi-s3c24xx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c index f36bc32..b1d03e5 100644 --- a/drivers/spi/spi-s3c24xx.c +++ b/drivers/spi/spi-s3c24xx.c @@ -198,12 +198,11 @@ static int s3c24xx_spi_setup(struct spi_device *spi) if (ret) return ret; - spin_lock(&hw->bitbang.lock); - if (!hw->bitbang.busy) { + if (mutex_trylock(&hw->bitbang.lock)) { hw->bitbang.chipselect(spi, BITBANG_CS_INACTIVE); /* need to ndelay for 0.5 clocktick ? */ + mutex_unlock(&hw->bitbang.lock); } - spin_unlock(&hw->bitbang.lock); return 0; } -- 2.5.0.rc2.392.g76e840b -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3] spi: bitbang: Replace spinlock by mutex when calling chipselect
Enabling CONFIG_DEBUG_ATOMIC_SLEEP in kernel configuration, we get this warning in spi_gpio_setup: [1.177747] BUG: sleeping function called from invalid context at drivers/gpio/gpiolib.c:1431 [1.190182] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0 [1.196922] 3 locks held by swapper/0/1: [1.200812] #0: (&dev->mutex){..}, at: [] __driver_attach+0x58/0x98 [1.209147] #1: (spi_add_lock){+.+.+.}, at: [] spi_add_device+0x80/0x14c [1.217564] #2: (&(&bitbang->lock)->rlock){..}, at: [] spi_bitbang_setup+0x84/0xc4 [1.227185] irq event stamp: 279856 [1.230645] hardirqs last enabled at (279855): [] __mutex_unlock_slowpath+0x158/0x16c [1.240070] hardirqs last disabled at (279856): [] _raw_spin_lock_irqsave+0x20/0x6c [1.249233] softirqs last enabled at (262072): [] bdi_register+0x124/0x1d0 [1.257707] softirqs last disabled at (262070): [] bdi_register+0x100/0x1d0 [1.266185] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 3.18.0 #608 [1.277419] Call trace: [1.279848] [] dump_backtrace+0x0/0x12c [1.285209] [] show_stack+0x10/0x1c [1.290223] [] dump_stack+0x80/0xb4 [1.295238] [] __might_sleep+0x110/0x11c [1.300687] [] gpiod_set_raw_value_cansleep+0x24/0x4c [1.307255] [] spi_gpio_chipselect+0x74/0x88 [1.313045] [] spi_bitbang_setup+0x98/0xc4 [1.318664] [] spi_gpio_setup+0x50/0xc8 [1.324022] [] spi_setup+0xe4/0xf8 [1.328950] [] spi_add_device+0xd0/0x14c [1.334396] [] spi_register_master+0x6a8/0x718 [1.340359] [] spi_bitbang_start+0xe8/0x108 [1.346064] [] spi_gpio_probe+0x3b4/0x448 [1.351595] [] platform_drv_probe+0x4c/0x9c [1.357301] [] driver_probe_device+0xd4/0x23c [1.363180] [] __driver_attach+0x68/0x98 [1.368627] [] bus_for_each_dev+0x7c/0xb0 [1.374160] [] driver_attach+0x1c/0x28 [1.379434] [] bus_add_driver+0xd8/0x1e0 [1.384881] [] driver_register+0xbc/0x10c [1.390412] [] __platform_driver_register+0x5c/0x68 [1.396808] [] spi_gpio_driver_init+0x14/0x20 [1.402685] [] do_one_initcall+0x18c/0x1ac [1.408306] [] kernel_init_freeable+0x228/0x2e0 [1.414356] [] 
kernel_init+0x10/0xd8 chipselect (in this case, spi_gpio_chipselect, which calls gpiod_set_raw_value_cansleep), can sleep, so we should not hold a spinlock while calling it. This issue was introduced by this commit, which converted spi-gpio to cansleep variants: d9dda5a191 "spi: spi-gpio: Use 'cansleep' variants to access GPIO" Replace spinlock + busy variable by a mutex, and get rid of spi_bitbang_prepare_hardware and spi_bitbang_unprepare_hardware, which are not useful anymore. Signed-off-by: Nicolas Boichat --- Actually, I'm not sure if I understand the existing code: why are we not waiting for busy to go down to 0, then call chipselect, instead of not calling it at all if the bus happens to be busy when we setup the device? With the current approach, it would be easy to just use an unconditional mutex_lock. Also, is it harmful to deactivate the newly setup device in spi_bitbang_setup, even if the bus is busy with another device? chipselect should be independent for each device (or is it not?). So I'm not clear why we need any locking at all... Hopefully someone can shine some light on this... Anyway, this patch series does not change the existing behaviour, applies on top of broonie-sound/for-next, and, along with the 2 follow-up patches, was compile-tested on x86-64/arm (allyesconfig) and ppc44x (defconfig+SPI driver), and runtime-tested on an arm platform. 
drivers/spi/spi-bitbang.c | 42 +++-- include/linux/spi/spi_bitbang.h | 3 +-- 2 files changed, 8 insertions(+), 37 deletions(-) diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c index 840a498..931c37e 100644 --- a/drivers/spi/spi-bitbang.c +++ b/drivers/spi/spi-bitbang.c @@ -180,7 +180,6 @@ int spi_bitbang_setup(struct spi_device *spi) { struct spi_bitbang_cs *cs = spi->controller_state; struct spi_bitbang *bitbang; - unsigned long flags; bitbang = spi_master_get_devdata(spi->master); @@ -210,12 +209,11 @@ int spi_bitbang_setup(struct spi_device *spi) */ /* deselect chip (low or high) */ - spin_lock_irqsave(&bitbang->lock, flags); - if (!bitbang->busy) { + if (mutex_trylock(&bitbang->lock)) { bitbang->chipselect(spi, BITBANG_CS_INACTIVE); ndelay(cs->nsecs); + mutex_unlock(&bitbang->lock); } - spin_unlock_irqrestore(&bitbang->lock, flags); return 0; } @@ -252,20 +250,6 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t) * transfer-at-a-time ones to leverage dma or fifo hardware. */ -static int spi_bitbang_prepare_hardware(struct spi_master *spi) -{ - struct spi_bitbang *bitbang; - unsigned long flags; - - bitbang = spi_master_get_devdata
RE: [RFC 0/2] VFIO: Add virtual MSI doorbell support.
> -Original Message- > From: Pranavkumar Sawargaonkar [mailto:pranavku...@linaro.org] > Sent: Tuesday, August 04, 2015 11:18 AM > To: Bhushan Bharat-R65777 > Cc: k...@vger.kernel.org; Alex Williamson; kvm...@lists.cs.columbia.edu; > linux-arm-ker...@lists.infradead.org; linux-kernel@vger.kernel.org; > christoffer.d...@linaro.org; marc.zyng...@arm.com; will.dea...@arm.com; > bhelg...@google.com; a...@arndb.de; rob.herr...@linaro.org; > eric.au...@linaro.org; patc...@apm.com; Yoder Stuart-B08248 > Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support. > > Hi Bharat, > > On 28 July 2015 at 23:28, Alex Williamson > wrote: > > On Tue, 2015-07-28 at 17:23 +, Bhushan Bharat wrote: > >> Hi Alex, > >> > >> > -Original Message- > >> > From: Alex Williamson [mailto:alex.william...@redhat.com] > >> > Sent: Tuesday, July 28, 2015 9:52 PM > >> > To: Pranavkumar Sawargaonkar > >> > Cc: k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; linux-arm- > >> > ker...@lists.infradead.org; linux-kernel@vger.kernel.org; > >> > christoffer.d...@linaro.org; marc.zyng...@arm.com; > >> > will.dea...@arm.com; bhelg...@google.com; a...@arndb.de; > >> > rob.herr...@linaro.org; eric.au...@linaro.org; patc...@apm.com; > >> > Bhushan Bharat-R65777; Yoder > >> > Stuart-B08248 > >> > Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support. > >> > > >> > On Fri, 2015-07-24 at 14:33 +0530, Pranavkumar Sawargaonkar wrote: > >> > > In current VFIO MSI/MSI-X implementation, linux host kernel > >> > > allocates MSI/MSI-X vectors when userspace requests through vfio > ioctls. > >> > > Vfio creates irqfd mappings to notify MSI/MSI-X interrupts to the > >> > > userspace when raised. > >> > > Guest OS will see emulated MSI/MSI-X controller and receives an > >> > > interrupt when kernel notifies the same via irqfd. > >> > > > >> > > Host kernel allocates MSI/MSI-X using standard linux routines > >> > > like > >> > > pci_enable_msix_range() and pci_enable_msi_range(). 
> >> > > These routines along with requset_irq() in host kernel sets up > >> > > MSI/MSI-X vectors with Physical MSI/MSI-X addresses provided by > >> > > interrupt controller driver in host kernel. > >> > > > >> > > This means when a device is assigned with the guest OS, MSI/MSI-X > >> > > addresses present in PCIe EP are the PAs programmed by the host > >> > > linux > >> > kernel. > >> > > > >> > > In x86 MSI/MSI-X physical address range is reserved and iommu is > >> > > aware about these addreses and transalation is bypassed for these > address range. > >> > > > >> > > Unlike x86, ARM/ARM64 does not reserve MSI/MSI-X Physical address > >> > > range and all the transactions including MSI go through > >> > > iommu/smmu > >> > without bypass. > >> > > This requires extending current vfio MSI layer with additional > >> > > functionality for ARM/ARM64 by 1. Programing IOVA (referred as a > >> > > MSI virtual doorbell address) > >> > >in device's MSI vector as a MSI address. > >> > >This IOVA will be provided by the userspace based on the > >> > >MSI/MSI-X addresses reserved for the guest. > >> > > 2. Create an IOMMU mapping between this IOVA and > >> > >Physical address (PA) assigned to the MSI vector. > >> > > > >> > > This RFC is proposing a solution for MSI/MSI-X passthrough for > >> > ARM/ARM64. > >> > > >> > > >> > Hi Pranavkumar, > >> > > >> > Freescale has the same, or very similar, need, so any solution in > >> > this space will need to work for both ARM and powerpc. I'm not a > >> > big fan of this approach as it seems to require the user to > >> > configure MSI/X via ioctl and then call a separate ioctl mapping > >> > the doorbells. That's more code for the user, more code to get > >> > wrong and potentially a gap between configuring MSI/X and enabling > mappings where we could see IOMMU faults. 
> >> > > >> > If we know that doorbell mappings are required, why can't we set > >> > aside a bank of IOVA space and have them mapped automatically as > >> > MSI/X is being configured? Then the user's need for special > >> > knowledge and handling of this case is limited to setup. The IOVA > >> > space will be mapped and used as needed, we only need the user to > >> > specify the IOVA space reserved for this. Thanks, > >> > >> We probably need a mix of both to support Freescale PowerPC and ARM > >> based machines. > >> In this mix mode kernel vfio driver will reserve some IOVA for > >> mapping MSI page/s. > > > > If vfio is reserving pages independently from the user, this becomes > > what Marc called "shaping" the VM and what x86 effectively does. An > > interface extension should expose these implicit regions so the user > > can avoid them for DMA memory mapping. > > > >> If any other iova mapping will overlap with this then it will return > >> error and user-space. Ideally this should be choosen in such a way > >> that it never overlap, which is easy on some systems but can be > >> tricky on some other system like Freescale Pow
Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame
Hi David, On Aug 4, 2015, at 1:21 AM, David da...@davemloft.net wrote: > From: Vivien Didelot > Date: Sun, 2 Aug 2015 21:46:02 -0400 > >> If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX, >> an EDSA frame is prepended with a 802.1q header once queued. >> >> To fix this, push the VLAN tag to the payload if present, before >> checking the frame protocol. >> >> [note: we may prefer to access directly VLAN TCI from hwaccel frames, >> but this approach is simpler.] >> >> Signed-off-by: Vivien Didelot > > This is a bug fix so should target 'net', but you generated the patch > against 'net-next'. > > In any event, you should be explicit about the tree you are targetting > in order to not waste my time like this, by simply specifying the > tree in your "[PATCH xxx]" text in your subject line. Either > "[PATCH net]" or "[PATCH net-next]". Duly noted. Should I resend it? Thanks, -v -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime
On Tue, Aug 04, 2015 at 05:54:51AM +0200, Borislav Petkov wrote: > On Mon, Aug 03, 2015 at 11:45:24AM -0700, Andy Lutomirski wrote: > > P.P.P.S. Who thought that IRET faults unmasking NMIs made any sense > > whatsoever when NMIs run on an IST stack? Seriously, people? > > What happened with asking Intel for a sane IRET-NG? > > Should be relatively easy - take the current IRET microcode, get rid > of the nasty crap, allocate a new opcode and done. Validation should > actually have *less* to do and can reuse all current test cases. Even easier, just add a few flags (probably 2 or 3 only) that IRET can check to adjust its behaviour. Basically "don't re-enable NMIs yet", maybe something to adjust the behaviour on bad CS/SS/SP/IP and a few such things could possibly help. Maybe all of this could be summarized as a single flag "I'm in a fault handler". Willy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/4] net: switchdev: change fdb addr for a byte array
The address in the switchdev_obj_fdb structure is currently represented as a pointer. Replacing it for a 6-byte array allows switchdev to carry addresses directly read from hardware registers, not stored by the switch chip driver (as in Rocker). Signed-off-by: Vivien Didelot --- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/switchdev.h | 2 +- net/bridge/br_fdb.c | 2 +- net/switchdev/switchdev.c| 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 4cd5a71..faa5db0 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4543,7 +4543,7 @@ static int rocker_port_fdb_dump(const struct rocker_port *rocker_port, hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, found, entry) { if (found->key.pport != rocker_port->pport) continue; - fdb->addr = found->key.addr; + memcpy(fdb->addr, found->key.addr, ETH_ALEN); fdb->vid = rocker_port_vlan_to_vid(rocker_port, found->key.vlan_id); err = obj->cb(rocker_port->dev, obj); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 89da893..e90e1a0 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -70,7 +70,7 @@ struct switchdev_obj { u32 tb_id; } ipv4_fib; struct switchdev_obj_fdb { /* PORT_FDB */ - const unsigned char *addr; + u8 addr[ETH_ALEN]; u16 vid; } fdb; } u; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875d..2c64b6a 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -136,11 +136,11 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = f->addr.addr, .vid = f->vlan_id, }, }; + memcpy(obj.u.fdb.addr, f->addr.addr, ETH_ALEN); switchdev_port_obj_del(f->dst->dev, &obj); } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 33bafa2..28786e8 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ 
-742,11 +742,11 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + memcpy(obj.u.fdb.addr, addr, ETH_ALEN); return switchdev_port_obj_add(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -769,11 +769,11 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct switchdev_obj obj = { .id = SWITCHDEV_OBJ_PORT_FDB, .u.fdb = { - .addr = addr, .vid = vid, }, }; + memcpy(obj.u.fdb.addr, addr, ETH_ALEN); return switchdev_port_obj_del(dev, &obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/4] net: dsa: add support for switchdev FDB objects
Remove the fdb_{add,del,getnext} function pointer in favor of new port_fdb_{add,del,getnext}. Implement the switchdev_port_obj_{add,del,dump} functions in DSA to support the SWITCHDEV_OBJ_PORT_FDB objects. These functions are called from switchdev_port_bridge_{get,set,del}link. Signed-off-by: Vivien Didelot --- drivers/net/dsa/mv88e6171.c | 3 - drivers/net/dsa/mv88e6352.c | 3 - include/net/dsa.h | 16 ++-- net/dsa/slave.c | 221 4 files changed, 129 insertions(+), 114 deletions(-) diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 1c78084..cfa21ed 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -116,9 +116,6 @@ struct dsa_switch_driver mv88e6171_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, - .fdb_add= mv88e6xxx_port_fdb_add, - .fdb_del= mv88e6xxx_port_fdb_del, - .fdb_getnext= mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6171"); diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c index af210ef..eb4630f 100644 --- a/drivers/net/dsa/mv88e6352.c +++ b/drivers/net/dsa/mv88e6352.c @@ -341,9 +341,6 @@ struct dsa_switch_driver mv88e6352_switch_driver = { .port_join_bridge = mv88e6xxx_join_bridge, .port_leave_bridge = mv88e6xxx_leave_bridge, .port_stp_update= mv88e6xxx_port_stp_update, - .fdb_add= mv88e6xxx_port_fdb_add, - .fdb_del= mv88e6xxx_port_fdb_del, - .fdb_getnext= mv88e6xxx_port_fdb_getnext, }; MODULE_ALIAS("platform:mv88e6172"); diff --git a/include/net/dsa.h b/include/net/dsa.h index fbca63b..a090c8a 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -296,12 +296,16 @@ struct dsa_switch_driver { u32 br_port_mask); int (*port_stp_update)(struct dsa_switch *ds, int port, u8 state); - int (*fdb_add)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int (*fdb_del)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - int 
(*fdb_getnext)(struct dsa_switch *ds, int port, - unsigned char *addr, bool *is_static); + + /* +* Forwarding database +*/ + int (*port_fdb_add)(struct dsa_switch *ds, int port, u16 vid, + u8 addr[ETH_ALEN]); + int (*port_fdb_del)(struct dsa_switch *ds, int port, u16 vid, + u8 addr[ETH_ALEN]); + int (*port_fdb_getnext)(struct dsa_switch *ds, int port, u16 *vid, + u8 addr[ETH_ALEN], bool *is_static); }; void register_switch_driver(struct dsa_switch_driver *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0010c69..0f99a17 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "dsa_priv.h" /* slave mii_bus handling ***/ @@ -200,105 +201,6 @@ out: return 0; } -static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], -struct net_device *dev, -const unsigned char *addr, u16 vid, u16 nlm_flags) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_add) - ret = ds->drv->fdb_add(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], -struct net_device *dev, -const unsigned char *addr, u16 vid) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; - - if (ds->drv->fdb_del) - ret = ds->drv->fdb_del(ds, p->port, addr, vid); - - return ret; -} - -static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb, - const unsigned char *addr, u16 vid, - bool is_static, - u32 portid, u32 seq, int type, - unsigned int flags) -{ - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1= 0; - ndm->ndm_pad2= 0; - ndm->ndm_flags = NTF_EXT_LEARNED;
[PATCH 2/4] net: switchdev: support static FDB addresses
This patch adds a is_static boolean to the switchdev_obj_fdb structure, in order to set the ndm_state to either NUD_NOARP or NUD_REACHABLE. Signed-off-by: Vivien Didelot --- include/net/switchdev.h | 1 + net/switchdev/switchdev.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index e90e1a0..0e296b8 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -72,6 +72,7 @@ struct switchdev_obj { struct switchdev_obj_fdb { /* PORT_FDB */ u8 addr[ETH_ALEN]; u16 vid; + bool is_static; } fdb; } u; }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 28786e8..b75897c 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_flags = NTF_SELF; ndm->ndm_type= 0; ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = NUD_REACHABLE; + ndm->ndm_state = obj->u.fdb.is_static ? NUD_NOARP : NUD_REACHABLE; if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) goto nla_put_failure; -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
net: dsa: support switchdev FDB objects
This patchset refactors the DSA and mv88e6xxx code to use the switchdev FDB objects. The first two patches add minor but necessary changes to switchdev, the third one implements the switchdev glue in DSA for FDB routines, and the fourth one refactors the FDB access functions in the mv88e6xxx code. Below is an example (ports 0-2 belong to br0, ports 3-4 belong to br1): # bridge fdb add 3c:97:0e:11:30:6e dev swp2 # bridge fdb add 3c:97:0e:11:40:78 dev swp3 # bridge fdb add 3c:97:0e:11:50:86 dev swp4 # bridge fdb del 3c:97:0e:11:40:78 dev swp3 # bridge fdb 01:00:5e:00:00:01 dev eth0 self permanent 01:00:5e:00:00:01 dev eth1 self permanent 00:50:d2:10:78:15 dev swp0 master br0 permanent 3c:97:0e:11:30:6e dev swp2 self static 00:50:d2:10:78:15 dev swp3 master br1 permanent 3c:97:0e:11:50:86 dev swp4 self static # cat /sys/kernel/debug/dsa0/atu # DB T/P Vec State Addr # 001 Port 004 e 3c:97:0e:11:30:6e # 004 Port 010 e 3c:97:0e:11:50:86 For the 88E6xxx switches, FIDs 1 to num_ports will be reserved for non-bridged ports and bridge groups, and the remaining ones will later be used by VLANs. This change is necessary to welcome the support for hardware VLANs (which will follow soon). 
Cheers, -v Vivien Didelot (4): net: switchdev: change fdb addr for a byte array net: switchdev: support static FDB addresses net: dsa: add support for switchdev FDB objects net: dsa: mv88e6xxx: refactor FDB routines drivers/net/dsa/mv88e6171.c | 6 +- drivers/net/dsa/mv88e6352.c | 6 +- drivers/net/dsa/mv88e6xxx.c | 205 ++-- drivers/net/dsa/mv88e6xxx.h | 31 +++-- drivers/net/ethernet/rocker/rocker.c | 2 +- include/net/dsa.h| 16 ++- include/net/switchdev.h | 3 +- net/bridge/br_fdb.c | 2 +- net/dsa/slave.c | 221 +++ net/switchdev/switchdev.c| 6 +- 10 files changed, 308 insertions(+), 190 deletions(-) -- 2.4.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] perf: Clear MSRs on kexec
On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote: > On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote: > > hi, > > I'm getting the following message on the kdump kernel start > > > > Broken BIOS detected, complain to your hardware vendor.\ > > [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0) > > > > it seems to be caused by NMI watchdog being configured > > and fixed counter values stay in MSRs, which triggers > > warning in check_hw_exists and disables perf support > > in kdump kernel.. which probably does not hurt ;-) > > > > zeroing MSRs during kdump shutdown seems to work (attached) > > but I'm not sure that's the correct place for kdump perf callback > > Right, but why bother? All that kernel needs to do is write a memory > dump to someplace and reboot, right? The less you do, the less can go > wrong. well, I was hunting that 'Broken BIOS..' message which is wrong. I wouldn't think anyone wants to use perf under kdump kernel, but you never know ;-) jirka -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 3/3] cpuidle/coupled: Add sanity check for safe_state_index
From: Xunlei Pang Since we're using cpuidle_driver::safe_state_index directly as the target state index, it's better to add the sanity check at the point of registering the driver. Signed-off-by: Xunlei Pang --- drivers/cpuidle/driver.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 5db1478..def299e 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -223,10 +223,23 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} static int __cpuidle_register_driver(struct cpuidle_driver *drv) { int ret; +#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED + int i; +#endif if (!drv || !drv->state_count) return -EINVAL; +#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED + for (i = drv->state_count - 1; i >= 0; i--) { + if (cpuidle_state_is_coupled(drv, i) && + (drv->safe_state_index == i || +drv->safe_state_index < 0 || +drv->safe_state_index >= drv->state_count)) + return -EINVAL; + } +#endif + if (cpuidle_disabled()) return -ENODEV; -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 1/3] cpuidle/coupled: Remove cpuidle_device::safe_state_index
From: Xunlei Pang cpuidle_device::safe_state_index need to be initialized before use, it should be the same as cpuidle_driver::safe_state_index. We tackled this issue by removing the safe_state_index from the cpuidle_device structure and use the one in the cpuidle_driver structure instead. Suggested-by: Daniel Lezcano Signed-off-by: Xunlei Pang --- drivers/cpuidle/coupled.c | 4 ++-- include/linux/cpuidle.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 7936dce..6493e40 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -473,7 +473,7 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, return entered_state; } entered_state = cpuidle_enter_state(dev, drv, - dev->safe_state_index); + drv->safe_state_index); local_irq_disable(); } @@ -521,7 +521,7 @@ retry: } entered_state = cpuidle_enter_state(dev, drv, - dev->safe_state_index); + drv->safe_state_index); local_irq_disable(); } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d075d34..786ad32 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -84,7 +84,6 @@ struct cpuidle_device { struct list_headdevice_list; #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED - int safe_state_index; cpumask_t coupled_cpus; struct cpuidle_coupled *coupled; #endif -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/3] cpuidle/coupled: Remove redundant 'dev' argument of cpuidle_state_is_coupled()
From: Xunlei Pang For cpuidle_state_is_coupled(), 'dev' is not used, so remove it. Signed-off-by: Xunlei Pang --- drivers/cpuidle/coupled.c | 4 +--- drivers/cpuidle/cpuidle.c | 4 ++-- drivers/cpuidle/cpuidle.h | 7 +++ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 6493e40..1523e2d 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -176,14 +176,12 @@ void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a) /** * cpuidle_state_is_coupled - check if a state is part of a coupled set - * @dev: struct cpuidle_device for the current cpu * @drv: struct cpuidle_driver for the platform * @state: index of the target state in drv->states * * Returns true if the target state is coupled with cpus besides this one */ -bool cpuidle_state_is_coupled(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int state) +bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state) { return drv->states[state].flags & CPUIDLE_FLAG_COUPLED; } diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 3325393..17a6dc0 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -214,7 +214,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, tick_broadcast_exit(); } - if (!cpuidle_state_is_coupled(dev, drv, entered_state)) + if (!cpuidle_state_is_coupled(drv, entered_state)) local_irq_enable(); diff = ktime_to_us(ktime_sub(time_end, time_start)); @@ -263,7 +263,7 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { - if (cpuidle_state_is_coupled(dev, drv, index)) + if (cpuidle_state_is_coupled(drv, index)) return cpuidle_enter_state_coupled(dev, drv, index); return cpuidle_enter_state(dev, drv, index); } diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index ee97e96..178c5ad 
100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -34,15 +34,14 @@ extern int cpuidle_add_sysfs(struct cpuidle_device *dev); extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED -bool cpuidle_state_is_coupled(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int state); +bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state); int cpuidle_enter_state_coupled(struct cpuidle_device *dev, struct cpuidle_driver *drv, int next_state); int cpuidle_coupled_register_device(struct cpuidle_device *dev); void cpuidle_coupled_unregister_device(struct cpuidle_device *dev); #else -static inline bool cpuidle_state_is_coupled(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int state) +static inline +bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state) { return false; } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support.
Hi Bharat, On 28 July 2015 at 23:28, Alex Williamson wrote: > On Tue, 2015-07-28 at 17:23 +, Bhushan Bharat wrote: >> Hi Alex, >> >> > -Original Message- >> > From: Alex Williamson [mailto:alex.william...@redhat.com] >> > Sent: Tuesday, July 28, 2015 9:52 PM >> > To: Pranavkumar Sawargaonkar >> > Cc: k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; linux-arm- >> > ker...@lists.infradead.org; linux-kernel@vger.kernel.org; >> > christoffer.d...@linaro.org; marc.zyng...@arm.com; will.dea...@arm.com; >> > bhelg...@google.com; a...@arndb.de; rob.herr...@linaro.org; >> > eric.au...@linaro.org; patc...@apm.com; Bhushan Bharat-R65777; Yoder >> > Stuart-B08248 >> > Subject: Re: [RFC 0/2] VFIO: Add virtual MSI doorbell support. >> > >> > On Fri, 2015-07-24 at 14:33 +0530, Pranavkumar Sawargaonkar wrote: >> > > In current VFIO MSI/MSI-X implementation, linux host kernel allocates >> > > MSI/MSI-X vectors when userspace requests through vfio ioctls. >> > > Vfio creates irqfd mappings to notify MSI/MSI-X interrupts to the >> > > userspace when raised. >> > > Guest OS will see emulated MSI/MSI-X controller and receives an >> > > interrupt when kernel notifies the same via irqfd. >> > > >> > > Host kernel allocates MSI/MSI-X using standard linux routines like >> > > pci_enable_msix_range() and pci_enable_msi_range(). >> > > These routines along with requset_irq() in host kernel sets up >> > > MSI/MSI-X vectors with Physical MSI/MSI-X addresses provided by >> > > interrupt controller driver in host kernel. >> > > >> > > This means when a device is assigned with the guest OS, MSI/MSI-X >> > > addresses present in PCIe EP are the PAs programmed by the host linux >> > kernel. >> > > >> > > In x86 MSI/MSI-X physical address range is reserved and iommu is aware >> > > about these addreses and transalation is bypassed for these address >> > > range. 
>> > > >> > > Unlike x86, ARM/ARM64 does not reserve MSI/MSI-X Physical address >> > > range and all the transactions including MSI go through iommu/smmu >> > without bypass. >> > > This requires extending current vfio MSI layer with additional >> > > functionality for ARM/ARM64 by 1. Programing IOVA (referred as a MSI >> > > virtual doorbell address) >> > >in device's MSI vector as a MSI address. >> > >This IOVA will be provided by the userspace based on the >> > >MSI/MSI-X addresses reserved for the guest. >> > > 2. Create an IOMMU mapping between this IOVA and >> > >Physical address (PA) assigned to the MSI vector. >> > > >> > > This RFC is proposing a solution for MSI/MSI-X passthrough for >> > ARM/ARM64. >> > >> > >> > Hi Pranavkumar, >> > >> > Freescale has the same, or very similar, need, so any solution in this >> > space >> > will need to work for both ARM and powerpc. I'm not a big fan of this >> > approach as it seems to require the user to configure MSI/X via ioctl and >> > then >> > call a separate ioctl mapping the doorbells. That's more code for the >> > user, >> > more code to get wrong and potentially a gap between configuring MSI/X >> > and enabling mappings where we could see IOMMU faults. >> > >> > If we know that doorbell mappings are required, why can't we set aside a >> > bank of IOVA space and have them mapped automatically as MSI/X is being >> > configured? Then the user's need for special knowledge and handling of >> > this >> > case is limited to setup. The IOVA space will be mapped and used as >> > needed, >> > we only need the user to specify the IOVA space reserved for this. Thanks, >> >> We probably need a mix of both to support Freescale PowerPC and ARM >> based machines. >> In this mix mode kernel vfio driver will reserve some IOVA for mapping >> MSI page/s. > > If vfio is reserving pages independently from the user, this becomes > what Marc called "shaping" the VM and what x86 effectively does. 
An > interface extension should expose these implicit regions so the user can > avoid them for DMA memory mapping. > >> If any other iova mapping will overlap with this then it will return >> error and user-space. Ideally this should be choosen in such a way >> that it never overlap, which is easy on some systems but can be tricky >> on some other system like Freescale PowerPC. This is not sufficient >> for at-least Freescale PowerPC based SOC. This is because of hardware >> limitation, where we need to fit this reserved iova address within >> aperture decided by user-space. So if we allow user-space to change >> this reserved iova address to a value decided by user-spece itself >> then we can support both ARM/PowerPC based solutions. > > Yes, that's my intention, to allow userspace to specify the reserved > region. I believe you have some additional restrictions on the number > of MSI banks available and whether MSI banks can be shared, but I would > hope that doesn't preclude a shared interface with ARM. > >> I have some implementation ready/tested with this approach and if this >> approach looks good then I ca
Re: [RFC][PATCH] ecryptfs: Allow only one instance per lower path
Tyler, Am 04.08.2015 um 01:07 schrieb Tyler Hicks: >> Okay, then I'd argue to give my patch a try although it is not the >> solution >> to the problem I've reported. :-) >> If you don't mind I'll resend with a proper changelog. > > That patch isn't correct since it assumes that all eCryptfs super blocks > are equal if the lower paths (and, ultimately, the lower inode) are > equal. However, the lower path is only one of many properties of an > eCryptfs superblock. For example, the second mount may have been > configured to use a different file encryption key. How would this work if I mount /foo using AES to /mnt_a and /foo again using 3DES to /mnt_b? Wouldn't both eCryptfs instances kill each other's files? Thanks, //richard -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3] iio: adc: xilinx-xadc: Push interrupts into threaded context
On Fri, Jul 24, 2015 at 6:08 PM, Lars-Peter Clausen wrote: > Hi, > > Sorry, but I don't think this patch has been sufficiently tested against a > mainline kernel. The driver won't even probe the way it is right now. > > On 07/21/2015 01:14 AM, Xander Huff wrote: >> >> The driver currently registers a pair of irq handlers using >> request_threaded_irq(), however the synchronization mechanism between the >> hardirq and the threadedirq handler is a regular spinlock. > > > If everything runs in threaded context we don't really need the spinlock > anymore and can use the mutex throughout. That should be better from the performance point of view. > >> >> Unfortunately, this breaks PREEMPT_RT builds, where a spinlock can sleep, >> and is thus not able to be acquired from a hardirq handler. This patch >> gets >> rid of the hardirq handler and pushes all interrupt handling into the >> threaded context. > > > We actually might as well run everything in the hardirq handler (which will > be threaded in PREEMPT_RT). The reason why we have the threaded handler is > because xadc_handle_event() used to sleep, but it doesn't do this anymore. The point is: why have the hard irq at all? If we use hardirq then no mutex can be used and the spinlock will be busy. Is there something I may be missing? > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
linux-next: Tree for Aug 4
Hi all, Changes since 20150803: The security tree gained a conflict against Linus' tree. Non-merge commits (relative to Linus' tree): 5232 5240 files changed, 257463 insertions(+), 119966 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc and an allmodconfig for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), it is also built with powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig and allyesconfig (this fails its final link) and i386, sparc, sparc64 and arm defconfig. Below is a summary of the state of the merge. I am currently merging 224 trees (counting Linus' and 32 trees of patches pending for Linus' tree). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. And to Paul Gortmaker for triage and bug fixes. 
-- Cheers, Stephen Rothwells...@canb.auug.org.au $ git checkout master $ git reset --hard stable Merging origin/master (7e884479bf50 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client) Merging fixes/master (c7e9ad7da219 Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip) Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on module install) Merging arc-current/for-curr (e4140819dadc ARC: signal handling robustify) Merging arm-current/fixes (3473f26592c1 ARM: 8405/1: VDSO: fix regression with toolchains lacking ld.bfd executable) Merging m68k-current/for-linus (1214c525484c m68k: Use for_each_sg()) Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached build errors) Merging mips-fixes/mips-fixes (1795cd9b3a91 Linux 3.16-rc5) Merging powerpc-fixes/fixes (b8d65e9662b1 powerpc/eeh-powernv: Fix unbalanced IRQ warning) Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2) Merging sparc/master (4a10a91756ef Merge branch 'upstream' of git://git.infradead.org/users/pcmoore/audit) Merging net/master (636dba8e12d7 act_mirred: avoid calling tcf_hash_release() when binding) Merging ipsec/master (158cd4af8ded packet: missing dev_put() in packet_do_bind()) Merging sound-current/for-linus (8ec7cfce3762 ALSA: oxygen: Fix logical-not-parentheses warning) Merging pci-current/for-linus (c9ddbac9c891 PCI: Restore PCI_MSIX_FLAGS_BIRMASK definition) Merging wireless-drivers/master (741e3b9902d1 rtlwifi: rtl8723be: Add module parameter for MSI interrupts) Merging driver-core.current/driver-core-linus (cbfe8fa6cd67 Linux 4.2-rc4) Merging tty.current/tty-linus (cbfe8fa6cd67 Linux 4.2-rc4) Merging usb.current/usb-linus (0f79fd807a24 Merge tag 'fixes-for-v4.2-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus) Merging usb-gadget-fixes/fixes (c93e64e91248 usb: udc: core: add device_del() call to error pathway) Merging 
usb-serial-fixes/usb-linus (74472233233f USB: sierra: add 1199:68AB device ID) Merging staging.current/staging-linus (40c3ef9d2f14 staging: comedi: das1800: add missing break in switch) Merging char-misc.current/char-misc-linus (eaf7e98d43c1 Merge tag 'extcon-fixes-for-4.2-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon into char-misc-linus) Merging input-current/for-linus (073e570d7c2c Input: alps - only Dell laptops have separate button bits for v2 dualpoint sticks) Merging crypto-current/master (17fb874dee09 hwrng: core - correct error check of kthread_run call) Merging ide/master (d681f1166919 ide: remove deprecated use of pci api) Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test for PPC_PSERIES) Merging rr-fixes/fixes (fe0d34d242fa module: weaken locking assertion for oops path.) Merging vfio-fixes/for-linus (4bc94d5dc95d vfio: Fix lockdep issue) Merging kselftest-fixes/fixes (fee50f3c8427 selftests/futex: Fix futex_cmp_requeue_pi() error handling) Merging backlight-fixes/for-backlight-fixes (68feaca0b13e backlight: pwm: H
[PATCH v2 net-next 2/2] RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.
Register pernet subsys init/stop functions that will set up and tear down per-net RDS-TCP listen endpoints. Unregister pernet subusys functions on 'modprobe -r' to clean up these end points. Enable keepalive on both accept and connect socket endpoints. The keepalive timer expiration will ensure that client socket endpoints will be removed as appropriate from the netns when an interface is removed from a namespace. Register a device notifier callback that will clean up all sockets (and thus avoid the need to wait for keepalive timeout) when the loopback device is unregistered from the netns indicating that the netns is getting deleted. Signed-off-by: Sowmini Varadhan --- v2: net_device notifier for synchronous cleanup of sockets. net/rds/tcp.c | 163 - net/rds/tcp.h |7 ++- net/rds/tcp_connect.c |6 +- net/rds/tcp_listen.c | 38 +++- 4 files changed, 164 insertions(+), 50 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 98f5de3..339392b 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -35,6 +35,9 @@ #include #include #include +#include +#include +#include #include "rds.h" #include "tcp.h" @@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void) } } -static void rds_tcp_exit(void) -{ - rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); - rds_tcp_listen_stop(); - rds_tcp_destroy_conns(); - rds_trans_unregister(&rds_tcp_transport); - rds_tcp_recv_exit(); - kmem_cache_destroy(rds_tcp_conn_slab); -} -module_exit(rds_tcp_exit); +static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check= rds_tcp_laddr_check, @@ -281,6 +275,138 @@ struct rds_transport rds_tcp_transport = { .t_prefer_loopback = 1, }; +static int rds_tcp_netid; + +/* per-network namespace private data for this module */ +struct rds_tcp_net { + struct socket *rds_tcp_listen_sock; + struct work_struct rds_tcp_accept_w; +}; + +static void rds_tcp_accept_worker(struct work_struct *work) +{ + struct rds_tcp_net *rtn = container_of(work, + struct 
rds_tcp_net, + rds_tcp_accept_w); + + while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0) + cond_resched(); +} + +void rds_tcp_accept_work(struct sock *sk) +{ + struct net *net = sock_net(sk); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + queue_work(rds_wq, &rtn->rds_tcp_accept_w); +} + +static __net_init int rds_tcp_init_net(struct net *net) +{ + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); + if (!rtn->rds_tcp_listen_sock) { + pr_warn("could not set up listen sock\n"); + return -EAFNOSUPPORT; + } + INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); + return 0; +} + +static void __net_exit rds_tcp_exit_net(struct net *net) +{ + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + /* If rds_tcp_exit_net() is called as a result of netns deletion, +* the rds_tcp_kill_sock() device notifier would already have cleaned +* up the listen socket, thus there is no work to do in this function. +* +* If rds_tcp_exit_net() is called as a result of module unload, +* i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then +* we do need to clean up the listen socket here. 
+*/ + if (rtn->rds_tcp_listen_sock) { + rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + rtn->rds_tcp_listen_sock = NULL; + flush_work(&rtn->rds_tcp_accept_w); + } +} + +static struct pernet_operations rds_tcp_net_ops = { + .init = rds_tcp_init_net, + .exit = rds_tcp_exit_net, + .id = &rds_tcp_netid, + .size = sizeof(struct rds_tcp_net), +}; + +static void rds_tcp_kill_sock(struct net *net) +{ + struct rds_tcp_connection *tc, *_tc; + struct sock *sk; + struct list_head tmp_list; + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + rtn->rds_tcp_listen_sock = NULL; + flush_work(&rtn->rds_tcp_accept_w); + INIT_LIST_HEAD(&tmp_list); + spin_lock_irq(&rds_tcp_conn_lock); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + struct net *c_net = read_pnet(&tc->conn->c_net); + + if (net != c_net || !tc->t_sock) + continue; + list_del(&tc->t_tcp_node); + list_add_tail(&tc->t_tcp_node, &tmp_list); + } + spin_unlock_irq(&rds_tcp_conn_lock); + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { + sk = tc->t_s
[PATCH v2 net-next 1/2] RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net
Open the sockets calling sock_create_kern() with the correct struct net pointer, and use that struct net pointer when verifying the address passed to rds_bind(). Signed-off-by: Sowmini Varadhan --- v2: David Ahern comments. net/rds/bind.c|3 ++- net/rds/connection.c | 16 ++-- net/rds/ib.c |2 +- net/rds/ib_cm.c |5 +++-- net/rds/iw.c |2 +- net/rds/iw_cm.c |5 +++-- net/rds/rds.h | 23 +++ net/rds/send.c|3 ++- net/rds/tcp.c |4 ++-- net/rds/tcp_connect.c |3 ++- net/rds/tcp_listen.c | 16 net/rds/transport.c |4 ++-- 12 files changed, 59 insertions(+), 27 deletions(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 4ebd29c..dd666fb 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -185,7 +185,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ret = 0; goto out; } - trans = rds_trans_get_preferred(sin->sin_addr.s_addr); + trans = rds_trans_get_preferred(sock_net(sock->sk), + sin->sin_addr.s_addr); if (!trans) { ret = -EADDRNOTAVAIL; rds_remove_bound(rs); diff --git a/net/rds/connection.c b/net/rds/connection.c index da6da57..d4fecb2 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -117,7 +117,8 @@ static void rds_conn_reset(struct rds_connection *conn) * For now they are not garbage collected once they're created. They * are torn down as the module is removed, if ever. 
*/ -static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, +static struct rds_connection *__rds_conn_create(struct net *net, + __be32 laddr, __be32 faddr, struct rds_transport *trans, gfp_t gfp, int is_outgoing) { @@ -157,6 +158,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, conn->c_faddr = faddr; spin_lock_init(&conn->c_lock); conn->c_next_tx_seq = 1; + rds_conn_net_set(conn, net); init_waitqueue_head(&conn->c_waitq); INIT_LIST_HEAD(&conn->c_send_queue); @@ -174,7 +176,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, * can bind to the destination address then we'd rather the messages * flow through loopback rather than either transport. */ - loop_trans = rds_trans_get_preferred(faddr); + loop_trans = rds_trans_get_preferred(net, faddr); if (loop_trans) { rds_trans_put(loop_trans); conn->c_loopback = 1; @@ -260,17 +262,19 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, return conn; } -struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, +struct rds_connection *rds_conn_create(struct net *net, + __be32 laddr, __be32 faddr, struct rds_transport *trans, gfp_t gfp) { - return __rds_conn_create(laddr, faddr, trans, gfp, 0); + return __rds_conn_create(net, laddr, faddr, trans, gfp, 0); } EXPORT_SYMBOL_GPL(rds_conn_create); -struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, +struct rds_connection *rds_conn_create_outgoing(struct net *net, + __be32 laddr, __be32 faddr, struct rds_transport *trans, gfp_t gfp) { - return __rds_conn_create(laddr, faddr, trans, gfp, 1); + return __rds_conn_create(net, laddr, faddr, trans, gfp, 1); } EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); diff --git a/net/rds/ib.c b/net/rds/ib.c index ba2dffe..1381422 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -317,7 +317,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, * allowed to influence which paths have priority. 
We could call userspace * asserting this policy "routing". */ -static int rds_ib_laddr_check(__be32 addr) +static int rds_ib_laddr_check(struct net *net, __be32 addr) { int ret; struct rdma_cm_id *cm_id; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0da2a45..f40d8f5 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -448,8 +448,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, (unsigned long long)be64_to_cpu(lguid), (unsigned long long)be64_to_cpu(fguid)); - conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport, - GFP_KERNEL); + /* RDS/IB is not currently netns aware, thus init_net */ + conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr, + &rds_ib_transport, GFP_KERNEL); if (IS_ERR(conn)) {
[PATCH v2 net-next 0/2] RDS-TCP: Network namespace support
This patch series contains the set of changes to correctly set up the infra for PF_RDS sockets that use TCP as the transport in multiple network namespaces. Patch 1 in the series is the minimal set of changes to allow a single instance of RDS-TCP to run in any (i.e., init_net or other) net namespace. The changes in this patch set ensure that the execution of 'modprobe [-r] rds_tcp' sets up the kernel TCP sockets relative to the current netns, so that RDS applications can send/recv packets from that netns, and the netns can later be deleted cleanly. Patch 2 of the series further allows multiple RDS-TCP instances, one per network namespace. The changes in this patch allow dynamic creation/tear-down of RDS-TCP client and server sockets across all current and future namespaces. v2 changes from RFC sent out earlier: David Ahern comments in patch 1, net_device notifier in patch 2, patch 3 broken off and submitted separately. Sowmini Varadhan (2): Make RDS-TCP work correctly when it is set up in a netns other than init_net Support multiple RDS-TCP listen endpoints, one per netns. net/rds/bind.c|3 +- net/rds/connection.c | 16 +++-- net/rds/ib.c |2 +- net/rds/ib_cm.c |5 +- net/rds/iw.c |2 +- net/rds/iw_cm.c |5 +- net/rds/rds.h | 23 ++- net/rds/send.c|3 +- net/rds/tcp.c | 167 +++- net/rds/tcp.h |7 ++- net/rds/tcp_connect.c |9 ++- net/rds/tcp_listen.c | 40 net/rds/transport.c |4 +- 13 files changed, 216 insertions(+), 70 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: perf eBPF patch ordering. was: Re: perf test LLVM was: Re: [GIT PULL 00/39] perf tools: filtering events using eBPF programs
Hi Arnaldo, The following changes since commit 922cc21746202956acb41c89a6190bb50805fa31: perf tools: Introduce llvm config options (2015-07-31 12:17:50 -0300) are available in the git repository at: https://github.com/WangNan0/linux.git ebpf for you to fetch changes up to d85bf4b6470b8d860bbae25418e5ae3ccd9711e8: perf tools: Support attach BPF program on uprobe events (2015-08-04 04:59:20 +) The new cset has following improvements: 1. Improve error message: now don't dump LLVM environment setting messages if clang is found. Also, describe how to pre-compile .c file into .o. See: perf tools: Call clang to compile C source to object code https://github.com/WangNan0/linux/commit/264676a5b922aaf1e9be3800fe06d5b67b06cd12 2. Reorder patches, so when 'perf record' is able to accept '--event file.c', the BPF filter should work. Also, an example BPF script file is provided, and the compilation method is described in commit message. See: perf tools: Infrastructure for compiling scriptlets when passing '.c' to --event https://github.com/WangNan0/linux/commit/eca622f4a88e1a791fc2405c398256ad572eba54 3. Introduce 'perf test BPF', which uses the previously introduced scriptlet, forks a 'perf record' to utilise it and uses 'perf report' to check the result. See: perf tests: Enforce LLVM test for BPF test https://github.com/WangNan0/linux/commit/a7cdab453863c580446dc2c3a3f3a86f21b770ce perf test: Enable 'perf test' run as test targets https://github.com/WangNan0/linux/commit/b14f2627e95d348be5ec19bd24a5117e8c2ffe46 and perf test: Add 'perf test BPF' https://github.com/WangNan0/linux/commit/8414217dbfa57df4dbb55642dc26205e1c7cbdf1 4. Fix a bug where a filename that doesn't contain '/' is recognised as an event name and then fails to be applied, by adjusting the order in parse-events.l: bring {bpf_object} and {bpf_source} ahead. You need to pop 9 patches from your perf/ebpf tree and rebase my tree. However, until patch "perf tools: Enable passing bpf object file to --event" the changes are tiny.
Please check. Thank you. On 2015/8/4 3:49, Arnaldo Carvalho de Melo wrote: Em Mon, Aug 03, 2015 at 01:11:16PM -0300, Arnaldo Carvalho de Melo escreveu: ERROR:unable to compile ./foo.c Hint:Check error message shown above. LLVM 3.7 or newer is required. Which can be found from http://llvm.org You may want to try git trunk: git clone http://llvm.org/git/llvm.git and or: perf record [] -- [] -e, --eventevent selector. use 'perf list' to list available events [root@felicio ~]# Now to find a hello.c BPF scriptlet... So, we do not need to provide all this LLVM environment installation hints when we get to any error, i.e. the one above was just becasuse "./foo.c" doesn't exist, clang ran successfully, so no need for telling the user how to install it. The following error also shouldn't emit those hints: [root@felicio ~]# perf record -e ./lock_page.bpf.c sleep 1 /root/./lock_page.bpf.c:1:5: error: expected parameter declarator SEC("lock_page=__lock_page page->flags") ^ /root/./lock_page.bpf.c:1:5: error: expected ')' /root/./lock_page.bpf.c:1:4: note: to match this '(' SEC("lock_page=__lock_page page->flags") ^ /root/./lock_page.bpf.c:1:1: warning: type specifier missing, defaults to 'int' [-Wimplicit-int] SEC("lock_page=__lock_page page->flags") ^ /root/./lock_page.bpf.c:1:41: error: expected ';' after top level declarator SEC("lock_page=__lock_page page->flags") ^ ; /root/./lock_page.bpf.c:2:22: warning: declaration of 'struct pt_regs' will not be visible outside of this function [-Wvisibility] int lock_page(struct pt_regs *ctx, int err, unsigned long flags) ^ 2 warnings and 3 errors generated. ERROR: unable to compile ./lock_page.bpf.c Hint:Check error message shown above. LLVM 3.7 or newer is required. 
Which can be found from http://llvm.org You may want to try git trunk: git clone http://llvm.org/git/llvm.git and git clone http://llvm.org/git/clang.git Or fetch the latest clang/llvm 3.7 from pre-built llvm packages for debian/ubuntu: http://llvm.org/apt If you are using old version of clang, change 'clang-bpf-cmd-template' option in [llvm] section of ~/.perfconfig to: "$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ -working-directory $WORKING_DIR -c $CLANG_SOURCE \ -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -" (Replace /path/to/llc with path to your llc) Hint:You can also pre-compile it into .o invalid or unsupported event: './
Re: [PATCH] net: dsa: fix EDSA frame from hwaccel frame
From: Vivien Didelot Date: Sun, 2 Aug 2015 21:46:02 -0400 > If the underlying network device features NETIF_F_HW_VLAN_CTAG_TX, > an EDSA frame is prepended with a 802.1q header once queued. > > To fix this, push the VLAN tag to the payload if present, before > checking the frame protocol. > > [note: we may prefer to access directly VLAN TCI from hwaccel frames, > but this approach is simpler.] > > Signed-off-by: Vivien Didelot This is a bug fix so should target 'net', but you generated the patch against 'net-next'. In any event, you should be explicit about the tree you are targeting in order to not waste my time like this, by simply specifying the tree in your "[PATCH xxx]" text in your subject line. Either "[PATCH net]" or "[PATCH net-next]". Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] cpuidle/coupled: Init cpuidle_device::safe_state_index
Hi Daniel, Daniel Lezcano wrote 2015-08-04 AM 12:22:54: > Re: [PATCH] cpuidle/coupled: Init cpuidle_device::safe_state_index > > On 07/23/2015 02:31 PM, Xunlei Pang wrote: > > From: Xunlei Pang > > > > cpuidle_device::safe_state_index need to be initialized before use, > > so assign the driver's safe_state_index to it. > > > > Signed-off-by: Xunlei Pang > > --- > > drivers/cpuidle/cpuidle.c | 2 ++ > > 1 file changed, 2 insertions(+) > > > > diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c > > index e8e2775..ed5c8efe 100644 > > --- a/drivers/cpuidle/cpuidle.c > > +++ b/drivers/cpuidle/cpuidle.c > > @@ -585,6 +585,8 @@ int cpuidle_register(struct cpuidle_driver *drv, > > */ > > if (coupled_cpus) > >device->coupled_cpus = *coupled_cpus; > > + > > + device->safe_state_index = drv->safe_state_index; > > Hey, good catch. We are lucky the safe_state_index is always zero. > > I think we can simplify the code by removing the safe_state_index from > the cpuidle_device structure and use the one in the cpuidle_driver > structure in coupled.c Will do, thanks! Regards, -Xunlei ZTE Information Security Notice: The information contained in this mail (and any attachment transmitted herewith) is privileged and confidential and is intended for the exclusive use of the addressee(s). If you are not an intended recipient, any disclosure, reproduction, distribution or other dissemination or use of the information contained is strictly prohibited. If you have received this mail in error, please delete it and notify us immediately. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] smaps: fill missing fields for vma(VM_HUGETLB)
On Tue, Aug 04, 2015 at 02:55:30AM +, Naoya Horiguchi wrote: > On Wed, Jul 29, 2015 at 04:20:59PM -0700, Mike Kravetz wrote: > > On 07/29/2015 12:08 PM, David Rientjes wrote: > > >On Tue, 28 Jul 2015, Jörn Engel wrote: > > > > > >>Well, we definitely need something. Having a 100GB process show 3GB of > > >>rss is not very useful. How would we notice a memory leak if it only > > >>affects hugepages, for example? > > >> > > > > > >Since the hugetlb pool is a global resource, it would also be helpful to > > >determine if a process is mapping more than expected. You can't do that > > >just by adding a huge rss metric, however: if you have 2MB and 1GB > > >hugepages configured you wouldn't know if a process was mapping 512 2MB > > >hugepages or 1 1GB hugepage. > > > > > >That's the purpose of hugetlb_cgroup, after all, and it supports usage > > >counters for all hstates. The test could be converted to use that to > > >measure usage if configured in the kernel. > > > > > >Beyond that, I'm not sure how a per-hstate rss metric would be exported to > > >userspace in a clean way and other ways of obtaining the same data are > > >possible with hugetlb_cgroup. I'm not sure how successful you'd be in > > >arguing that we need separate rss counters for it. > > > > If I want to track hugetlb usage on a per-task basis, do I then need to > > create one cgroup per task? > > > > For example, suppose I have many tasks using hugetlb and the global pool > > is getting low on free pages. It might be useful to know which tasks are > > using hugetlb pages, and how many they are using. > > > > I don't actually have this need (I think), but it appears to be what > > Jörn is asking for. > > One possible way to get hugetlb metric in per-task basis is to walk page > table via /proc/pid/pagemap, and counting page flags for each mapped page > (we can easily do this with tools/vm/page-types.c like "page-types -p > -b huge"). 
This is obviously slower than just storing the counter as > in-kernel data and just exporting it, but might be useful in some situation. BTW, currently smaps doesn't report any meaningful info for vma(VM_HUGETLB). I wrote the following patch, which hopefully is helpful for your purpose. Thanks, Naoya Horiguchi --- From: Naoya Horiguchi Subject: [PATCH] smaps: fill missing fields for vma(VM_HUGETLB) Currently smaps reports many zero fields for vma(VM_HUGETLB), which is inconvenient when we want to know per-task or per-vma base hugetlb usage. This patch enables these fields by introducing smaps_hugetlb_range(). before patch: Size: 20480 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced:0 kB Anonymous: 0 kB AnonHugePages: 0 kB Swap: 0 kB KernelPageSize: 2048 kB MMUPageSize:2048 kB Locked:0 kB VmFlags: rd wr mr mw me de ht after patch: Size: 20480 kB Rss: 18432 kB Pss: 18432 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 18432 kB Referenced:18432 kB Anonymous: 18432 kB AnonHugePages: 0 kB Swap: 0 kB KernelPageSize: 2048 kB MMUPageSize:2048 kB Locked:0 kB VmFlags: rd wr mr mw me de ht Signed-off-by: Naoya Horiguchi --- fs/proc/task_mmu.c | 27 +++ 1 file changed, 27 insertions(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ca1e091881d4..c7218603306d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -610,12 +610,39 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) seq_putc(m, '\n'); } +#ifdef CONFIG_HUGETLB_PAGE +static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, +unsigned long addr, unsigned long end, +struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = walk->vma; + struct page *page = NULL; + + if (pte_present(*pte)) { + page = vm_normal_page(vma, addr, *pte); + } else if (is_swap_pte(*pte)) { + swp_entry_t swpent = pte_to_swp_entry(*pte); + + if 
(is_migration_entry(swpent)) + page = migration_entry_to_page(swpent); + } + if (page) + smaps_account(mss, page, huge_page_size(hstate_vma(vma)), + pte_young(*pte), pte_dirty(*pte)); + return 0; +} +#endif /* HUGETLB_PAGE */ + static int show_smap(struct seq_file *m, void *v, int is_pid) { struct vm_area_struct *vma = v; struct mem_size_stats mss; struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_ran
Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG
From: Joe Perches Date: Mon, 03 Aug 2015 21:02:21 -0700 > On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote: >> On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote: >> > This patch replaces calls to net_dbg_ratelimited when !DEBUG with >> > no_printk, keeping with the idiom of all the other debug print helpers. >> >> Makes sense, thanks Jason. > > Perhaps better still would be to use if (0) no_printk so that > the call and whatever argument calls the net_dbg_ratelimited > makes are completely eliminated. Agreed. Jason please respin your patch to work this way. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: 4.2-rc5 rcu stalls.
On 08/03/2015 06:03 PM, Paul E. McKenney wrote: >> > Ugh, that doesn't revert cleanly. Got something handy ? > I do not, but perhaps either Sasha or Frederic do. I've attached a revert courtesy of Peter. Thanks, Sasha include/linux/preempt.h | 12 kernel/sched/core.c | 34 +++--- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 84991f185173..3a93d4cdcce9 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -137,18 +137,6 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) -#define preempt_active_enter() \ -do { \ - preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ - barrier(); \ -} while (0) - -#define preempt_active_exit() \ -do { \ - barrier(); \ - preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ -} while (0) - #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..bd378bd21a0e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2983,7 +2983,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev) * - return from syscall or exception to user-space * - return from interrupt-handler to user-space * - * WARNING: must be called with preemption disabled! + * WARNING: all callers must re-check need_resched() afterward and reschedule + * accordingly in case an event triggered the need for rescheduling (such as + * an interrupt waking up a task) while preemption was disabled in __schedule(). 
*/ static void __sched __schedule(void) { @@ -2992,6 +2994,7 @@ static void __sched __schedule(void) struct rq *rq; int cpu; + preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_note_context_switch(); @@ -3058,6 +3061,8 @@ static void __sched __schedule(void) } balance_callback(rq); + + sched_preempt_enable_no_resched(); } static inline void sched_submit_work(struct task_struct *tsk) @@ -3078,9 +3083,7 @@ asmlinkage __visible void __sched schedule(void) sched_submit_work(tsk); do { - preempt_disable(); __schedule(); - sched_preempt_enable_no_resched(); } while (need_resched()); } EXPORT_SYMBOL(schedule); @@ -3119,14 +3122,15 @@ void __sched schedule_preempt_disabled(void) static void __sched notrace preempt_schedule_common(void) { do { - preempt_active_enter(); + __preempt_count_add(PREEMPT_ACTIVE); __schedule(); - preempt_active_exit(); + __preempt_count_sub(PREEMPT_ACTIVE); /* * Check again in case we missed a preemption opportunity * between schedule and now. */ + barrier(); } while (need_resched()); } @@ -3172,13 +3176,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) return; do { - /* - * Use raw __prempt_count() ops that don't call function. - * We can't call functions before disabling preemption which - * disarm preemption tracing recursions. 
- */ - __preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); - barrier(); + __preempt_count_add(PREEMPT_ACTIVE); /* * Needs preempt disabled in case user_exit() is traced * and the tracer calls preempt_enable_notrace() causing @@ -3188,8 +3186,8 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) __schedule(); exception_exit(prev_ctx); + __preempt_count_sub(PREEMPT_ACTIVE); barrier(); - __preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); } while (need_resched()); } EXPORT_SYMBOL_GPL(preempt_schedule_notrace); @@ -3212,11 +3210,17 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) prev_state = exception_enter(); do { - preempt_active_enter(); + __preempt_count_add(PREEMPT_ACTIVE); local_irq_enable(); __schedule(); local_irq_disable(); - preempt_active_exit(); + __preempt_count_sub(PREEMPT_ACTIVE); + + /* + * Check again in case we missed a preemption opportunity + * between schedule and now. + */ + barrier(); } while (need_resched()); exception_exit(prev_state);
[PATCH 1/2] x86/lguest: clean up lguest_setup_irq.
We make it static and hoist it higher in the file for the next patch. We also give a nice panic if it fails during boot. Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 43 ++- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 433e5a7dd37f..f38b7e8a88d2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -835,6 +835,26 @@ static struct irq_chip lguest_irq_controller = { .irq_unmask = enable_lguest_irq, }; +/* + * Interrupt descriptors are allocated as-needed, but low-numbered ones are + * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it + * tells us the irq is already used: other errors (ie. ENOMEM) we take + * seriously. + */ +static int lguest_setup_irq(unsigned int irq) +{ + int err; + + /* Returns -ve error or vector number. */ + err = irq_alloc_desc_at(irq, 0); + if (err < 0 && err != -EEXIST) + return err; + + irq_set_chip_and_handler_name(irq, &lguest_irq_controller, + handle_level_irq, "level"); + return 0; +} + static int lguest_enable_irq(struct pci_dev *dev) { u8 line = 0; @@ -879,26 +899,6 @@ static void __init lguest_init_IRQ(void) } /* - * Interrupt descriptors are allocated as-needed, but low-numbered ones are - * reserved by the generic x86 code. So we ignore irq_alloc_desc_at if it - * tells us the irq is already used: other errors (ie. ENOMEM) we take - * seriously. - */ -int lguest_setup_irq(unsigned int irq) -{ - int err; - - /* Returns -ve error or vector number. */ - err = irq_alloc_desc_at(irq, 0); - if (err < 0 && err != -EEXIST) - return err; - - irq_set_chip_and_handler_name(irq, &lguest_irq_controller, - handle_level_irq, "level"); - return 0; -} - -/* * Time. 
* * It would be far better for everyone if the Guest had its own clock, but @@ -1028,7 +1028,8 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ - lguest_setup_irq(0); + if (lguest_setup_irq(0) != 0) + panic("Could not set up timer irq"); irq_set_handler(0, lguest_time_irq); clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] x86/lguest: Do not setup unused irq vectors
From: Thomas Gleixner No point in assigning the interrupt vectors if there is no interrupt chip installed. Move it to lguest_setup_irq(). (And call it from lguest_enable_irq). Signed-off-by: Thomas Gleixner Signed-off-by: Rusty Russell (fixed typo) Signed-off-by: Rusty Russell --- arch/x86/lguest/boot.c | 22 +- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f38b7e8a88d2..2566c97c01c8 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -850,21 +850,29 @@ static int lguest_setup_irq(unsigned int irq) if (err < 0 && err != -EEXIST) return err; + /* +* Tell the Linux infrastructure that the interrupt is +* controlled by our level-based lguest interrupt controller. +*/ irq_set_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); + + /* Some systems map "vectors" to interrupts weirdly. Not us! */ + __this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq], irq); return 0; } static int lguest_enable_irq(struct pci_dev *dev) { + int err; u8 line = 0; /* We literally use the PCI interrupt line as the irq number. */ pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line); - irq_set_chip_and_handler_name(line, &lguest_irq_controller, - handle_level_irq, "level"); - dev->irq = line; - return 0; + err = lguest_setup_irq(line); + if (!err) + dev->irq = line; + return err; } /* We don't do hotplug PCI, so this shouldn't be called. */ @@ -875,17 +883,13 @@ static void lguest_disable_irq(struct pci_dev *dev) /* * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware - * interrupt (except 128, which is used for system calls), and then tells the - * Linux infrastructure that each interrupt is controlled by our level-based - * lguest interrupt controller. + * interrupt (except 128, which is used for system calls). 
*/ static void __init lguest_init_IRQ(void) { unsigned int i; for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) { - /* Some systems map "vectors" to interrupts weirdly. Not us! */ - __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR)); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch 2/7] x86/lguest: Do not setup unused irq vectors
Thomas Gleixner writes: > On Mon, 3 Aug 2015, Rusty Russell wrote: >> Thomas Gleixner writes: >> > + >> > + /* Some systems map "vectors" to interrupts weirdly. Not us! */ >> > + __this_cpu_write(vector_irq[FIRST_EXTERNAL_VECTOR + irq, irq); >> >> Missing ]. > > Doh. > >> [ 17.751889] do_IRQ: 0.33 No irq handler for vector (irq -1) >> >> You broke interrupts :( > > Right, because I missed the other place which fiddles with > interrupts. Does the patch below fix the issue? Yep. I added error handling. I reworked it into two patches: one which staticizes lguest_setup_irq() and moves it up, the other of which applies your changes. Will post, you can take them... Thanks, Rusty. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH V9 0/5] map GHES memory region according to EFI memory map
On Mon, Aug 03, 2015 at 05:23:54PM +0100, Matt Fleming wrote: > Rafael, Boris? The ghes.c change looks fine I guess. The whole patchset makes sense now, with the arch bits extracted. So Acked-by: Borislav Petkov However, we probably should work towards adhering to EFI memory attributes on x86, long term, as we talked. But that's a future thing. Thanks. -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface
On Mon, Aug 03, 2015 at 09:07:53PM -0700, Andy Lutomirski wrote: > Except that, with the new interface, static_key_likely is the other > way around, right? If the key is true (i.e. enabled), then it doesn't > branch. > > I think of the key as a boolean thing that happens to work by code > patching under the hood. The fancy patching affects the performance > but doesn't really make it functionally different from a regular > variable. How about making it extra explicit: > > static_key_set(&key, value); > > where value is a bool or maybe even an unsigned int? Let's have an actual example: + if (static_branch_likely(&__use_tsc)) { + u64 tsc_now = rdtsc(); + + /* return the value in ns */ + return cycles_2_ns(tsc_now); + } Well, I can see how the likely/unlikely things can confuse. They actually don't have anything to do with where we will branch to but how the code will be laid out, AFAICT. So I'm reading this as: if (use_tsc) { RDTSC; return; } and then it is straightforward. So in this case, the jump will be disabled and we won't branch anywhere. It actually becomes: RDTSC; return; which can't get any more optimal than it is. Hmm, yeah, I see how that can be confusing... But the asm is finally fine. Hey, at least one thing... -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface
On Mon, Aug 3, 2015 at 8:37 PM, Borislav Petkov wrote: > On Mon, Aug 03, 2015 at 05:57:57PM -0400, Steven Rostedt wrote: >> That's implementation details, not a general concept that users will >> need to know about. > > Why? > > It is a branch, regardless of which insn is used on which arch - it is > either active and you *branch* to that code or *inactive* and you don't. > So now it is actually what it should've been from the beginning... Except that, with the new interface, static_key_likely is the other way around, right? If the key is true (i.e. enabled), then it doesn't branch. I think of the key as a boolean thing that happens to work by code patching under the hood. The fancy patching affects the performance but doesn't really make it functionally different from a regular variable. How about making it extra explicit: static_key_set(&key, value); where value is a bool or maybe even an unsigned int? --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [TRIVIAL PATCH] block: Correct misuses of 0x%
> On Aug 4, 2015, at 10:27, Joe Perches wrote: > > On Tue, 2015-08-04 at 10:19 +0800, yalin wang wrote: >> Ping ? >>> On Aug 3, 2015, at 16:56, yalin wang wrote: >>> >>> On Aug 3, 2015, at 16:03, Joe Perches wrote: On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote: >> On Aug 3, 2015, at 04:25, Joe Perches wrote: >> >> Correct misuse of 0x%d in logging messages. >> [] > why not use like this : dev_dbg(&h->pdev->dev, " Max outstanding > commands = %#x\n” ? > %#x will add 0x prefix automatically . It's generally a consistency thing. A 0 value would be emitted as 0 and not 0x0. >>> i try on my ubuntu , >>> >>> static int __init throtl_init(void) >>> >>> { >>> >>>printk("module init test: %#x %p\n", 0, (void *)0x123); >>> >>> return 0; >>> >>> >>> } >>> >>> >>> module_init(throtl_init); >>> >>> #uname -a >>> Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 >>> UTC 2015 x86_64 x86_64 x86_64 GNU/Linux >>> >>> #dmesg >>> [259356.375586] module init test: 0x0 0123 >>> >>> it seems don’t need 0x%x for 0, just need %#x for all numbers. >>> there are lots of use like this, i can change them if needed: >>> >>> # egrep -r -i '0x%\d*x' . | wc -l >>> 11776 > > I suggest not, it's not a standard usage and the 0 > may be unexpected. > ok, printk for %x is really not compatible with glibc printf behavior, another is %p, printk print %p as hex but not with 0x prefix, while printf print %p as 0x…. with prefix, is this need change ? so we don’t need lots of 0x%p printk . -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG
On Mon, 2015-08-03 at 20:57 -0700, Joe Perches wrote: > On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote: > > This patch replaces calls to net_dbg_ratelimited when !DEBUG with > > no_printk, keeping with the idiom of all the other debug print helpers. > > Makes sense, thanks Jason. Perhaps better still would be to use if (0) no_printk so that the call, and any function calls made in the net_dbg_ratelimited arguments, are completely eliminated. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG
On Tue, 2015-08-04 at 05:26 +0200, Jason A. Donenfeld wrote: > The pr_debug family of functions turns into a no-op when -DDEBUG is not > specified, opting instead to call "no_printk", which gets compiled to a > no-op (but retains gcc's nice warnings about printf-style arguments). > > The problem with net_dbg_ratelimited is that it is defined to be a > variant of net_ratelimited_function, which expands to essentially: > > if (net_ratelimit()) > pr_debug(fmt, ...); > > When DEBUG is not defined, then this becomes, > > if (net_ratelimit()) > ; > > This seems benign, except it isn't. Firstly, there's the obvious > overhead of calling net_ratelimit needlessly, which does quite some book > keeping for the rate limiting. Given that the pr_debug and > net_dbg_ratelimited family of functions are sprinkled liberally through > performance critical code, with developers assuming they'll be compiled > out to a no-op most of the time, we certainly do not want this needless > book keeping. Secondly, and most visibly, even though no debug message > is printed when DEBUG is not defined, if there is a flood of > invocations, dmesg winds up peppered with messages such as > "net_ratelimit: 320 callbacks suppressed". This is because our > aforementioned net_ratelimit() function actually prints this text in > some circumstances. It's especially odd to see this when there isn't any > other accompanying debug message. > > So, in sum, it doesn't make sense to have this function's current > behavior, and instead it should match what every other debug family of > functions in the kernel does with !DEBUG -- nothing. > > This patch replaces calls to net_dbg_ratelimited when !DEBUG with > no_printk, keeping with the idiom of all the other debug print helpers. Makes sense, thanks Jason. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/3] Reverted "selftests: add hugetlbfstest"
Rebased as suggested by Naoya Horiguch This manually reverts 7e50533d4b84289e4f01de56d6f98e9c64e2229e The hugetlbfstest test depends on hugetlb pages being counted in a task's rss. This functionality is not in the kernel, so the test will always fail. Remove test to avoid confusion. Signed-off-by: Mike Kravetz --- tools/testing/selftests/vm/Makefile| 1 - tools/testing/selftests/vm/hugetlbfstest.c | 86 -- tools/testing/selftests/vm/run_vmtests | 11 3 files changed, 98 deletions(-) delete mode 100644 tools/testing/selftests/vm/hugetlbfstest.c diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 2da6608..bb888c6 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -4,7 +4,6 @@ CFLAGS = -Wall BINARIES = compaction_test BINARIES += hugepage-mmap BINARIES += hugepage-shm -BINARIES += hugetlbfstest BINARIES += map_hugetlb BINARIES += mlock2-tests BINARIES += on-fault-limit diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c deleted file mode 100644 index 02e1072..000 --- a/tools/testing/selftests/vm/hugetlbfstest.c +++ /dev/null @@ -1,86 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef unsigned long long u64; - -static size_t length = 1 << 24; - -static u64 read_rss(void) -{ - char buf[4096], *s = buf; - int i, fd; - u64 rss; - - fd = open("/proc/self/statm", O_RDONLY); - assert(fd > 2); - memset(buf, 0, sizeof(buf)); - read(fd, buf, sizeof(buf) - 1); - for (i = 0; i < 1; i++) - s = strchr(s, ' ') + 1; - rss = strtoull(s, NULL, 10); - return rss << 12; /* assumes 4k pagesize */ -} - -static void do_mmap(int fd, int extra_flags, int unmap) -{ - int *p; - int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; - u64 before, after; - int ret; - - before = read_rss(); - p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); - assert(p != MAP_FAILED || - !"mmap 
returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before - length) < 0x4 || - !"rss didn't grow as expected"); - if (!unmap) - return; - ret = munmap(p, length); - assert(!ret || !"munmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before) < 0x4 || - !"rss didn't shrink as expected"); -} - -static int open_file(const char *path) -{ - int fd, err; - - unlink(path); - fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL - | O_LARGEFILE | O_CLOEXEC, 0600); - assert(fd > 2); - unlink(path); - err = ftruncate(fd, length); - assert(!err); - return fd; -} - -int main(void) -{ - int hugefd, fd; - - fd = open_file("/dev/shm/hugetlbhog"); - hugefd = open_file("/hugepages/hugetlbhog"); - - system("echo 100 > /proc/sys/vm/nr_hugepages"); - do_mmap(-1, MAP_ANONYMOUS, 1); - do_mmap(fd, 0, 1); - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1); - do_mmap(hugefd, 0, 1); - do_mmap(hugefd, MAP_HUGETLB, 1); - /* Leak the last one to test do_exit() */ - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0); - printf("oll korrekt.\n"); - return 0; -} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 231174a..b7ae2b6 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -76,17 +76,6 @@ else fi echo "" -echo "running hugetlbfstest" -echo "" -./hugetlbfstest -if [ $? -ne 0 ]; then - echo "[FAIL]" - exitcode=1 -else - echo "[PASS]" -fi - -echo "" echo "running userfaultfd" echo "" ./userfaultfd 128 32 -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime
On Mon, Aug 03, 2015 at 11:45:24AM -0700, Andy Lutomirski wrote: > P.P.P.S. Who thought that IRET faults unmasking NMIs made any sense > whatsoever when NMIs run on an IST stack? Seriously, people? What happened with asking Intel for a sane IRET-NG? Should be relatively easy - take the current IRET microcode, get rid of the nasty crap, allocate a new opcode and done. Validation should actually have *less* to do and can reuse all current test cases. :-) -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] x86, gfp: Cache best near node for memory allocation.
Hi TJ, Sorry for the late reply. On 07/16/2015 05:48 AM, Tejun Heo wrote: .. so in initialization pharse makes no sense any more. The best near online node for each cpu should be cached somewhere. I'm not really following. Is this because the now offline node can later come online and we'd have to break the constant mapping invariant if we update the mapping later? If so, it'd be nice to spell that out. Yes. Will document this in the next version. .. +int get_near_online_node(int node) +{ + return per_cpu(x86_cpu_to_near_online_node, + cpumask_first(&node_to_cpuid_mask_map[node])); +} +EXPORT_SYMBOL(get_near_online_node); Umm... this function is sitting on a fairly hot path and scanning a cpumask each time. Why not just build a numa node -> numa node array? Indeed. Will avoid to scan a cpumask. .. static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask, unsigned int order) { - VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)); + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); + +#if IS_ENABLED(CONFIG_X86) && IS_ENABLED(CONFIG_NUMA) + if (!node_online(nid)) + nid = get_near_online_node(nid); +#endif return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } Ditto. Also, what's the synchronization rules for NUMA node on/offlining. If you end up updating the mapping later, how would that be synchronized against the above usages? I think the near online node map should be updated when node online/offline happens. But about this, I think the current numa code has a little problem. As you know, firmware info binds a set of CPUs and memory to a node. But at boot time, if the node has no memory (a memory-less node) , it won't be online. But the CPUs on that node is available, and bound to the near online node. (Here, I mean numa_set_node(cpu, node).) Why does the kernel do this ? I think it is used to ensure that we can allocate memory successfully by calling functions like alloc_pages_node() and alloc_pages_exact_node(). 
By these two functions, any CPU should be bound to a node that has memory so that memory allocation can be successful. That means, for a memory-less node at boot time, the CPUs on the node are online, but the node is not online. That also means "the node is online" is equivalent to "the node has memory". Actually, there is a lot of code in the kernel that relies on this rule. But, 1) in cpu_up(), it will try to online a node, and it doesn't check if the node has memory. 2) in try_offline_node(), it offlines CPUs first, and then the memory. This behavior looks a little weird, or let's say it is ambiguous. It seems that a NUMA node consists of CPUs and memory. So if the CPUs are online, the node should be online. And also, the main purpose of this patch-set is to make the cpuid <-> nodeid mapping persistent. After this patch-set, alloc_pages_node() and alloc_pages_exact_node() won't depend on cpuid <-> nodeid mapping any more. So the node should be online if the CPUs on it are online. Otherwise, we cannot set up interfaces of CPUs under /sys. Unfortunately, since I don't have a machine with a memory-less node, I cannot reproduce the problem right now. How do you think the node online behavior should be changed ? Thanks. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC] kcore:change kcore_read to make sure the kernel read is safe
This change kcore_read() to use __copy_from_user_inatomic() to copy data from kernel address, because kern_addr_valid() just make sure page table is valid during call it, whne it return, the page table may change, for example, like set_fixmap() function will change kernel page table, then maybe trigger kernel crash if encounter this unluckily. Signed-off-by: yalin wang --- fs/proc/kcore.c | 30 -- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 92e6726..b085fde 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -86,8 +86,8 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) size = try; *nphdr = *nphdr + 1; } - *elf_buflen = sizeof(struct elfhdr) + - (*nphdr + 2)*sizeof(struct elf_phdr) + + *elf_buflen = sizeof(struct elfhdr) + + (*nphdr + 2)*sizeof(struct elf_phdr) + 3 * ((sizeof(struct elf_note)) + roundup(sizeof(CORE_STR), 4)) + roundup(sizeof(struct elf_prstatus), 4) + @@ -435,6 +435,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) size_t elf_buflen; int nphdr; unsigned long start; + unsigned long page = 0; read_lock(&kclist_lock); size = get_kcore_size(&nphdr, &elf_buflen); @@ -485,7 +486,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) start = kc_offset_to_vaddr(*fpos - elf_buflen); if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) tsz = buflen; - + while (buflen) { struct kcore_list *m; @@ -515,15 +516,32 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) } else { if (kern_addr_valid(start)) { unsigned long n; + mm_segment_t old_fs = get_fs(); + + if (page == 0) { + page = __get_free_page(GFP_KERNEL); + if (page == 0) + return -ENOMEM; - n = copy_to_user(buffer, (char *)start, tsz); + } + set_fs(KERNEL_DS); + pagefault_disable(); + n = __copy_from_user_inatomic((void *)page, + (__force const void __user *)start, + tsz); + pagefault_enable(); + set_fs(old_fs); + if (n) + memset((void 
*)page + tsz - n, 0, n); + + n = copy_to_user(buffer, (char *)page, tsz); /* * We cannot distinguish between fault on source * and fault on destination. When this happens * we clear too and hope it will trigger the * EFAULT again. */ - if (n) { + if (n) { if (clear_user(buffer + tsz - n, n)) return -EFAULT; @@ -540,7 +558,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) start += tsz; tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen); } - + free_page(page); return acc; } -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH -v2 6/8] jump_label: Add a new static_key interface
On Mon, Aug 03, 2015 at 05:57:57PM -0400, Steven Rostedt wrote: > That's implementation details, not a general concept that users will > need to know about. Why? It is a branch, regardless of which insn is used on which arch - it is either active and you *branch* to that code or *inactive* and you don't. So now it is actually what it should've been from the beginning... I realize simplifying the terminology around those jump labels/static branches things comes kinda unnatural now. -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
linux-next: manual merge of the target-updates tree with the libata tree
Hi Nicholas, Today's linux-next merge of the target-updates tree got a conflict in: drivers/ata/libata-scsi.c between commit: fe16d4f202c5 ("Revert "libata-eh: Set 'information' field for autosense"") from the libata tree and commit: f5a8b3a796db ("scsi: Protect against buffer possible overflow in scsi_set_sense_information") from the target-updates tree. I fixed it up (the former removed some code that was updated by the latter, so I just removed the code) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwells...@canb.auug.org.au -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
linux-next: manual merge of the target-updates tree with the libata tree
Hi Nicholas, Today's linux-next merge of the target-updates tree got conflicts in: drivers/scsi/scsi_error.c include/scsi/scsi_eh.h between commit: fe16d4f202c5 ("Revert "libata-eh: Set 'information' field for autosense"") from the libata tree and commit: 7708c1656552 ("scsi: Move sense handling routines to scsi_common") from the target-updates tree. I fixed it up (I left scsi_set_sense_information in its new place) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwells...@canb.auug.org.au -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 1/6] mmc: sdhci-esdhc-imx: add imx7d support and support HS400
On Wed, Jul 29, 2015 at 05:03:52PM +0800, Haibo Chen wrote: > The imx7d usdhc is derived from imx6sx, the difference is that > imx7d support HS400. > > So introduce a new compatible string for imx7d and add HS400 > support for imx7d usdhc. > > Signed-off-by: Haibo Chen > --- > drivers/mmc/host/sdhci-esdhc-imx.c | 66 > ++ > 1 file changed, 66 insertions(+) > > diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c > b/drivers/mmc/host/sdhci-esdhc-imx.c > index c6b9f64..b441eed 100644 > --- a/drivers/mmc/host/sdhci-esdhc-imx.c > +++ b/drivers/mmc/host/sdhci-esdhc-imx.c > @@ -44,6 +44,7 @@ > #define ESDHC_MIX_CTRL_EXE_TUNE (1 << 22) > #define ESDHC_MIX_CTRL_SMPCLK_SEL (1 << 23) > #define ESDHC_MIX_CTRL_FBCLK_SEL(1 << 25) > +#define ESDHC_MIX_CTRL_HS400_EN (1 << 26) > /* Bits 3 and 6 are not SDHCI standard definitions */ > #define ESDHC_MIX_CTRL_SDHCI_MASK 0xb7 > /* Tuning bits */ > @@ -60,6 +61,16 @@ > #define ESDHC_TUNE_CTRL_MIN 0 > #define ESDHC_TUNE_CTRL_MAX ((1 << 7) - 1) > > +/* strobe dll register */ > +#define ESDHC_STROBE_DLL_CTRL0x70 > +#define ESDHC_STROBE_DLL_CTRL_ENABLE (1 << 0) > +#define ESDHC_STROBE_DLL_CTRL_RESET (1 << 1) > +#define ESDHC_STROBE_DLL_CTRL_SLV_DLY_TARGET_SHIFT 3 > + > +#define ESDHC_STROBE_DLL_STATUS 0x74 > +#define ESDHC_STROBE_DLL_STS_REF_LOCK(1 << 1) > +#define ESDHC_STROBE_DLL_STS_SLV_LOCK0x1 > + > #define ESDHC_TUNING_CTRL0xcc > #define ESDHC_STD_TUNING_EN (1 << 24) > /* NOTE: the minimum valid tuning start tap for mx6sl is 1 */ > @@ -120,6 +131,8 @@ > #define ESDHC_FLAG_ERR004536 BIT(7) > /* The IP supports HS200 mode */ > #define ESDHC_FLAG_HS200 BIT(8) > +/* The IP supports HS400 mode */ > +#define ESDHC_FLAG_SUP_HS400 BIT(9) > > struct esdhc_soc_data { > u32 flags; > @@ -156,6 +169,12 @@ static struct esdhc_soc_data usdhc_imx6sx_data = { > | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200, > }; > > +static struct esdhc_soc_data usdhc_imx7d_data = { > + .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING > + | ESDHC_FLAG_HAVE_CAP1 | 
ESDHC_FLAG_HS200 > + | ESDHC_FLAG_SUP_HS400, Better to use ESDHC_FLAG_HS400 to stay aligned with the existing ESDHC_FLAG_HS200. Regards Dong Aisheng -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] net_dbg_ratelimited: turn into no-op when !DEBUG
The pr_debug family of functions turns into a no-op when -DDEBUG is not specified, opting instead to call "no_printk", which gets compiled to a no-op (but retains gcc's nice warnings about printf-style arguments). The problem with net_dbg_ratelimited is that it is defined to be a variant of net_ratelimited_function, which expands to essentially: if (net_ratelimit()) pr_debug(fmt, ...); When DEBUG is not defined, then this becomes, if (net_ratelimit()) ; This seems benign, except it isn't. Firstly, there's the obvious overhead of calling net_ratelimit needlessly, which does quite some book keeping for the rate limiting. Given that the pr_debug and net_dbg_ratelimited family of functions are sprinkled liberally through performance critical code, with developers assuming they'll be compiled out to a no-op most of the time, we certainly do not want this needless book keeping. Secondly, and most visibly, even though no debug message is printed when DEBUG is not defined, if there is a flood of invocations, dmesg winds up peppered with messages such as "net_ratelimit: 320 callbacks suppressed". This is because our aforementioned net_ratelimit() function actually prints this text in some circumstances. It's especially odd to see this when there isn't any other accompanying debug message. So, in sum, it doesn't make sense to have this function's current behavior, and instead it should match what every other debug family of functions in the kernel does with !DEBUG -- nothing. This patch replaces calls to net_dbg_ratelimited when !DEBUG with no_printk, keeping with the idiom of all the other debug print helpers. Signed-off-by: Jason A. Donenfeld --- include/linux/net.h | 5 + 1 file changed, 5 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index 04aa068..500fdfe 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -239,8 +239,13 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) 
\ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) +#if defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) +#else +#define net_dbg_ratelimited(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); -- 2.4.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v4 1/7] locking/pvqspinlock: Unconditional PV kick with _Q_SLOW_VAL
On 08/01/2015 06:29 PM, Peter Zijlstra wrote: On Fri, Jul 31, 2015 at 10:21:58PM -0400, Waiman Long wrote: The smp_store_release() is not a full barrier. In order to avoid missed wakeup, we may need to add memory barrier around locked and cpu state variables adding to complexity. As the chance of spurious wakeup is very low, it is easier and safer to just do an unconditional kick at unlock time. Signed-off-by: Waiman Long --- kernel/locking/qspinlock_paravirt.h | 11 --- 1 files changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 15d3733..2dd4b39 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -240,7 +240,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) cpu_relax(); } - WRITE_ONCE(pn->state, vcpu_halted); if (!lp) { /* ONCE */ lp = pv_hash(lock, pn); /* @@ -320,9 +319,15 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock) /* * At this point the memory pointed at by lock can be freed/reused, * however we can still use the pv_node to kick the CPU. +* +* As smp_store_release() is not a full barrier, adding a check to +* the node->state doesn't guarantee the checking is really done +* after clearing the lock byte This is true, but _WHY_ is that a problem ? since they are in 2 separate +* cachelines and so hardware can reorder them. That's just gibberish, even in the same cacheline stuff can get reordered. So either we insert +* memory barrier here and in the corresponding pv_wait_head() +* function or we do an unconditional kick which is what is done here. why, why why ? You've added words, but you've not actually described what the problem is you're trying to fix. AFAICT the only thing we really care about here is that the load in question happens _after_ we observe SLOW, and that is still true. The order against the unlock is irrelevant. So we set ->state before we hash and before we set SLOW. 
Given that we've seen SLOW, we must therefore also see ->state. If ->state == halted, this means the CPU in question is blocked and the pv_node will not get re-used -- if it does get re-used, it wasn't blocked and we don't care either. Therefore, ->cpu is stable and we'll kick it into action. How do you end up not waking a waiting cpu? Explain that. Yes, it is safe in the current code. In some versions of my pvqspinlock patch, I was resetting the state back to running in pv_wait_head(). This causes race problem. The current code, however, will not reset the state back to running and so the check is redundant. I will clarify that in the next patch. */ - if (READ_ONCE(node->state) == vcpu_halted) - pv_kick(node->cpu); + pv_kick(node->cpu); } Also, this patch clearly isn't against my tree. Yes, I was backing against the latest tip tree. As some of the files in the patch were modified in the latest tip tree, I will rebase my patch and update it. Please let me know if I should be using your tree instead. Cheers, Longman -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
RE: [PATCH] iio: adc: vf610: Add IIO buffer support for Vybrid ADC
From: Sanchayan Maity Sent: Monday, August 03, 2015 11:10 PM > To: ji...@kernel.org; linux-...@vger.kernel.org > Cc: ste...@agner.ch; Duan Fugang-B38611; hof...@osadl.org; > sanjeev_sha...@mentor.com; Estevam Fabio-R49496; knaac...@gmx.de; > l...@metafoo.de; pme...@pmeerw.net; antoine.ten...@free-electrons.com; > linux-kernel@vger.kernel.org; linux-arm-ker...@lists.infradead.org; > Sanchayan Maity > Subject: [PATCH] iio: adc: vf610: Add IIO buffer support for Vybrid ADC > > This patch adds support for IIO buffer to the Vybrid ADC driver. > IIO triggered buffer infrastructure along with iio sysfs trigger is used > to leverage continuous sampling support provided by the ADC block. > > Signed-off-by: Sanchayan Maity > --- > drivers/iio/adc/Kconfig | 4 ++ > drivers/iio/adc/vf610_adc.c | 122 > +--- > 2 files changed, 120 insertions(+), 6 deletions(-) > > diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index > 7c55658..4661241 100644 > --- a/drivers/iio/adc/Kconfig > +++ b/drivers/iio/adc/Kconfig > @@ -337,6 +337,10 @@ config TWL6030_GPADC config VF610_ADC > tristate "Freescale vf610 ADC driver" > depends on OF > + select IIO_BUFFER > + select IIO_TRIGGER > + select IIO_SYSFS_TRIGGER > + select IIO_TRIGGERED_BUFFER > help > Say yes here to support for Vybrid board analog-to-digital > converter. > Since the IP is used for i.MX6SLX, the driver also support > i.MX6SLX. 
> diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c > index 23b8fb9..af72b9a 100644 > --- a/drivers/iio/adc/vf610_adc.c > +++ b/drivers/iio/adc/vf610_adc.c > @@ -34,8 +34,11 @@ > #include > > #include > +#include > #include > -#include > +#include > +#include #include > + > > /* This will be the driver name the kernel reports */ #define > DRIVER_NAME "vf610-adc" > @@ -170,6 +173,7 @@ struct vf610_adc { > u32 sample_freq_avail[5]; > > struct completion completion; > + u16 *buffer; > }; > > static const u32 vf610_hw_avgs[] = { 1, 4, 8, 16, 32 }; @@ -505,12 > +509,22 @@ static const struct iio_chan_spec_ext_info vf610_ext_info[] = > { > .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) | \ > BIT(IIO_CHAN_INFO_SAMP_FREQ), \ > .ext_info = vf610_ext_info, \ > + .address = (_idx), \ > + .scan_index = (_idx), \ > + .scan_type.sign = 'u', \ > + .scan_type.realbits = 12, \ > + .scan_type.storagebits = 16,\ > } > > #define VF610_ADC_TEMPERATURE_CHAN(_idx, _chan_type) { \ > .type = (_chan_type), \ > .channel = (_idx), \ > .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), \ > + .address = (_idx), \ > + .scan_index = (_idx), \ > + .scan_type.sign = 'u', \ > + .scan_type.realbits = 12, \ > + .scan_type.storagebits = 16,\ > } > > static const struct iio_chan_spec vf610_adc_iio_channels[] = { @@ -531,6 > +545,7 @@ static const struct iio_chan_spec vf610_adc_iio_channels[] = { > VF610_ADC_CHAN(14, IIO_VOLTAGE), > VF610_ADC_CHAN(15, IIO_VOLTAGE), > VF610_ADC_TEMPERATURE_CHAN(26, IIO_TEMP), > + IIO_CHAN_SOFT_TIMESTAMP(32), > /* sentinel */ > }; > > @@ -559,13 +574,21 @@ static int vf610_adc_read_data(struct vf610_adc > *info) > > static irqreturn_t vf610_adc_isr(int irq, void *dev_id) { > - struct vf610_adc *info = (struct vf610_adc *)dev_id; > + struct iio_dev *indio_dev = (struct iio_dev *)dev_id; > + struct vf610_adc *info = iio_priv(indio_dev); > int coco; > > coco = readl(info->regs + VF610_REG_ADC_HS); > if (coco & VF610_ADC_HS_COCO0) { > info->value = 
vf610_adc_read_data(info); > - complete(&info->completion); > + if (iio_buffer_enabled(indio_dev)) { > + info->buffer[0] = info->value; > + writel(0, info->regs + VF610_REG_ADC_HS); The register is read only. After ADC_Rn is read, the coco bit is cleared. > + iio_push_to_buffers_with_timestamp(indio_dev, > + info->buffer, iio_get_time_ns()); > + iio_trigger_notify_done(indio_dev->trig); > + } else > + complete(&info->completion); > } > > return IRQ_HANDLED; > @@ -612,6 +635,9 @@ static int vf610_read_raw(struct iio_dev *indio_dev, > switch (mask) { > case IIO_CHAN_INFO_RAW: > case IIO_CHAN_INFO_PROCESSED: > + if (iio_buffer_enabled(indio_dev)) > + return -EBUSY; > + > mutex_lock(&indio_dev->mlock); > rein
Re: [PATCH 0/3] vm hugetlb selftest cleanup
On Thu, Jul 30, 2015 at 05:59:50PM -0700, Mike Kravetz wrote: > As a followup to discussions of hugetlbfs fallocate, this provides > cleanup the vm hugetlb selftests. Remove hugetlbfstest as it tests > functionality not present in the kernel. Emphasize that libhugetlbfs > test suite should be used for hugetlb regression testing. > > Mike Kravetz (3): > Reverted "selftests: add hugetlbfstest" > selftests:vm: Point to libhugetlbfs for regression testing > Documentation: update libhugetlbfs location and use for testing It seems that patch 1 conflicts with commit bd67d5c15cc1 ("Test compaction of mlocked memory"), but the resolution is trivial, so for the series ... Acked-by: Naoya Horiguchi Thanks!-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: hugetlb pages not accounted for in rss
On Wed, Jul 29, 2015 at 04:20:59PM -0700, Mike Kravetz wrote: > On 07/29/2015 12:08 PM, David Rientjes wrote: > >On Tue, 28 Jul 2015, Jörn Engel wrote: > > > >>Well, we definitely need something. Having a 100GB process show 3GB of > >>rss is not very useful. How would we notice a memory leak if it only > >>affects hugepages, for example? > >> > > > >Since the hugetlb pool is a global resource, it would also be helpful to > >determine if a process is mapping more than expected. You can't do that > >just by adding a huge rss metric, however: if you have 2MB and 1GB > >hugepages configured you wouldn't know if a process was mapping 512 2MB > >hugepages or 1 1GB hugepage. > > > >That's the purpose of hugetlb_cgroup, after all, and it supports usage > >counters for all hstates. The test could be converted to use that to > >measure usage if configured in the kernel. > > > >Beyond that, I'm not sure how a per-hstate rss metric would be exported to > >userspace in a clean way and other ways of obtaining the same data are > >possible with hugetlb_cgroup. I'm not sure how successful you'd be in > >arguing that we need separate rss counters for it. > > If I want to track hugetlb usage on a per-task basis, do I then need to > create one cgroup per task? > > For example, suppose I have many tasks using hugetlb and the global pool > is getting low on free pages. It might be useful to know which tasks are > using hugetlb pages, and how many they are using. > > I don't actually have this need (I think), but it appears to be what > Jörn is asking for. One possible way to get hugetlb metric in per-task basis is to walk page table via /proc/pid/pagemap, and counting page flags for each mapped page (we can easily do this with tools/vm/page-types.c like "page-types -p -b huge"). This is obviously slower than just storing the counter as in-kernel data and just exporting it, but might be useful in some situation. 
Thanks, Naoya Horiguchi
[PATCH] ARM64: dts: mt6795: enable basic SMP bringup for MT6795
This patch adds support SMP on MediaTek MT6795 Cortex-A53 Octa-core SoC. The patch is based on v4.2-rc1 and following patch series: (1) Mars Cheng's "Add mt6795 basic chip support" [1] [1] https://lkml.org/lkml/2015/7/14/63 Signed-off-by: Scott Shu --- arch/arm64/boot/dts/mediatek/mt6795.dtsi | 13 + 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt6795.dtsi b/arch/arm64/boot/dts/mediatek/mt6795.dtsi index da200e7..c85659d 100644 --- a/arch/arm64/boot/dts/mediatek/mt6795.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6795.dtsi @@ -20,6 +20,11 @@ #address-cells = <2>; #size-cells = <2>; + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -27,48 +32,56 @@ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x000>; }; cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x001>; }; cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x002>; }; cpu3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x003>; }; cpu4: cpu@100 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x100>; }; cpu5: cpu@101 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x101>; }; cpu6: cpu@102 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x102>; }; cpu7: cpu@103 { device_type = "cpu"; compatible = "arm,cortex-a53"; + enable-method = "psci"; reg = <0x103>; }; }; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v5] powerpc/rcpm: add RCPM driver
On Tue, Aug 4, 2015 at 4:23 AM, Scott Wood wrote: On Mon, 2015-08-03 at 19:14 +0800, Chenhui Zhao wrote: On Sat, Aug 1, 2015 at 8:45 AM, Scott Wood wrote: > On Fri, 2015-06-26 at 15:44 +0800, Yuantian.Tang@freescale.com wrote: > > +static void rcpm_v1_set_ip_power(bool enable, u32 *mask) > > +{ > > + if (enable) > > + setbits32(&rcpm_v1_regs->ippdexpcr, *mask); > > + else > > + clrbits32(&rcpm_v1_regs->ippdexpcr, *mask); > > +} > > + > > +static void rcpm_v2_set_ip_power(bool enable, u32 *mask) > > +{ > > + if (enable) > > + setbits32(&rcpm_v2_regs->ippdexpcr[0], *mask); > > + else > > + clrbits32(&rcpm_v2_regs->ippdexpcr[0], *mask); > > +} > > Why do these take "u32 *mask" instead of "u32 mask"? > > -Scott I think it can be used in the case where there are several mask values. When would that be? -Scott So far, only use one register, even though the register name is "IPPDEXPCRn" (has "n" suffix) in T4 RM. OK. Just change the parameter to "u32 mask". -Chenhui -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [TRIVIAL PATCH] block: Correct misuses of 0x%
On Tue, 2015-08-04 at 10:19 +0800, yalin wang wrote: > Ping ? > > On Aug 3, 2015, at 16:56, yalin wang wrote: > > > > > >> On Aug 3, 2015, at 16:03, Joe Perches wrote: > >> > >> On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote: > On Aug 3, 2015, at 04:25, Joe Perches wrote: > > Correct misuse of 0x%d in logging messages. > > >> [] > >>> why not use like this : dev_dbg(&h->pdev->dev, " Max outstanding > >>> commands = %#x\n” ? > >>> %#x will add 0x prefix automatically . > >> > >> It's generally a consistency thing. > >> A 0 value would be emitted as 0 and not 0x0. > >> > > i try on my ubuntu , > > > > static int __init throtl_init(void) > > > > { > > > > printk("module init test: %#x %p\n", 0, (void *)0x123); > > > > return 0; > > > > > > } > > > > > > module_init(throtl_init); > > > > #uname -a > > Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 > > UTC 2015 x86_64 x86_64 x86_64 GNU/Linux > > > > #dmesg > > [259356.375586] module init test: 0x0 0123 > > > > it seems don’t need 0x%x for 0, just need %#x for all numbers. > > there are lots of use like this, i can change them if needed: > > > > # egrep -r -i '0x%\d*x' . | wc -l > > 11776 I suggest not, it's not a standard usage and the 0 may be unexpected. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] usb: gadget: f_printer: fix the bug of deadlock caused by nested spinlock
On 08/03/2015 10:47 PM, Felipe Balbi wrote: Hi, On Mon, Aug 03, 2015 at 07:19:43PM +0800, fupan...@windriver.com wrote: From: fli Function printer_func_disable() has called spinlock on printer_dev->lock, and it'll call function chain of printer_reset_interface() | +---dwc3_gadget_ep_disable() | +---__dwc3_gadget_ep_disable() | +---dwc3_remove_requests() | +---dwc3_gadget_giveback() | +---rx_complete() in the protected block. However, rx_complete() in f_printer.c calls spinlock on printer_dev->lock again, which will cause system hang. The following steps can reproduce this hang: 1. Build the test program from Documentation/usb/gadget_printer.txt as g_printer 2. Plug in the USB device to a host(such as Ubuntu). 3. on the USB device system run: #modprobe g_printer.ko #./g_printer -read_data 4. Unplug the USB device from the host The system will hang later. In order to avoid this deadlock, moving the spinlock from printer_func_disable() into printer_reset_interface() and excluding the block of calling dwc3_gadget_ep_disable(), in which the critical resource will be protected by its spinlock in rx_complete(). This commit will fix the system hang with the following calltrace: INFO: rcu_preempt detected stalls on CPUs/tasks: { 3} (detected by 0, t=21006 jiffies, g=524, c=523, q=2) sending NMI to all CPUs: NMI backtrace for cpu 3 CPU: 3 PID: 718 Comm: irq/22-dwc3 Not tainted 3.10.38-ltsi-WR6.0.0.11_standard #2 Hardware name: Intel Corp. 
VALLEYVIEW B3 PLATFORM/NOTEBOOK, BIOS BYTICRB1.86C.0092.R32.1410021707 10/02/2014 task: f44f4c20 ti: f40f6000 task.ti: f40f6000 EIP: 0060:[] EFLAGS: 0097 CPU: 3 EIP is at _raw_spin_lock_irqsave+0x35/0x40 EAX: 0076 EBX: f80fad00 ECX: 0076 EDX: 0075 ESI: 0096 EDI: ff94 EBP: f40f7e20 ESP: f40f7e18 DS: 007b ES: 007b FS: 00d8 GS: SS: 0068 CR0: 8005003b CR2: b77ac000 CR3: 01c3 CR4: 001007f0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Stack: f474a720 f80fad00 f40f7e3c f80f93cc c135d486 f474a720 f468fb00 f4bea894 f40f7e54 f7e35f19 ff00 f468fb00 f468fb24 0086 f40f7e64 f7e36577 f468fb00 f4bea810 f40f7e74 f7e365a8 f468fb00 f4bea894 f40f7e9c Call Trace: [] rx_complete+0x1c/0xb0 [g_printer] [] ? vsnprintf+0x166/0x390 [] dwc3_gadget_giveback+0xc9/0xf0 [dwc3] [] dwc3_remove_requests+0x57/0x70 [dwc3] [] __dwc3_gadget_ep_disable+0x18/0x60 [dwc3] [] dwc3_gadget_ep_disable+0x89/0xf0 [dwc3] [] printer_reset_interface+0x31/0x50 [g_printer] [] printer_func_disable+0x20/0x30 [g_printer] [] composite_disconnect+0x4b/0x90 [libcomposite] [] dwc3_disconnect_gadget+0x38/0x43 [dwc3] [] dwc3_gadget_disconnect_interrupt+0x3e/0x5a [dwc3] [] dwc3_thread_interrupt+0x5c8/0x610 [dwc3] [] irq_thread_fn+0x18/0x30 [] irq_thread+0x100/0x130 [] ? irq_finalize_oneshot.part.29+0xb0/0xb0 [] ? wake_threads_waitq+0x40/0x40 [] ? irq_thread_dtor+0xb0/0xb0 [] kthread+0x94/0xa0 [] ret_from_kernel_thread+0x1b/0x28 [] ? kthread_create_on_node+0xc0/0xc0 Signed-off-by: fupan li Thanks, out of curiosity, do you plan on sending a glue layer for Windriver's DWC3 ? No, just this fix patch. Fupan cheers -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [TRIVIAL PATCH] block: Correct misuses of 0x%
Ping ? > On Aug 3, 2015, at 16:56, yalin wang wrote: > > >> On Aug 3, 2015, at 16:03, Joe Perches wrote: >> >> On Mon, 2015-08-03 at 15:25 +0800, yalin wang wrote: On Aug 3, 2015, at 04:25, Joe Perches wrote: Correct misuse of 0x%d in logging messages. >> [] >>> why not use like this : dev_dbg(&h->pdev->dev, " Max outstanding >>> commands = %#x\n” ? >>> %#x will add 0x prefix automatically . >> >> It's generally a consistency thing. >> A 0 value would be emitted as 0 and not 0x0. >> > i try on my ubuntu , > > static int __init throtl_init(void) > > { > > printk("module init test: %#x %p\n", 0, (void *)0x123); > > return 0; > > > } > > > module_init(throtl_init); > > #uname -a > Linux ubuntu 3.16.0-38-generic #52~14.04.1-Ubuntu SMP Fri May 8 09:43:57 UTC > 2015 x86_64 x86_64 x86_64 GNU/Linux > > #dmesg > [259356.375586] module init test: 0x0 0123 > > it seems don’t need 0x%x for 0, just need %#x for all numbers. > there are lots of use like this, i can change them if needed: > > # egrep -r -i '0x%\d*x' . | wc -l > 11776 > > > Thanks -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 00/15] trivial: Drop unlikely before IS_ERR(_OR_NULL)
On 03-08-15, 17:38, Steven Rostedt wrote: > On Fri, 31 Jul 2015 13:23:10 +0300 > "Kirill A. Shutemov" wrote: > > We have two cases in code: > > > > drivers/rtc/rtc-gemini.c: if (likely(IS_ERR(rtc->rtc_dev))) > > drivers/staging/lustre/lustre/obdclass/lu_object.c: if > > (likely(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT)) { > > > > The first one is mistake, I think. Or do we expect rtc_device_register() > > to fail? > > > > The second is redundant. "if (PTR_ERR(shadow) == -ENOENT)" should do the > > job. > > > > Yep, those look like bugs to me. Yeah, I have fixed both of them :) -- viresh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/8] watchdog: watchdog_dev: Use single variable name for struct watchdog_device
The current code uses 'wdd', wddev', and 'watchdog' as variable names for struct watchdog_device. This is confusing and makes it difficult to enhance the code. Replace it all with 'wdd'. Cc: Timo Kokkonen Cc: Uwe Kleine-König Signed-off-by: Guenter Roeck --- drivers/watchdog/watchdog_dev.c | 151 1 file changed, 75 insertions(+), 76 deletions(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 6aaefbad303e..06171c73daf5 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -51,7 +51,7 @@ static struct watchdog_device *old_wdd; /* * watchdog_ping: ping the watchdog. - * @wddev: the watchdog device to ping + * @wdd: the watchdog device to ping * * If the watchdog has no own ping operation then it needs to be * restarted via the start operation. This wrapper function does @@ -59,65 +59,65 @@ static struct watchdog_device *old_wdd; * We only ping when the watchdog device is running. */ -static int watchdog_ping(struct watchdog_device *wddev) +static int watchdog_ping(struct watchdog_device *wdd) { int err = 0; - mutex_lock(&wddev->lock); + mutex_lock(&wdd->lock); - if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + if (test_bit(WDOG_UNREGISTERED, &wdd->status)) { err = -ENODEV; goto out_ping; } - if (!watchdog_active(wddev)) + if (!watchdog_active(wdd)) goto out_ping; - if (wddev->ops->ping) - err = wddev->ops->ping(wddev); /* ping the watchdog */ + if (wdd->ops->ping) + err = wdd->ops->ping(wdd); /* ping the watchdog */ else - err = wddev->ops->start(wddev); /* restart watchdog */ + err = wdd->ops->start(wdd); /* restart watchdog */ out_ping: - mutex_unlock(&wddev->lock); + mutex_unlock(&wdd->lock); return err; } /* * watchdog_start: wrapper to start the watchdog. - * @wddev: the watchdog device to start + * @wdd: the watchdog device to start * * Start the watchdog if it is not active and mark it active. * This function returns zero on success or a negative errno code for * failure. 
*/ -static int watchdog_start(struct watchdog_device *wddev) +static int watchdog_start(struct watchdog_device *wdd) { int err = 0; - mutex_lock(&wddev->lock); + mutex_lock(&wdd->lock); - if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + if (test_bit(WDOG_UNREGISTERED, &wdd->status)) { err = -ENODEV; goto out_start; } - if (watchdog_active(wddev)) + if (watchdog_active(wdd)) goto out_start; - err = wddev->ops->start(wddev); + err = wdd->ops->start(wdd); if (err == 0) - set_bit(WDOG_ACTIVE, &wddev->status); + set_bit(WDOG_ACTIVE, &wdd->status); out_start: - mutex_unlock(&wddev->lock); + mutex_unlock(&wdd->lock); return err; } /* * watchdog_stop: wrapper to stop the watchdog. - * @wddev: the watchdog device to stop + * @wdd: the watchdog device to stop * * Stop the watchdog if it is still active and unmark it active. * This function returns zero on success or a negative errno code for @@ -125,155 +125,154 @@ out_start: * If the 'nowayout' feature was set, the watchdog cannot be stopped. 
*/ -static int watchdog_stop(struct watchdog_device *wddev) +static int watchdog_stop(struct watchdog_device *wdd) { int err = 0; - mutex_lock(&wddev->lock); + mutex_lock(&wdd->lock); - if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + if (test_bit(WDOG_UNREGISTERED, &wdd->status)) { err = -ENODEV; goto out_stop; } - if (!watchdog_active(wddev)) + if (!watchdog_active(wdd)) goto out_stop; - if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) { - dev_info(wddev->dev, "nowayout prevents watchdog being stopped!\n"); + if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) { + dev_info(wdd->dev, "nowayout prevents watchdog being stopped!\n"); err = -EBUSY; goto out_stop; } - err = wddev->ops->stop(wddev); + err = wdd->ops->stop(wdd); if (err == 0) - clear_bit(WDOG_ACTIVE, &wddev->status); + clear_bit(WDOG_ACTIVE, &wdd->status); out_stop: - mutex_unlock(&wddev->lock); + mutex_unlock(&wdd->lock); return err; } /* * watchdog_get_status: wrapper to get the watchdog status - * @wddev: the watchdog device to get the status from + * @wdd: the watchdog device to get the status from * @status: the status of the watchdog device * * Get the watchdog's stat
[PATCH 3/8] watchdog: Introduce WDOG_RUNNING flag
The WDOG_RUNNING flag is expected to be set by watchdog drivers if the hardware watchdog is running. If the flag is set, the watchdog subsystem will ping the watchdog even if the watchdog device is closed. The watchdog driver stop function is now optional and may be omitted if the watchdog can not be stopped. If stopping the watchdog is not possible but the driver implements a stop function, it is responsible to set the WDOG_RUNNING flag in its stop function. Cc: Timo Kokkonen Cc: Uwe Kleine-König Signed-off-by: Guenter Roeck --- Documentation/watchdog/watchdog-kernel-api.txt | 19 - drivers/watchdog/watchdog_core.c | 2 +- drivers/watchdog/watchdog_dev.c| 39 -- include/linux/watchdog.h | 7 + 4 files changed, 50 insertions(+), 17 deletions(-) diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 5fa085276874..7fda3c86cf46 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -144,17 +144,18 @@ are: device. The routine needs a pointer to the watchdog timer device structure as a parameter. It returns zero on success or a negative errno code for failure. -* stop: with this routine the watchdog timer device is being stopped. - The routine needs a pointer to the watchdog timer device structure as a - parameter. It returns zero on success or a negative errno code for failure. - Some watchdog timer hardware can only be started and not be stopped. The - driver supporting this hardware needs to make sure that a start and stop - routine is being provided. This can be done by using a timer in the driver - that regularly sends a keepalive ping to the watchdog timer hardware. Not all watchdog timer hardware supports the same functionality. That's why all other routines/operations are optional. They only need to be provided if they are supported. These optional routines/operations are: +* stop: with this routine the watchdog timer device is being stopped. 
+ The routine needs a pointer to the watchdog timer device structure as a + parameter. It returns zero on success or a negative errno code for failure. + Some watchdog timer hardware can only be started and not be stopped. A + driver supporting such hardware does not have to implement the stop routine. + If a driver has no stop function, the watchdog core will set WDOG_RUNNING and + start calling the driver's keepalive pings function after the watchdog device + is closed. * ping: this is the routine that sends a keepalive ping to the watchdog timer hardware. The routine needs a pointer to the watchdog timer device structure as a @@ -206,6 +207,10 @@ bit-operations. The status bits that are defined are: any watchdog_ops, so that you can be sure that no operations (other then unref) will get called after unregister, even if userspace still holds a reference to /dev/watchdog +* WDOG_RUNNING: Set by the watchdog driver if the hardware watchdog is running. + The bit must be set if the watchdog timer hardware can not be stopped; + otherwise it is optional. If set, the watchdog driver core will send + keepalive pings to the watchdog hardware while the watchdog device is closed. 
To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog timer device) you can either: diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 1a8059455413..b38d1b7ae10e 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -145,7 +145,7 @@ static int __watchdog_register_device(struct watchdog_device *wdd) return -EINVAL; /* Mandatory operations need to be supported */ - if (wdd->ops->start == NULL || wdd->ops->stop == NULL) + if (!wdd->ops->start) return -EINVAL; watchdog_check_min_max_timeout(wdd); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 25849c1d6dc1..e0fbc4ac9bb7 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -58,8 +58,9 @@ static inline bool watchdog_need_worker(struct watchdog_device *wdd) unsigned int hm = wdd->max_hw_timeout_ms; unsigned int m = wdd->max_timeout * 1000; - return watchdog_active(wdd) && hm && hm != m && - wdd->timeout * 500 > hm; + return (watchdog_active(wdd) && hm && hm != m && + wdd->timeout * 500 > hm) || + (!watchdog_active(wdd) && watchdog_running(wdd)); } static inline void watchdog_update_worker(struct watchdog_device *wdd, @@ -87,7 +88,7 @@ static int _watchdog_ping(struct watchdog_device *wdd) if (test_bit(WDOG_UNREGISTERED, &wdd->status)) return -ENODEV; - if (!watchdog_active(wdd)) + if (!watchdog_active(wdd) && !watchdog_running(wdd)) retu
[PATCH 8/8] watchdog: at91sam9: Convert to use infrastructure triggered keepalives
The watchdog infrastructure now supports handling watchdog keepalive if the watchdog is running while the watchdog device is closed. The infrastructure now also supports generating additional heartbeats if the maximum hardware timeout is smaller than or close to the configured timeout. Convert the driver to use this infrastructure. Signed-off-by: Guenter Roeck --- drivers/watchdog/at91sam9_wdt.c | 102 +--- 1 file changed, 11 insertions(+), 91 deletions(-) diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index e4698f7c5f93..0de39b52962c 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -48,8 +47,8 @@ * use this to convert a watchdog * value from/to milliseconds. */ -#define ticks_to_hz_rounddown(t) t) + 1) * HZ) >> 8) -#define ticks_to_hz_roundup(t) (t) + 1) * HZ) + 255) >> 8) +#define ticks_to_ms_rounddown(t) t) + 1) * 1000) >> 8) +#define ticks_to_ms_roundup(t) (t) + 1) * 1000) + 255) >> 8) #define ticks_to_secs(t) (((t) + 1) >> 8) #define secs_to_ticks(s) ((s) ? (((s) << 8) - 1) : 0) @@ -64,9 +63,6 @@ /* Hardware timeout in seconds */ #define WDT_HW_TIMEOUT 2 -/* Timer heartbeat (500ms) */ -#define WDT_TIMEOUT(HZ/2) - /* User land timeout */ #define WDT_HEARTBEAT 15 static int heartbeat; @@ -83,11 +79,8 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " struct at91wdt { struct watchdog_device wdd; void __iomem *base; - unsigned long next_heartbeat; /* the next_heartbeat for the timer */ - struct timer_list timer;/* The timer that pings the watchdog */ u32 mr; u32 mr_mask; - unsigned long heartbeat;/* WDT heartbeat in jiffies */ bool nowayout; unsigned int irq; }; @@ -107,47 +100,13 @@ static irqreturn_t wdt_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/* - * Reload the watchdog timer. 
(ie, pat the watchdog) - */ -static inline void at91_wdt_reset(struct at91wdt *wdt) -{ - wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT); -} - -/* - * Timer tick - */ -static void at91_ping(unsigned long data) -{ - struct at91wdt *wdt = (struct at91wdt *)data; - if (time_before(jiffies, wdt->next_heartbeat) || - !watchdog_active(&wdt->wdd)) { - at91_wdt_reset(wdt); - mod_timer(&wdt->timer, jiffies + wdt->heartbeat); - } else { - pr_crit("I will reset your machine !\n"); - } -} - static int at91_wdt_start(struct watchdog_device *wdd) { struct at91wdt *wdt = to_wdt(wdd); - /* calculate when the next userspace timeout will be */ - wdt->next_heartbeat = jiffies + wdd->timeout * HZ; - return 0; -} -static int at91_wdt_stop(struct watchdog_device *wdd) -{ - /* The watchdog timer hardware can not be stopped... */ - return 0; -} + wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT); -static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_timeout) -{ - wdd->timeout = new_timeout; - return at91_wdt_start(wdd); + return 0; } static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt) @@ -157,8 +116,8 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt) u32 value; int err; u32 mask = wdt->mr_mask; - unsigned long min_heartbeat = 1; - unsigned long max_heartbeat; + unsigned int min_timeout = jiffies_to_msecs(1); + unsigned int hw_timeout; struct device *dev = &pdev->dev; tmp = wdt_read(wdt, AT91_WDT_MR); @@ -180,31 +139,15 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt) delta = (tmp & AT91_WDT_WDD) >> 16; if (delta < value) - min_heartbeat = ticks_to_hz_roundup(value - delta); + min_timeout = ticks_to_ms_roundup(value - delta); - max_heartbeat = ticks_to_hz_rounddown(value); - if (!max_heartbeat) { + hw_timeout = ticks_to_ms_rounddown(value); + if (hw_timeout < min_timeout * 2) { dev_err(dev, "heartbeat is too small for the system to handle it correctly\n"); 
return -EINVAL; } - - /* -* Try to reset the watchdog counter 4 or 2 times more often than -* actually requested, to avoid spurious watchdog reset. -* If this is not possible because of the min_heartbeat value, reset -* it at the min_heartbeat period. -*/ - if ((max_heartbeat / 4) >= min_heartbeat) - wdt->heartbeat = max_heartbeat / 4; - else if ((max_heartbeat / 2) >= min_heartbeat) - wdt->heartbeat = max_heartb
[PATCH 4/8] watchdog: Make set_timeout function optional
For some watchdogs, the hardware timeout is fixed, and the watchdog driver depends on the watchdog core to handle the actual timeout. In this situation, the watchdog driver might only set the 'timeout' variable but do nothing else. This can as well be handled by the infrastructure, so make the set_timeout callback optional. If WDIOF_SETTIMEOUT is configured but the .set_timeout callback is not available, update the timeout variable in the infrastructure code. Signed-off-by: Guenter Roeck --- Documentation/watchdog/watchdog-kernel-api.txt | 4 drivers/watchdog/watchdog_dev.c| 9 ++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index 7fda3c86cf46..2f1a4ad7e565 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -178,6 +178,10 @@ they are supported. These optional routines/operations are: because the watchdog does not necessarily has a 1 second resolution). (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the watchdog's info structure). + If the watchdog driver does not have to perform any action but setting the + timeout value of the watchdog_device, this callback can be omitted. + If set_timeout is not provided but WDIOF_SETTIMEOUT is set, the watchdog + infrastructure updates the timeout value of the watchdog_device internally. * get_timeleft: this routines returns the time that's left before a reset. * ref: the operation that calls kref_get on the kref of a dynamically allocated watchdog_device struct. 
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index e0fbc4ac9bb7..73bae196a081 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -260,9 +260,9 @@ out_status: static int watchdog_set_timeout(struct watchdog_device *wdd, unsigned int timeout) { - int err; + int err = 0; - if (!wdd->ops->set_timeout || !(wdd->info->options & WDIOF_SETTIMEOUT)) + if (!(wdd->info->options & WDIOF_SETTIMEOUT)) return -EOPNOTSUPP; if (watchdog_timeout_invalid(wdd, timeout)) @@ -275,7 +275,10 @@ static int watchdog_set_timeout(struct watchdog_device *wdd, goto out_timeout; } - err = wdd->ops->set_timeout(wdd, timeout); + if (wdd->ops->set_timeout) + err = wdd->ops->set_timeout(wdd, timeout); + else + wdd->timeout = timeout; watchdog_update_worker(wdd, true, false); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/8] watchdog: imx2: Convert to use infrastructure triggered keepalives
The watchdog infrastructure now supports handling watchdog keepalive if the watchdog is running while the watchdog device is closed. Convert the driver to use this infrastructure. Signed-off-by: Guenter Roeck --- drivers/watchdog/imx2_wdt.c | 72 - 1 file changed, 12 insertions(+), 60 deletions(-) diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 0bb1a1d1b170..66feef254661 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,6 @@ #include #include #include -#include #include #define DRIVER_NAME "imx2-wdt" @@ -62,7 +60,6 @@ struct imx2_wdt_device { struct clk *clk; struct regmap *regmap; - struct timer_list timer;/* Pings the watchdog when closed */ struct watchdog_device wdog; struct notifier_block restart_handler; }; @@ -151,16 +148,6 @@ static int imx2_wdt_ping(struct watchdog_device *wdog) return 0; } -static void imx2_wdt_timer_ping(unsigned long arg) -{ - struct watchdog_device *wdog = (struct watchdog_device *)arg; - struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - - /* ping it every wdog->timeout / 2 seconds to prevent reboot */ - imx2_wdt_ping(wdog); - mod_timer(&wdev->timer, jiffies + wdog->timeout * HZ / 2); -} - static int imx2_wdt_set_timeout(struct watchdog_device *wdog, unsigned int new_timeout) { @@ -177,40 +164,19 @@ static int imx2_wdt_start(struct watchdog_device *wdog) { struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); - if (imx2_wdt_is_running(wdev)) { - /* delete the timer that pings the watchdog after close */ - del_timer_sync(&wdev->timer); + if (imx2_wdt_is_running(wdev)) imx2_wdt_set_timeout(wdog, wdog->timeout); - } else + else imx2_wdt_setup(wdog); - return imx2_wdt_ping(wdog); -} - -static int imx2_wdt_stop(struct watchdog_device *wdog) -{ - /* -* We don't need a clk_disable, it cannot be disabled once started. 
-* We use a timer to ping the watchdog while /dev/watchdog is closed -*/ - imx2_wdt_timer_ping((unsigned long)wdog); - return 0; -} - -static inline void imx2_wdt_ping_if_active(struct watchdog_device *wdog) -{ - struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog); + set_bit(WDOG_RUNNING, &wdog->status); - if (imx2_wdt_is_running(wdev)) { - imx2_wdt_set_timeout(wdog, wdog->timeout); - imx2_wdt_timer_ping((unsigned long)wdog); - } + return imx2_wdt_ping(wdog); } static const struct watchdog_ops imx2_wdt_ops = { .owner = THIS_MODULE, .start = imx2_wdt_start, - .stop = imx2_wdt_stop, .ping = imx2_wdt_ping, .set_timeout = imx2_wdt_set_timeout, }; @@ -277,9 +243,10 @@ static int __init imx2_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(wdog, nowayout); watchdog_init_timeout(wdog, timeout, &pdev->dev); - setup_timer(&wdev->timer, imx2_wdt_timer_ping, (unsigned long)wdog); - - imx2_wdt_ping_if_active(wdog); + if (imx2_wdt_is_running(wdev)) { + imx2_wdt_set_timeout(wdog, wdog->timeout); + set_bit(WDOG_RUNNING, &wdog->status); + } /* * Disable the watchdog power down counter at boot. Otherwise the power @@ -320,7 +287,6 @@ static int __exit imx2_wdt_remove(struct platform_device *pdev) watchdog_unregister_device(wdog); if (imx2_wdt_is_running(wdev)) { - del_timer_sync(&wdev->timer); imx2_wdt_ping(wdog); dev_crit(&pdev->dev, "Device removed: Expect reboot!\n"); } @@ -334,10 +300,9 @@ static void imx2_wdt_shutdown(struct platform_device *pdev) if (imx2_wdt_is_running(wdev)) { /* -* We are running, we need to delete the timer but will -* give max timeout before reboot will take place +* We are running, configure max timeout before reboot +* will take place. 
*/ - del_timer_sync(&wdev->timer); imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n"); @@ -355,10 +320,6 @@ static int imx2_wdt_suspend(struct device *dev) if (imx2_wdt_is_running(wdev)) { imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME); imx2_wdt_ping(wdog); - - /* The watchdog is not active */ - if (!watchdog_active(wdog)) - del_timer_sync(&wdev->timer); } clk_disable_unprepare(wdev->clk); @@ -384,19 +345,1
[PATCH 7/8] watchdog: gpio_wdt: Convert to use infrastructure triggered keepalives
The watchdog infrastructure now supports handling watchdog keepalive if the watchdog is running while the watchdog device is closed. The infrastructure now also supports generating additional heartbeats if the maximum hardware timeout is smaller than or close to the configured timeout. Convert the driver to use this infrastructure. Signed-off-by: Guenter Roeck --- drivers/watchdog/gpio_wdt.c | 65 - 1 file changed, 11 insertions(+), 54 deletions(-) diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c index 1687cc2d7122..cbbdae440bfa 100644 --- a/drivers/watchdog/gpio_wdt.c +++ b/drivers/watchdog/gpio_wdt.c @@ -32,12 +32,8 @@ struct gpio_wdt_priv { boolactive_low; boolstate; boolalways_running; - boolarmed; unsigned inthw_algo; - unsigned inthw_margin; - unsigned long last_jiffies; struct notifier_block notifier; - struct timer_list timer; struct watchdog_device wdd; }; @@ -50,20 +46,12 @@ static void gpio_wdt_disable(struct gpio_wdt_priv *priv) gpio_direction_input(priv->gpio); } -static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv) -{ - priv->state = priv->active_low; - gpio_direction_output(priv->gpio, priv->state); - priv->last_jiffies = jiffies; - mod_timer(&priv->timer, priv->last_jiffies + priv->hw_margin); -} - static int gpio_wdt_start(struct watchdog_device *wdd) { struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); - gpio_wdt_start_impl(priv); - priv->armed = true; + priv->state = priv->active_low; + gpio_direction_output(priv->gpio, priv->state); return 0; } @@ -72,10 +60,9 @@ static int gpio_wdt_stop(struct watchdog_device *wdd) { struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); - priv->armed = false; if (!priv->always_running) { - mod_timer(&priv->timer, 0); gpio_wdt_disable(priv); + clear_bit(WDOG_RUNNING, &priv->wdd.status); } return 0; @@ -85,32 +72,6 @@ static int gpio_wdt_ping(struct watchdog_device *wdd) { struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); - priv->last_jiffies = jiffies; - - return 0; -} - 
-static int gpio_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t) -{ - wdd->timeout = t; - - return gpio_wdt_ping(wdd); -} - -static void gpio_wdt_hwping(unsigned long data) -{ - struct watchdog_device *wdd = (struct watchdog_device *)data; - struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd); - - if (priv->armed && time_after(jiffies, priv->last_jiffies + - msecs_to_jiffies(wdd->timeout * 1000))) { - dev_crit(wdd->dev, "Timer expired. System will reboot soon!\n"); - return; - } - - /* Restart timer */ - mod_timer(&priv->timer, jiffies + priv->hw_margin); - switch (priv->hw_algo) { case HW_ALGO_TOGGLE: /* Toggle output pin */ @@ -124,6 +85,8 @@ static void gpio_wdt_hwping(unsigned long data) gpio_set_value_cansleep(priv->gpio, priv->active_low); break; } + + return 0; } static int gpio_wdt_notify_sys(struct notifier_block *nb, unsigned long code, @@ -132,12 +95,10 @@ static int gpio_wdt_notify_sys(struct notifier_block *nb, unsigned long code, struct gpio_wdt_priv *priv = container_of(nb, struct gpio_wdt_priv, notifier); - mod_timer(&priv->timer, 0); - switch (code) { case SYS_HALT: case SYS_POWER_OFF: - gpio_wdt_disable(priv); + gpio_wdt_stop(&priv->wdd); break; default: break; @@ -157,7 +118,6 @@ static const struct watchdog_ops gpio_wdt_ops = { .start = gpio_wdt_start, .stop = gpio_wdt_stop, .ping = gpio_wdt_ping, - .set_timeout= gpio_wdt_set_timeout, }; static int gpio_wdt_probe(struct platform_device *pdev) @@ -205,9 +165,6 @@ static int gpio_wdt_probe(struct platform_device *pdev) if (hw_margin < 2 || hw_margin > 65535) return -EINVAL; - /* Use safe value (1/2 of real timeout) */ - priv->hw_margin = msecs_to_jiffies(hw_margin / 2); - priv->always_running = of_property_read_bool(pdev->dev.of_node, "always-running"); @@ -217,11 +174,15 @@ static int gpio_wdt_probe(struct platform_device *pdev) priv->wdd.ops = &gpio_wdt_ops; priv->wdd.min_timeout = SOFT_TIMEOUT_MIN; priv->wdd.max_timeout = SOFT_TIMEOUT_MAX; + priv->wdd.max_hw_timeout_m
[PATCH 6/8] watchdog: retu: Convert to use infrastructure triggered keepalives
The watchdog infrastructure now supports handling watchdog keepalive if the watchdog is running while the watchdog device is closed. Convert the driver to use this infrastructure. Signed-off-by: Guenter Roeck --- drivers/watchdog/retu_wdt.c | 78 - 1 file changed, 7 insertions(+), 71 deletions(-) diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c index b7c68e275aeb..ce2982a7670c 100644 --- a/drivers/watchdog/retu_wdt.c +++ b/drivers/watchdog/retu_wdt.c @@ -28,69 +28,22 @@ /* Watchdog timer values in seconds */ #define RETU_WDT_MAX_TIMER 63 -struct retu_wdt_dev { - struct retu_dev *rdev; - struct device *dev; - struct delayed_work ping_work; -}; - -/* - * Since Retu watchdog cannot be disabled in hardware, we must kick it - * with a timer until userspace watchdog software takes over. If - * CONFIG_WATCHDOG_NOWAYOUT is set, we never start the feeding. - */ -static void retu_wdt_ping_enable(struct retu_wdt_dev *wdev) -{ - retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); - schedule_delayed_work(&wdev->ping_work, - round_jiffies_relative(RETU_WDT_MAX_TIMER * HZ / 2)); -} - -static void retu_wdt_ping_disable(struct retu_wdt_dev *wdev) -{ - retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); - cancel_delayed_work_sync(&wdev->ping_work); -} - -static void retu_wdt_ping_work(struct work_struct *work) -{ - struct retu_wdt_dev *wdev = container_of(to_delayed_work(work), - struct retu_wdt_dev, ping_work); - retu_wdt_ping_enable(wdev); -} - static int retu_wdt_start(struct watchdog_device *wdog) { - struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + struct retu_dev *rdev = watchdog_get_drvdata(wdog); - retu_wdt_ping_disable(wdev); + set_bit(WDOG_RUNNING, &wdog->status); - return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); -} - -static int retu_wdt_stop(struct watchdog_device *wdog) -{ - struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); - - retu_wdt_ping_enable(wdev); - - return 0; -} - -static int 
retu_wdt_ping(struct watchdog_device *wdog) -{ - struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); - - return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); + return retu_write(rdev, RETU_REG_WATCHDOG, wdog->timeout); } static int retu_wdt_set_timeout(struct watchdog_device *wdog, unsigned int timeout) { - struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + struct retu_dev *rdev = watchdog_get_drvdata(wdog); wdog->timeout = timeout; - return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); + return retu_write(rdev, RETU_REG_WATCHDOG, wdog->timeout); } static const struct watchdog_info retu_wdt_info = { @@ -101,8 +54,6 @@ static const struct watchdog_info retu_wdt_info = { static const struct watchdog_ops retu_wdt_ops = { .owner = THIS_MODULE, .start = retu_wdt_start, - .stop = retu_wdt_stop, - .ping = retu_wdt_ping, .set_timeout= retu_wdt_set_timeout, }; @@ -111,39 +62,26 @@ static int retu_wdt_probe(struct platform_device *pdev) struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent); bool nowayout = WATCHDOG_NOWAYOUT; struct watchdog_device *retu_wdt; - struct retu_wdt_dev *wdev; int ret; retu_wdt = devm_kzalloc(&pdev->dev, sizeof(*retu_wdt), GFP_KERNEL); if (!retu_wdt) return -ENOMEM; - wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL); - if (!wdev) - return -ENOMEM; - retu_wdt->info = &retu_wdt_info; retu_wdt->ops = &retu_wdt_ops; retu_wdt->timeout = RETU_WDT_MAX_TIMER; retu_wdt->min_timeout = 0; retu_wdt->max_timeout = RETU_WDT_MAX_TIMER; - watchdog_set_drvdata(retu_wdt, wdev); + watchdog_set_drvdata(retu_wdt, rdev); watchdog_set_nowayout(retu_wdt, nowayout); - wdev->rdev = rdev; - wdev->dev = &pdev->dev; - - INIT_DELAYED_WORK(&wdev->ping_work, retu_wdt_ping_work); - ret = watchdog_register_device(retu_wdt); if (ret < 0) return ret; - if (nowayout) - retu_wdt_ping(retu_wdt); - else - retu_wdt_ping_enable(wdev); + retu_wdt_start(retu_wdt); platform_set_drvdata(pdev, retu_wdt); @@ -153,10 +91,8 @@ static int 
retu_wdt_probe(struct platform_device *pdev) static int retu_wdt_remove(struct platform_device *pdev) { struct watchdog_device *wdog = platform_get_drvdata(pdev); - struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); watchdog_unregi
[PATCH 2/8] watchdog: Introduce hardware maximum timeout in watchdog core
Introduce an optional hardware maximum timeout in the watchdog core. The hardware maximum timeout can be lower than the maximum timeout. Drivers can set the maximum hardware timeout value in the watchdog data structure. If the configured timeout exceeds half the value of the maximum hardware timeout, the watchdog core enables a timer function to assist sending keepalive requests to the watchdog driver. Cc: Timo Kokkonen Cc: Uwe Kleine-König Signed-off-by: Guenter Roeck --- Documentation/watchdog/watchdog-kernel-api.txt | 14 +++ drivers/watchdog/watchdog_dev.c| 121 + include/linux/watchdog.h | 21 - 3 files changed, 135 insertions(+), 21 deletions(-) diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt index d8b0d3367706..5fa085276874 100644 --- a/Documentation/watchdog/watchdog-kernel-api.txt +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -53,9 +53,12 @@ struct watchdog_device { unsigned int timeout; unsigned int min_timeout; unsigned int max_timeout; + unsigned int max_hw_timeout_ms; + unsigned long last_keepalive; void *driver_data; struct mutex lock; unsigned long status; + struct delayed_work work; struct list_head deferred; }; @@ -73,8 +76,18 @@ It contains following fields: additional information about the watchdog timer itself. (Like it's unique name) * ops: a pointer to the list of watchdog operations that the watchdog supports. * timeout: the watchdog timer's timeout value (in seconds). + This is the time after which the system will reboot if user space does + not send a heartbeat request if the watchdog device is opened. + This may or may not be the hardware watchdog timeout. See max_hw_timeout_ms + for more details. * min_timeout: the watchdog timer's minimum timeout value (in seconds). * max_timeout: the watchdog timer's maximum timeout value (in seconds). +* max_hw_timeout_ms: Maximum hardware timeout, in milli-seconds. May differ + from max_timeout.
If set, the infrastructure will send a heartbeat to the + watchdog driver if 'timeout' is larger than 'max_hw_timeout / 2', + unless user space failed to ping the watchdog for 'timeout' seconds. +* last_keepalive: Time of most recent keepalive triggered from user space, + in jiffies. * bootstatus: status of the device after booting (reported with watchdog WDIOF_* status bits). * driver_data: a pointer to the drivers private data of a watchdog device. @@ -85,6 +98,7 @@ It contains following fields: information about the status of the device (Like: is the watchdog timer running/active, is the nowayout bit set, is the device opened via the /dev/watchdog interface or not, ...). +* work: Worker data structure for WatchDog Timer Driver Core internal use only. * deferred: entry in wtd_deferred_reg_list which is used to register early initialized watchdogs. diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 06171c73daf5..25849c1d6dc1 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -37,7 +37,9 @@ #include/* For the -ENODEV/... values */ #include /* For printk/panic/... */ #include /* For file operations */ +#include /* For timeout functions */ #include /* For watchdog specific items */ +#include/* For workqueue */ #include /* For handling misc devices */ #include /* For __init/__exit/... */ #include /* For copy_to_user/put_user/... 
*/ @@ -49,6 +51,53 @@ static dev_t watchdog_devt; /* the watchdog device behind /dev/watchdog */ static struct watchdog_device *old_wdd; +static struct workqueue_struct *watchdog_wq; + +static inline bool watchdog_need_worker(struct watchdog_device *wdd) +{ + unsigned int hm = wdd->max_hw_timeout_ms; + unsigned int m = wdd->max_timeout * 1000; + + return watchdog_active(wdd) && hm && hm != m && + wdd->timeout * 500 > hm; +} + +static inline void watchdog_update_worker(struct watchdog_device *wdd, + bool cancel, bool sync) +{ + if (watchdog_need_worker(wdd)) { + unsigned int t = wdd->timeout * 1000; + + if (wdd->max_hw_timeout_ms && t > wdd->max_hw_timeout_ms) + t = wdd->max_hw_timeout_ms; + queue_delayed_work(watchdog_wq, &wdd->work, + msecs_to_jiffies(t / 2)); + } else if (cancel) { + if (sync) + cancel_delayed_work_sync(&wdd->work); + else + cancel_delayed_work(&wdd->work); + } +} + +static int _watchdog_ping(struct watchdog_device *wdd) +{ + int err; + + if (test_bit(WDOG_UNREGISTERED, &wdd->status)) + return -ENODEV; + + if (!watchdog_active(wdd)) +
[PATCH 0/8] watchdog: Add support for keepalives triggered by infrastructure
The watchdog infrastructure is currently purely passive, meaning it only passes information from user space to drivers and vice versa. Since watchdog hardware tends to have its own quirks, this can result in quite complex watchdog drivers. A number of scenarios are especially common. - A watchdog is always active and can not be disabled, or can not be disabled once enabled. To support such hardware, watchdog drivers have to implement their own timers and use those timers to trigger watchdog keepalives while the watchdog device is not or not yet opened. - A variant of this is the desire to enable a watchdog as soon as its driver has been instantiated, to protect the system while it is still booting up, but the watchdog daemon is not yet running. - Some watchdogs have a very short maximum timeout, in the range of just a few seconds. Such low timeouts are difficult if not impossible to support from user space. Drivers supporting such watchdog hardware need to implement a timer function to augment heartbeats from user space. This patch set solves the above problems while keeping changes to the watchdog core minimal. - A new status flag, WDOG_RUNNING, informs the watchdog subsystem that a watchdog is running, and that the watchdog subsystem needs to generate heartbeat requests while the associated watchdog device is closed. - A new parameter in the watchdog data structure, max_hw_timeout_ms, informs the watchdog subsystem about a maximum hardware timeout. The watchdog subsystem uses this information together with the configured timeout and the maximum permitted timeout to determine if it needs to generate additional heartbeat requests. Patch #1 is a preparatory patch. Patch #2 adds timer functionality to the watchdog core. It solves the problem of short maximum hardware timeouts by augmenting heartbeats triggered from user space with internally triggered heartbeats. Patch #3 adds functionality to generate heartbeats while the watchdog device is closed.
It handles situations where the watchdog is running after the driver has been instantiated, but the device is not yet opened, and post-close situations necessary if a watchdog can not be stopped. Patch #4 makes the set_timeout function optional. This is now possible since timeout changes can now be completely handled in the watchdog core, for example if the hardware watchdog timeout is fixed. Patches #5 to #8 are example conversions of some watchdog drivers. Those patches will require testing. This patch set does not solve all limitations of the watchdog subsystem. Specifically, it does not add support for the following features. - It is desirable to be able to specify a maximum early timeout, from booting the system to opening the watchdog device. - Some watchdogs may require a minimum period of time between heartbeats. Examples are DA9062 and possibly AT91SAM9x. These and other features will be addressed with subsequent patches. The patch set is inspired by an earlier patch set from Timo Kokkonen. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] staging/lustre: Properly reference kthread_run instead of cfs_daemonize
From: Oleg Drokin cfs_daemonize is long gone and replaced by a proper call to kthread_run, so update the comment to reflect that fact. Signed-off-by: Oleg Drokin --- drivers/staging/lustre/lustre/include/lustre_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 77a7de9..3bb2f8b 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -2183,7 +2183,7 @@ struct ptlrpcd_ctl { */ struct ptlrpc_request_set *pc_set; /** -* Thread name used in cfs_daemonize() +* Thread name used in kthread_run() */ charpc_name[16]; /** -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] staging/lustre/ptlrpc: Remove stray cfs_daemonize comment
From: Oleg Drokin Ever since daemonize was removed in 3.18, there are no longer any flags passed to kthread_run. Most of the comments were deleted, but this one lingered on until now. Signed-off-by: Oleg Drokin --- drivers/staging/lustre/lustre/ptlrpc/pinger.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c index d05c37c..f8edb79 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c +++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c @@ -318,8 +318,6 @@ int ptlrpc_start_pinger(void) strcpy(pinger_thread.t_name, "ll_ping"); - /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we -* just drop the VM and FILES in cfs_daemonize_ctxt() right away. */ rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread, "%s", pinger_thread.t_name)); if (IS_ERR_VALUE(rc)) { -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 0/2] Lustre: remove cfs_daemonize from comments
From: Oleg Drokin cfs_daemonize was removed long ago, but I just stumbled upon a couple of instances where it was still referenced in the comments, so here are the patches to clean it up and not cause any unnecessary confusion. Oleg Drokin (2): staging/lustre/ptlrpc: Remove stray daemonize comment staging/lustre: Properly reference kthread_run instead of cfs_daemonize drivers/staging/lustre/lustre/include/lustre_net.h | 2 +- drivers/staging/lustre/lustre/ptlrpc/pinger.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks
Hi Stephen, On 04/08/15 09:14, Stephen Boyd wrote: On 08/03/2015 06:03 PM, Lin Huang wrote: From: huang lin pclk_pd_pmu needs to keep running and with the upcoming gpio clock handling this is not always the case anymore. So add it to the list of critical clocks for now. Signed-off-by: Heiko Stuebner Signed-off-by: Lin Huang From: says huang lin, first signed-off-by is Heiko Stuebner, and final signed-off-by is Lin Huang... who actually authored this patch? Is Lin Huang the same person as huang lin ? This patch was written by Heiko and uploaded by me. I am sorry for confusing you with my name; yes, Lin Huang and huang lin are the same person, it is all me. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH V3 request from stable 3.10 to 3.14] efi: fix 32bit kernel boot failed problem using efi
From: Fupan Li Commit 35d5134b7d5a ("x86/efi: Correct EFI boot stub use of code32_start") introduced a bug which causes a 32-bit kernel to fail to boot via EFI. It should use the label's address instead of the value stored in the label to calculate the address of code32_start. Signed-off-by: Fupan Li --- arch/x86/boot/compressed/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index abb988a..3b28eff 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -54,7 +54,7 @@ ENTRY(efi_pe_entry) callreloc reloc: popl%ecx - sublreloc, %ecx + subl$reloc, %ecx movl%ecx, BP_code32_start(%eax) sub $0x4, %esp -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] serial: don't announce CIR serial ports
Hi Maciej, On 08/02/2015 05:09 PM, Maciej S. Szmigiero wrote: > CIR type serial ports aren't real serial ports. > This is just a way to prevent legacy serial driver > from probing and eventually binding some resources > so don't announce them like normal serial ports. I'd like to keep some form of reporting so that we know the port was properly probed; what about extending uart_report_port() to including CIR + disabled status? Secondly, good catch! Because we should not be trying to register a console on this port, nor driving modem signals. So maybe an early exit after uart_report_port? Regards, Peter Hurley > Signed-off-by: Maciej Szmigiero > --- > drivers/tty/serial/serial_core.c |2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/drivers/tty/serial/serial_core.c > b/drivers/tty/serial/serial_core.c > index f368520..99f944d 100644 > --- a/drivers/tty/serial/serial_core.c > +++ b/drivers/tty/serial/serial_core.c > @@ -2237,7 +2237,7 @@ uart_configure_port(struct uart_driver *drv, struct > uart_state *state, > port->ops->config_port(port, flags); > } > > - if (port->type != PORT_UNKNOWN) { > + if (port->type != PORT_UNKNOWN && port->type != PORT_8250_CIR) { > unsigned long flags; > > uart_report_port(drv, port); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v3 01/11] user_ns: 3 new LSM hooks for user namespace operations
On Mon, Aug 3, 2015 at 4:34 AM, Lukasz Pawelczyk wrote: > On pią, 2015-07-31 at 22:48 -0500, Serge E. Hallyn wrote: >> On Fri, Jul 31, 2015 at 11:28:56AM +0200, Lukasz Pawelczyk wrote: >> > On czw, 2015-07-30 at 16:30 -0500, Serge E. Hallyn wrote: >> > > On Fri, Jul 24, 2015 at 12:04:35PM +0200, Lukasz Pawelczyk wrote: >> > > > @@ -969,6 +982,7 @@ static int userns_install(struct nsproxy >> > > > *nsproxy, struct ns_common *ns) >> > > > { >> > > > struct user_namespace *user_ns = to_user_ns(ns); >> > > > struct cred *cred; >> > > > + int err; >> > > > >> > > > /* Don't allow gaining capabilities by reentering >> > > > * the same user namespace. >> > > > @@ -986,6 +1000,10 @@ static int userns_install(struct nsproxy >> > > > *nsproxy, struct ns_common *ns) >> > > > if (!ns_capable(user_ns, CAP_SYS_ADMIN)) >> > > > return -EPERM; >> > > > >> > > > + err = security_userns_setns(nsproxy, user_ns); >> > > > + if (err) >> > > > + return err; >> > > >> > > So at this point the LSM thinks current is in the new ns. If >> > > prepare_creds() fails below, should it be informed of that? >> > > (Or am I over-thinking this?) >> > > >> > > > + >> > > > cred = prepare_creds(); >> > > > if (!cred) >> > > > return -ENOMEM; >> > >> > Hmm, the use case for this hook I had in mind was just to allow or >> > disallow the operation based on the information passed in >> > arguments. >> > Not to register the current in any way so LSM can think it is or >> > isn't >> > in the new namespace. >> > >> > I think that any other LSM check that would like to know in what >> > namespace the current is, would just check that from current's >> > creds. >> > Not use some stale and duplicated information the above hook could >> > have >> > registered. >> > >> > I see no reason for this hook to change the LSM state, only to >> > answer >> > the question: allowed/disallowed (eventually return an error cause >> > it >> > is unable to give an answer which falls into the disallow >> > category). 
>> >> How about renaming it "security_userns_may_setns()" for clarity? > > I personally have nothing against it. However looking at already > existing hooks only one of them has "may" in the name (unix_may_send) > while a lot clearly have exactly this purpose (e.g. most of inode_* > family, some from file_* and task_*). So it seems the trend is against > it. > > What do you think? Anyone else has an opinion? Personally, I prefer that hooks be named as closely to their caller, or calling context, as possible. In this case, it seems like "may" is implied. It's an LSM like all the others, so it can fail, which would cause the caller to fail too, so "may" tends to be implicit. I would leave it as-is, but I could be convinced otherwise. -Kees -- Kees Cook Chrome OS Security -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/2] x86/ldt: allow to disable modify_ldt at runtime
On Mon, Aug 3, 2015 at 4:19 PM, Willy Tarreau wrote: > On Mon, Aug 03, 2015 at 03:35:15PM -0700, Kees Cook wrote: >> Yay for perm disable! Thank you! :) > > Andy would like to see this evolve towards something possibly > more complete and/or generic. I think this needs more thoughts > and that we should possibly stick to 0/1 for now and decide how > we want to make this evolve later to cover permanent disable, > various ABIs, etc... > > What do you think ? That's probably the best way forward. I still think a generic syscall disabling feature would be nice. :) I won't have time to work on it for a little while, though. -Kees -- Kees Cook Chrome OS Security -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/7] cpufreq: opp: fix handling of turbo modes
On 30.07.2015 23:37, Kukjin Kim wrote: > On 07/27/15 20:47, Bartlomiej Zolnierkiewicz wrote: >> On Monday, July 27, 2015 05:06:41 PM Viresh Kumar wrote: >>> On 27-07-15, 13:14, Bartlomiej Zolnierkiewicz wrote: Sorry but you don't seem to understand the issue. >>> >>> :) >>> >>> No, I did. I understand that if someone uses opp bindings today with >>> some entries as turbo OPPs, cpufreq will use them as normal >>> frequencies. And that may harm the board. >>> >>> BUT, opp-v2 code isn't ready to be used yet. And platforms should see >>> what all is implemented before trying to use them. >> >> OK. >> >>> All I was saying is, this isn't a FIX as we haven't introduced the >>> feature yet. Otherwise I had no issues with the patch. >> >> I will update the description for the next patchset revision. >> > Hi Bart, > > When will you re-post v3? Because I have a plan to send a pull-request > to arm-soc until this weekend... Dear Kukjin, We are already at 4.2-rc5 and you did not send the pull request before the weekend as you said. It is really late and there is no special reason for delaying the request. What happened? Best regards, Krzysztof -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
linux-next: manual merge of the security tree with Linus' tree
Hi James, Today's linux-next merge of the security tree got a conflict in: security/yama/yama_lsm.c between commit: 5413fcdbe9e7 ("Adding YAMA hooks also when YAMA is not stacked.") from Linus' tree and commit: 730daa164e7c ("Yama: remove needless CONFIG_SECURITY_YAMA_STACKED") from the security tree. I fixed it up (the latter removed the code updated by the former, so I just did that) and can carry the fix as necessary (no action is required). -- Cheers, Stephen Rothwells...@canb.auug.org.au -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 1/2] clk: rockchip: add pclk_pd_pmu to the list of rk3288 critical clocks
On 08/03/2015 06:03 PM, Lin Huang wrote: From: huang lin pclk_pd_pmu needs to keep running and with the upcoming gpio clock handling this is not always the case anymore. So add it to the list of critical clocks for now. Signed-off-by: Heiko Stuebner Signed-off-by: Lin Huang From: says huang lin, first signed-off-by is Heiko Stuebner, and final signed-off-by is Lin Huang... who actually authored this patch? Is Lin Huang the same person as huang lin ? -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mm: add the block to the tail of the list in expand()
On 2015/8/3 12:10, Dave Hansen wrote: > On 08/02/2015 07:05 PM, Xishi Qiu wrote: Also, this might not do very much good in practice. If you are splitting a high-order page, you are doing the split because the lower-order lists are empty. So won't that list_add() be to an empty >> >> I made a mistake, you are right, all the lower-order lists are empty, >> so it is no sense to add to the tail. > > I actually tested this experimentally and the lists are not always > empty. It's probably __rmqueue_smallest() vs. __rmqueue_fallback() logic. > > In any case, you might want to double-check. > Hi Dave, How did you do the experiment? Thanks, Xishi Qiu > . > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: cgroup/loop Bad page state oops in Linux v4.2-rc3-136-g45b4b782e848
On Mon, Aug 3, 2015 at 12:56 PM, Josh Boyer wrote: > On Mon, Aug 3, 2015 at 10:28 AM, Mike Snitzer wrote: >> On Sun, Aug 02 2015 at 10:01P -0400, >> Josh Boyer wrote: >> >>> On Fri, Jul 31, 2015 at 2:58 PM, Josh Boyer >>> wrote: >>> > On Thu, Jul 30, 2015 at 8:19 PM, Mike Snitzer wrote: >>> >> >>> >> The only commit that looks even remotely related (given 32bit concerns) >>> >> would be 1c220c69ce0dcc0f234a9f263ad9c0864f971852 >>> > >>> > Confirmed. I built kernels for our tester that started with the >>> > working snapshot and applied the patches above one at a time. The >>> > failing patch was the commit you suspected. >>> > >>> > I can try and build a 4.2-rc4 kernel with that reverted, but it would >>> > be good if someone could start thinking about how that could cause >>> > this issue. >>> >>> A revert on top of 4.2-rc4 booted. So this is currently causing >>> issues with upstream as well. >> >> Hi Josh, >> >> I've staged the following fix in linux-next (for 4.2-rc6 inclusion): >> https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=for-next&id=76270d574acc897178a5c8be0bd2a743a77e4bac >> >> Can you please verify that it works for your 32bit testcase against >> 4.2-rc4 (or rc5)? > > Sure, I'll get a kernel with this included spun up and ask Adam to test. Adam tested this with success. If you're still collecting patch metadata, adding: Tested-by: Adam Williamson would be appreciated. josh >> From: Mike Snitzer >> Date: Mon, 3 Aug 2015 09:54:58 -0400 >> Subject: [PATCH] dm: fix dm_merge_bvec regression on 32 bit systems >> >> A DM regression on 32 bit systems was reported against v4.2-rc3 here: >> https://lkml.org/lkml/2015/7/29/401 >> >> Fix this by reverting both commit 1c220c69 ("dm: fix casting bug in >> dm_merge_bvec()") and 148e51ba ("dm: improve documentation and code >> clarity in dm_merge_bvec"). This combined revert is done to eliminate >> the possibility of a partial revert in stable@ kernels. 
>> >> In hindsight the correct fix, at the time 1c220c69 was applied to fix >> the regression that 148e51ba introduced, should've been to simply revert >> 148e51ba. >> >> Reported-by: Josh Boyer >> Acked-by: Joe Thornber >> Signed-off-by: Mike Snitzer >> Cc: sta...@vger.kernel.org # 3.19+ >> --- >> drivers/md/dm.c | 27 ++- >> 1 file changed, 10 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/md/dm.c b/drivers/md/dm.c >> index ab37ae1..0d7ab20 100644 >> --- a/drivers/md/dm.c >> +++ b/drivers/md/dm.c >> @@ -1729,7 +1729,8 @@ static int dm_merge_bvec(struct request_queue *q, >> struct mapped_device *md = q->queuedata; >> struct dm_table *map = dm_get_live_table_fast(md); >> struct dm_target *ti; >> - sector_t max_sectors, max_size = 0; >> + sector_t max_sectors; >> + int max_size = 0; >> >> if (unlikely(!map)) >> goto out; >> @@ -1742,18 +1743,10 @@ static int dm_merge_bvec(struct request_queue *q, >> * Find maximum amount of I/O that won't need splitting >> */ >> max_sectors = min(max_io_len(bvm->bi_sector, ti), >> - (sector_t) queue_max_sectors(q)); >> + (sector_t) BIO_MAX_SECTORS); >> max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; >> - >> - /* >> -* FIXME: this stop-gap fix _must_ be cleaned up (by passing a >> sector_t >> -* to the targets' merge function since it holds sectors not bytes). >> -* Just doing this as an interim fix for stable@ because the more >> -* comprehensive cleanup of switching to sector_t will impact every >> -* DM target that implements a ->merge hook. 
>> -*/ >> - if (max_size > INT_MAX) >> - max_size = INT_MAX; >> + if (max_size < 0) >> + max_size = 0; >> >> /* >> * merge_bvec_fn() returns number of bytes >> @@ -1761,13 +1754,13 @@ static int dm_merge_bvec(struct request_queue *q, >> * max is precomputed maximal io size >> */ >> if (max_size && ti->type->merge) >> - max_size = ti->type->merge(ti, bvm, biovec, (int) max_size); >> + max_size = ti->type->merge(ti, bvm, biovec, max_size); >> /* >> * If the target doesn't support merge method and some of the devices >> -* provided their merge_bvec method (we know this by looking for the >> -* max_hw_sectors that dm_set_device_limits may set), then we can't >> -* allow bios with multiple vector entries. So always set max_size >> -* to 0, and the code below allows just one page. >> +* provided their merge_bvec method (we know this by looking at >> +* queue_max_hw_sectors), then we can't allow bios with multiple >> vector >> +* entries. So always set max_size to 0, and the code below allows >> +* just one page. >> */ >> else if (queue_ma
Re: [PATCH] x86: Clean up files of Intel Processor Trace
On 2015/08/03 20:03, Borislav Petkov wrote: > On Mon, Aug 03, 2015 at 11:08:07AM +0200, Peter Zijlstra wrote: >> For those of us suffering OCDs and all, it's a good change though. The >> alphabet song does go: A, B, C, D etc. after all. Not: A, C, D, B ... > > ... except that x86 encoding orders regs like it was originally: AX, > CX, DX, BX, ... Don't ask me why - looks like someone thought that the > C (count) and D (double precision - AX extension) registers were more > important than B (base). > > Or someone was simply illiterate. > I thought this was a typo. If it is intentional, I'll keep it intact. Thanks, Takao Indoh -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH v2 2/2] pinctrl: rockchip: only enable gpio clock when setting gpio
From: huang lin The gpio controller keeps its state even when its clock is disabled, so to save power, only enable the gpio clock while the gpio is being set. Signed-off-by: Heiko Stuebner Signed-off-by: Lin Huang --- Changes in v2: Advice from Douglas Anderson -use readl_relaxed() instead of readl() -fix commit message format error drivers/pinctrl/pinctrl-rockchip.c | 57 +++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index cc2843a..70a4539 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -945,6 +945,7 @@ static int _rockchip_pmx_gpio_set_direction(struct gpio_chip *chip, if (ret < 0) return ret; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR); @@ -953,9 +954,11 @@ static int _rockchip_pmx_gpio_set_direction(struct gpio_chip *chip, data |= BIT(pin); else data &= ~BIT(pin); + writel_relaxed(data, bank->reg_base + GPIO_SWPORT_DDR); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return 0; } @@ -1389,6 +1392,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value) unsigned long flags; u32 data; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = readl(reg); @@ -1398,6 +1402,7 @@ static void rockchip_gpio_set(struct gpio_chip *gc, unsigned offset, int value) writel(data, reg); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); } /* @@ -1409,7 +1414,9 @@ static int rockchip_gpio_get(struct gpio_chip *gc, unsigned offset) struct rockchip_pin_bank *bank = gc_to_pin_bank(gc); u32 data; + clk_enable(bank->clk); data = readl(bank->reg_base + GPIO_EXT_PORT); + clk_disable(bank->clk); data >>= offset; data &= 1; return data; @@ -1546,6 +1553,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) if (ret < 0) return ret; + clk_enable(bank->clk); spin_lock_irqsave(&bank->slock, flags); data = 
readl_relaxed(bank->reg_base + GPIO_SWPORT_DDR); @@ -1603,6 +1611,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) default: irq_gc_unlock(gc); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return -EINVAL; } @@ -1611,6 +1620,7 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) irq_gc_unlock(gc); spin_unlock_irqrestore(&bank->slock, flags); + clk_disable(bank->clk); return 0; } @@ -1620,8 +1630,10 @@ static void rockchip_irq_suspend(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct rockchip_pin_bank *bank = gc->private; + clk_enable(bank->clk); bank->saved_masks = irq_reg_readl(gc, GPIO_INTMASK); irq_reg_writel(gc, ~gc->wake_active, GPIO_INTMASK); + clk_disable(bank->clk); } static void rockchip_irq_resume(struct irq_data *d) @@ -1629,7 +1641,27 @@ static void rockchip_irq_resume(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct rockchip_pin_bank *bank = gc->private; + clk_enable(bank->clk); irq_reg_writel(gc, bank->saved_masks, GPIO_INTMASK); + clk_disable(bank->clk); +} + +static void rockchip_irq_gc_mask_clr_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + clk_enable(bank->clk); + irq_gc_mask_clr_bit(d); +} + +void rockchip_irq_gc_mask_set_bit(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + irq_gc_mask_set_bit(d); + clk_disable(bank->clk); } static int rockchip_interrupts_register(struct platform_device *pdev, @@ -1640,7 +1672,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; struct irq_chip_generic *gc; int ret; - int i; + int i, j; for (i = 0; i < ctrl->nr_banks; ++i, ++bank) { if (!bank->valid) { @@ -1649,11 +1681,19 @@ static int 
rockchip_interrupts_register(struct platform_device *pdev, continue; } + ret = clk_enable(bank->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable clock for bank %s\n", + bank->name); + conti