date:20181214

[PATCH] cxgb4: remove DEFINE_SIMPLE_DEBUGFS_FILE()

2018-12-14 Thread Yangtao Li

DEFINE_SHOW_ATTRIBUTE already exists, so there is no need to define a
separate macro for the same purpose; remove DEFINE_SIMPLE_DEBUGFS_FILE.
Also use the DEFINE_SHOW_ATTRIBUTE macro to replace the open-coded
single_open() boilerplate.

Signed-off-by: Yangtao Li 
---
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c| 113 --
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.h|  13 --
 .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c   |  16 +--
 3 files changed, 25 insertions(+), 117 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index cab492ec8f59..b0ff9fa183f4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -378,19 +378,7 @@ static int cim_qcfg_show(struct seq_file *seq, void *v)
   QUEREMFLITS_G(p[2]) * 16);
return 0;
 }
-
-static int cim_qcfg_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, cim_qcfg_show, inode->i_private);
-}
-
-static const struct file_operations cim_qcfg_fops = {
-   .owner   = THIS_MODULE,
-   .open= cim_qcfg_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(cim_qcfg);
 
 static int cimq_show(struct seq_file *seq, void *v, int idx)
 {
@@ -860,8 +848,7 @@ static int tx_rate_show(struct seq_file *seq, void *v)
}
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tx_rate);
+DEFINE_SHOW_ATTRIBUTE(tx_rate);
 
 static int cctrl_tbl_show(struct seq_file *seq, void *v)
 {
@@ -893,8 +880,7 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v)
kfree(incr);
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(cctrl_tbl);
+DEFINE_SHOW_ATTRIBUTE(cctrl_tbl);
 
 /* Format a value in a unit that differs from the value's native unit by the
  * given factor.
@@ -955,8 +941,7 @@ static int clk_show(struct seq_file *seq, void *v)
 
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(clk);
+DEFINE_SHOW_ATTRIBUTE(clk);
 
 /* Firmware Device Log dump. */
 static const char * const devlog_level_strings[] = {
@@ -1990,22 +1975,10 @@ static int sensors_show(struct seq_file *seq, void *v)
 
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(sensors);
+DEFINE_SHOW_ATTRIBUTE(sensors);
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int clip_tbl_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, clip_tbl_show, inode->i_private);
-}
-
-static const struct file_operations clip_tbl_debugfs_fops = {
-   .owner   = THIS_MODULE,
-   .open= clip_tbl_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = single_release
-};
+DEFINE_SHOW_ATTRIBUTE(clip_tbl);
 #endif
 
 /*RSS Table.
@@ -2208,8 +2181,7 @@ static int rss_config_show(struct seq_file *seq, void *v)
 
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(rss_config);
+DEFINE_SHOW_ATTRIBUTE(rss_config);
 
 /* RSS Secret Key.
  */
@@ -2628,19 +2600,7 @@ static int resources_show(struct seq_file *seq, void *v)
 
return 0;
 }
-
-static int resources_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, resources_show, inode->i_private);
-}
-
-static const struct file_operations resources_debugfs_fops = {
-   .owner   = THIS_MODULE,
-   .open= resources_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = seq_release,
-};
+DEFINE_SHOW_ATTRIBUTE(resources);
 
 /**
  * ethqset2pinfo - return port_info of an Ethernet Queue Set
@@ -3233,8 +3193,7 @@ static int tid_info_show(struct seq_file *seq, void *v)
   t4_read_reg(adap, LE_DB_ACT_CNT_IPV6_A));
return 0;
 }
-
-DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
+DEFINE_SHOW_ATTRIBUTE(tid_info);
 
 static void add_debugfs_mem(struct adapter *adap, const char *name,
unsigned int idx, unsigned int size_mb)
@@ -3364,21 +3323,9 @@ static int meminfo_show(struct seq_file *seq, void *v)
 
return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(meminfo);
 
-static int meminfo_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, meminfo_show, inode->i_private);
-}
-
-static const struct file_operations meminfo_fops = {
-   .owner   = THIS_MODULE,
-   .open= meminfo_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = single_release,
-};
-
-static int chcr_show(struct seq_file *seq, void *v)
+static int chcr_stats_show(struct seq_file *seq, void *v)
 {
struct adapter *adap = seq->private;
 
@@ -3399,20 +3346,7 @@ static int chcr_show(struct seq_file *seq, void *v)
   atomic_read(&adap->chcr_stats.ipsec_cnt));
return 0;
 }
-
-
-static int chcr_stats_open(struct inode *inode, struct file *file)
-{
-return single_open(file, chcr_show, inode->i_private);
-}
-
-static const struct file_operations chcr_stats_debugfs_fops = {
-.owner   = THIS_MODULE,
-.open=

Re: Can we drop upstream Linux x32 support?

2018-12-14 Thread Thomas Schoebel-Theuer


On 12/14/18 22:41, Thomas Schöbel-Theuer wrote:

On 12/14/18 22:24, Andy Lutomirski wrote:


I'm talking about x32, which is a different beast.



So from my viewpoint the mentioned roadmap / timing requirements will 
remain the same, whatever you are dropping.


Enterprise-critical use cases will probably need to be migrated to 
KVM/qemu together with their old kernel versions, anyway (because the 
original hardware will be no longer available in a few decades).




Here is a systematic approach to the problem.


AFAICS legacy 32bit userspace code (which still exists in considerable 
quantities) can be executed in at least the following ways:



1) natively on 32bit-capable hardware, under 32bit kernels. Besides 
legacy hardware, this also encompasses most current Intel / AMD 64bit 
hardware in 32bit compatibility mode.


2) under 64bit kernels, using the 32bit compat layer from practically 
any kernel version.


3) under KVM/qemu.


If you drop only 1), users still have a fair chance of migrating to 
either of the other two possibilities.


As explained, a time frame of ~5 years should work for the vast majority.

If you clearly explain the migration paths to your users (and to the 
press), I think it will be acceptable.



[side note: I know of a single legacy instance which is now ~20 years 
old, but makes a revenue of several millions per month. These guys have 
large quantities of legacy hardware in stock. And they have enough money 
to hire a downstream maintainer in case of emergency.]



Fatal problems would only arise if you dropped all three 
possibilities in the very long term.



In ~100 years, possibility 3) should be sufficient for handling use 
cases like preservation of historic documents. The latter is roughly 
equivalent to running binary-only MSDOS, Windows NT, and similar, even 
in 100 years, and even non-natively under future hardware architectures.

[PATCH] 6lowpan: convert to DEFINE_SHOW_ATTRIBUTE

2018-12-14 Thread Yangtao Li

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Yangtao Li 
---
 net/6lowpan/debugfs.c | 13 +
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 24915e0bb9ea..6c152f9ea26e 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -232,18 +232,7 @@ static int lowpan_context_show(struct seq_file *file, void 
*offset)
 
return 0;
 }
-
-static int lowpan_context_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, lowpan_context_show, inode->i_private);
-}
-
-static const struct file_operations lowpan_context_fops = {
-   .open   = lowpan_context_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(lowpan_context);
 
 static int lowpan_short_addr_get(void *data, u64 *val)
 {
-- 
2.17.0

[PATCH] ipconfig: convert to DEFINE_SHOW_ATTRIBUTE

2018-12-14 Thread Yangtao Li

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Yangtao Li 
---
 net/ipv4/ipconfig.c | 13 +
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 88212615bf4c..fcb817d0eb24 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1361,18 +1361,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, 
void *v)
}
return 0;
 }
-
-static int ntp_servers_seq_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, ntp_servers_seq_show, NULL);
-}
-
-static const struct file_operations ntp_servers_seq_fops = {
-   .open   = ntp_servers_seq_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
 #endif /* CONFIG_PROC_FS */
 
 /*
-- 
2.17.0

[PATCH] sunrpc: convert to DEFINE_SHOW_ATTRIBUTE

2018-12-14 Thread Yangtao Li

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Yangtao Li 
---
 net/sunrpc/rpc_pipe.c | 19 +++
 net/sunrpc/stats.c| 14 +-
 2 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 4fda18d47e2c..69663681bf9d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1266,7 +1266,7 @@ static const struct rpc_pipe_ops gssd_dummy_pipe_ops = {
  * that this file will be there and have a certain format.
  */
 static int
-rpc_show_dummy_info(struct seq_file *m, void *v)
+rpc_dummy_info_show(struct seq_file *m, void *v)
 {
seq_printf(m, "RPC server: %s\n", utsname()->nodename);
seq_printf(m, "service: foo (1) version 0\n");
@@ -1275,25 +1275,12 @@ rpc_show_dummy_info(struct seq_file *m, void *v)
seq_printf(m, "port: 0\n");
return 0;
 }
-
-static int
-rpc_dummy_info_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, rpc_show_dummy_info, NULL);
-}
-
-static const struct file_operations rpc_dummy_info_operations = {
-   .owner  = THIS_MODULE,
-   .open   = rpc_dummy_info_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
 
 static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
-   .i_fop = &rpc_dummy_info_operations,
+   .i_fop = &rpc_dummy_info_fops,
.mode = S_IFREG | 0400,
},
 };
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 71166b393732..d064fcec0b2e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -62,19 +62,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
}
return 0;
 }
-
-static int rpc_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, rpc_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations rpc_proc_fops = {
-   .owner = THIS_MODULE,
-   .open = rpc_proc_open,
-   .read  = seq_read,
-   .llseek = seq_lseek,
-   .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rpc_proc);
 
 /*
  * Get RPC server stats
-- 
2.17.0

Re: [ANNOUNCE] Call for Papers - linux.conf.au Kernel Miniconf, Christchurch NZ, 21-25 Jan 2019

2018-12-14 Thread Andrew Donnellan


On 21/10/18 12:10 pm, Andrew Donnellan wrote:
The linux.conf.au Kernel Miniconf is happening once again, this time in 
Christchurch on 22 Jan 2019.


*** Submissions close on 2018-12-16, 23:59 AoE, with early submissions 
(before 2018-11-16, 23:59 AoE) given priority. ***


This deadline is in just under 53 hours, and will not be extended.



*** Submission details: http://lca-kernel.ozlabs.org/2019-cfp.html ***

The Kernel Miniconf is a 1 day stream alongside the main LCA conference 
to talk about kernel stuff.


We invite submissions on anything related to kernel and low-level 
systems programming. We welcome submissions from developers of all 
levels of experience in the kernel community, covering a broad range of 
topics.


Past Kernel Miniconfs have included technical talks on topics such as 
memory management, RCU, scheduling and filesystems, as well as talks on 
Linux kernel community topics such as licensing and Linux kernel 
development process.


We strongly encourage both first-time and seasoned speakers from all 
backgrounds, ages, genders, nationalities, ethnicities, religions and 
abilities. Like the main LCA conference itself, we respect and encourage 
diversity at our miniconf.


Speakers will need to purchase an LCA ticket to attend.

See http://lca-kernel.ozlabs.org/2019-cfp.html for full details and the 
submission form.




--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited

Re: [PATCH v8 20/22] tracing: Use hist trigger's var_ref array to destroy var_refs

2018-12-14 Thread Namhyung Kim

Hi Tom and Masami,

On Sat, Dec 15, 2018 at 2:29 AM Tom Zanussi  wrote:
>
> Hi Masami,
>
> On Sat, 2018-12-15 at 01:31 +0900, Masami Hiramatsu wrote:
> > On Mon, 10 Dec 2018 18:01:34 -0600
> > Tom Zanussi  wrote:
> >
> > > From: Tom Zanussi 
> > >
> > > Since every var ref for a trigger has an entry in the var_ref[]
> > > array,
> > > use that to destroy the var_refs, instead of piecemeal via the
> > > field
> > > expressions.
> > >
> > > This allows us to avoid having to keep and treat differently
> > > separate
> > > lists for the action-related references, which future patches will
> > > remove.
> >
> > I'm not so sure, but this seems a bugfix... if so, please move this
> > at
> > the top of this series.
> >
>
> No, none of these are bugfixes - patches 17-22 are just some additional
> cleanup that try to make the code easier to follow after Namhyung's
> comments that things were confusing here.
>
> Actually, what I should probably do is make these a separate series..

Agreed, I also think it's getting bigger and having these cleanups
before the actual change would make things a little bit easier IMHO.

Thanks,
Namhyung

Re: [PATCH v4 11/15] clocksource: Add clock driver for RDA8810PL SoC

2018-12-14 Thread Manivannan Sadhasivam

On Wed, Dec 12, 2018 at 04:52:58PM +0100, Daniel Lezcano wrote:
> On 12/12/2018 16:47, Manivannan Sadhasivam wrote:
> > Hi Daniel,
> > 
> > On Wed, Dec 12, 2018 at 04:07:53PM +0100, Daniel Lezcano wrote:
> >> On 10/12/2018 18:35, Manivannan Sadhasivam wrote:
> >>> Add clock driver for RDA Micro RDA8810PL SoC supporting OSTIMER
> >>> and HWTIMER.
> >>>
> >>> RDA8810PL has two independent timers: OSTIMER (56 bit) and HWTIMER
> >>> (64 bit). Each timer provides optional interrupt support. In this
> >>> driver, OSTIMER is used for clockevents and HWTIMER is used for
> >>> clocksource.
> >>>
> >>> Signed-off-by: Andreas Färber 
> >>> Signed-off-by: Manivannan Sadhasivam 
> >>
> >> The driver looks good to me. Do you want me to pick it up via my tree?
> >>
> > 
> > Yes, please do. Marc is going to pick up the irqchip driver but I'm not
> > sure about the serial driver. The rest of the patches can be picked up
> > by the ARM maintainers (I need to send another version for dropping
> > Andreas from MAINTAINERS).
> 
> Ok, applied.
>

Just to be sure before spinning next version, have you also picked up the
bindings patch? I can't find the commit(s) in your tree!

https://git.linaro.org/people/daniel.lezcano/linux.git/

Thanks,
Mani

> Thanks
> 
>   -- Daniel
> 
> 
> -- 
>   Linaro.org │ Open source software for ARM SoCs
> 
> Follow Linaro:   Facebook |
>  Twitter |
>  Blog
>

Re: [PATCH v2 0/3] PCIE support for i.MX8MQ

2018-12-14 Thread Andrey Smirnov

On Fri, Dec 14, 2018 at 12:38 PM Bjorn Helgaas  wrote:
>
> [+cc Trent]
>
> On Thu, Dec 06, 2018 at 12:15:50PM +, Lorenzo Pieralisi wrote:
> > On Wed, Dec 05, 2018 at 11:35:42PM -0800, Andrey Smirnov wrote:
> > > Everyone:
> > >
> > > This series contains changes I made in order to enable support of PCIE
> > > IP block on i.MX8MQ SoCs (full tree can be found at [github-v2]).
> > >
> > > NOTE: The last patch has a Kconfig symbol dependency on [imx8mq-kconfig].
> > >
> > > Changes since [v1]:
> > >
> > >  - Driver changed to use a single "fsl,controller-id" property to
> > >distinguish between two instances of the PCIE IP block
> > >
> > >  - All code pertaining to L1SS was dropped to simplify the patch
> > >
> > >  - Documented additions to DT bindings
> > >
> > > Feedback is welcome!
> > >
> > > Thanks,
> > > Andrey Smirnov
> >
> > Andrey,
> >
> > I have applied the patches, ...
>
> When I merged your pci/dwc branch, I noticed what looks like a merge
> conflict between these commits:
>
>   d123765b58ee ("PCI: dwc: Adjust Kconfig to allow IMX6 PCIe host on IMX7")
>   1a059d994189 ("PCI: imx: Add support for i.MX8MQ")
>
> d123765b58ee added SOC_IMX7D:
>
>   -   depends on SOC_IMX6Q || (ARM && COMPILE_TEST)
>   +   depends on SOC_IMX6Q || SOC_IMX7D || (ARM && COMPILE_TEST)
>
> and 1a059d994189 added SOC_IMX8MQ but dropped SOC_IMX7D again, which looks
> like a mistake:
>
>   -   depends on SOC_IMX6Q || SOC_IMX7D || (ARM && COMPILE_TEST)
>   +   depends on SOC_IMX8MQ || SOC_IMX6Q || (ARM && COMPILE_TEST)
>
> So my "next" branch[1] (current head 7053eeb009e0) is:
>
>   config PCI_IMX6
> bool "Freescale i.MX6/7 PCIe controller"
> depends on SOC_IMX8MQ || SOC_IMX6Q || (ARM && COMPILE_TEST)
> depends on PCI_MSI_IRQ_DOMAIN
> select PCIE_DW_HOST
>
> which I think is wrong.

Yeah, this is definitely wrong, should be SOC_IMX8MQ || SOC_IMX6Q ||
SOC_IMX7D || (ARM && COMPILE_TEST)

> Lorenzo, if you update your pci/dwc branch,  I'll rebuild my "next" branch.
>
> If you want to tweak the switch statements and/or IMX8 checks, you
> could do that at the same time.
>
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git/log/?h=next

OK, I'll use the branch above for v3 of the patches.

Thanks,
Andrey Smirnov

Re: [PATCH v2 3/3] PCI: imx: Add support for i.MX8MQ

2018-12-14 Thread Andrey Smirnov

On Fri, Dec 14, 2018 at 12:30 PM Bjorn Helgaas  wrote:
>
> [+cc Gustavo for fallthrough annotation]
>
> On Wed, Dec 05, 2018 at 11:35:45PM -0800, Andrey Smirnov wrote:
> > Add code needed to support i.MX8MQ variant.
>
> > @@ -245,7 +253,8 @@ static void imx6_pcie_reset_phy(struct imx6_pcie 
> > *imx6_pcie)
> >  {
> >   u32 tmp;
> >
> > - if (imx6_pcie->variant == IMX7D)
> > + if (imx6_pcie->variant == IMX7D ||
> > + imx6_pcie->variant == IMX8MQ)
>
> This style looks like a maintenance problem: the code below is probably
> IMX6-specific, and you should test for *that* instead of adding to this
> list of things that are *not* IMX6, because that list is likely to
> continue growing.  There are more occurrences below.
>

Makes sense, I'll update the patches and send a v3 out.

> > @@ -301,6 +312,7 @@ static void imx6_pcie_assert_core_reset(struct 
> > imx6_pcie *imx6_pcie)
> >
> >   switch (imx6_pcie->variant) {
> >   case IMX7D:
> > + case IMX8MQ: /* FALLTHROUGH */
> >   reset_control_assert(imx6_pcie->pciephy_reset);
> >   reset_control_assert(imx6_pcie->apps_reset);
> >   break;
>
> I'm not an expert on fallthrough annotation (Gustavo, cc'd, is), but
> this looks wrong.  It's the IMX7D case that falls through, not the
> IMX8MQ case.
>
> The recent annotations added by Gustavo are at the point where the
> "break" would normally be, e.g.,
>
>   case IMX7D:
> /* fall through */<--- annotation
>   case IMX8MQ:
> 
> break;
>
> But in this case there's actually no IMX7D-specific *code* there, so I
> suspect the annotation is unnecessary.  It's obvious that IMX7D and
> IMX8MQ are handled the same, so there's really no opportunity for the
> "forgotten break" mistake -Wimplicit-fallthrough is trying to find.
>
> If we *do* want this annotation, we should spell it the same as
> Gustavo has been, i.e., "fall through".
>
> Again, more occurrences below.

Yes, definitely, the same mistake of mine was already caught elsewhere in
the tree: https://lore.kernel.org/lkml/20181214144406.0dbff...@canb.auug.org.au/

I'll fix it in v3.

Thanks,
Andrey Smirnov

[PATCH 3/7] dt-bindings: riscv: cpus: add E51 cores to the list of documented CPUs

2018-12-14 Thread Paul Walmsley

Add compatible strings for the SiFive E51 family of CPU cores to the
RISC-V CPU compatible string documentation.  The E51 CPU core is
described in:

https://static.dev.sifive.com/FU540-C000-v1.0.pdf

Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 Documentation/devicetree/bindings/riscv/cpus.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/riscv/cpus.txt 
b/Documentation/devicetree/bindings/riscv/cpus.txt
index adf7b7af5dc3..fb9d4f86f41f 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.txt
+++ b/Documentation/devicetree/bindings/riscv/cpus.txt
@@ -68,8 +68,9 @@ described below.
 - compatible:
 Usage: required
 Value type: 
-Definition: must contain "riscv", may contain one of
-"sifive,rocket0"
+Definition: must contain "riscv", may contain one or
+   more of "sifive,rocket0", "sifive,e51",
+   "sifive,e5"
 - mmu-type:
 Usage: optional
 Value type: 
-- 
2.20.0

[PATCH 2/7] dt-bindings: riscv: sifive: add documentation for the SiFive FU540

2018-12-14 Thread Paul Walmsley

Add DT binding documentation for the SiFive FU540 SoC.  This
SoC is documented at:

https://static.dev.sifive.com/FU540-C000-v1.0.pdf

This file is originally based on
Documentation/devicetree/bindings/arm/ti/k3.txt.

Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 .../devicetree/bindings/riscv/sifive/fu540.txt | 14 ++
 MAINTAINERS|  9 +
 2 files changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/riscv/sifive/fu540.txt

diff --git a/Documentation/devicetree/bindings/riscv/sifive/fu540.txt 
b/Documentation/devicetree/bindings/riscv/sifive/fu540.txt
new file mode 100644
index ..b5cf2b9f9a75
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/sifive/fu540.txt
@@ -0,0 +1,14 @@
+SiFive FU540 SoC architecture device tree bindings
+--
+
+Platforms based on the SiFive FU540 RISC-V SoC should follow the
+scheme described below:
+
+SoCs
+
+
+Each device tree root node must specify one of the following
+compatible-string values:
+
+- Freedom Unleashed 540 rev. C000, or "FU540"
+  compatible = "sifive,fu540-c000", "sifive,fu540"
diff --git a/MAINTAINERS b/MAINTAINERS
index 8119141a926f..dee555ceb175 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13626,6 +13626,15 @@ S: Supported
 K: sifive
 N: sifive
 
+SIFIVE FU540 SYSTEM-ON-CHIP
+M: Paul Walmsley 
+M: Palmer Dabbelt 
+L: linux-ri...@lists.infradead.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git
+S: Supported
+K: fu540
+N: fu540
+
 SILEAD TOUCHSCREEN DRIVER
 M: Hans de Goede 
 L: linux-in...@vger.kernel.org
-- 
2.20.0

[PATCH 5/7] riscv: dts: add initial support for the SiFive FU540-C000 SoC

2018-12-14 Thread Paul Walmsley

Add initial support for the SiFive FU540-C000 SoC.  This is a 28nm SoC
based around the SiFive U54-MC core complex and a TileLink
interconnect.

This file is expected to grow considerably as more device drivers are
added to the kernel.

Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 182 +
 1 file changed, 182 insertions(+)
 create mode 100644 arch/riscv/boot/dts/sifive/fu540-c000.dtsi

diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi 
b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
new file mode 100644
index ..0ef314cf17b6
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2018 SiFive, Inc */
+/* See the file LICENSE for further information */
+
+/dts-v1/;
+
+#include 
+
+/ {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   compatible = "sifive,fu540-c000", "sifive,fu540";
+
+   aliases {
+   serial0 = 
+   serial1 = 
+   };
+
+   chosen {
+   };
+
+   cpus {
+   #address-cells = <1>;
+   #size-cells = <0>;
+   timebase-frequency = <100>;
+   cpu0: cpu@0 {
+   clock-frequency = <0>;
+   compatible = "sifive,e51", "sifive,rocket0";
+   device_type = "cpu";
+   i-cache-block-size = <64>;
+   i-cache-sets = <128>;
+   i-cache-size = <16384>;
+   reg = <0>;
+   riscv,isa = "rv64imac";
+   status = "okay";
+   cpu0_intc: interrupt-controller {
+   #interrupt-cells = <1>;
+   compatible = "riscv,cpu-intc";
+   interrupt-controller;
+   };
+   };
+   cpu1: cpu@1 {
+   clock-frequency = <0>;
+   compatible = "sifive,u54-mc", "sifive,rocket0";
+   d-cache-block-size = <64>;
+   d-cache-sets = <64>;
+   d-cache-size = <32768>;
+   d-tlb-sets = <1>;
+   d-tlb-size = <32>;
+   device_type = "cpu";
+   i-cache-block-size = <64>;
+   i-cache-sets = <64>;
+   i-cache-size = <32768>;
+   i-tlb-sets = <1>;
+   i-tlb-size = <32>;
+   mmu-type = "riscv,sv39";
+   reg = <1>;
+   riscv,isa = "rv64imafdc";
+   status = "okay";
+   tlb-split;
+   cpu1_intc: interrupt-controller {
+   #interrupt-cells = <1>;
+   compatible = "riscv,cpu-intc";
+   interrupt-controller;
+   };
+   };
+   cpu2: cpu@2 {
+   clock-frequency = <0>;
+   compatible = "sifive,u54-mc", "sifive,rocket0";
+   d-cache-block-size = <64>;
+   d-cache-sets = <64>;
+   d-cache-size = <32768>;
+   d-tlb-sets = <1>;
+   d-tlb-size = <32>;
+   device_type = "cpu";
+   i-cache-block-size = <64>;
+   i-cache-sets = <64>;
+   i-cache-size = <32768>;
+   i-tlb-sets = <1>;
+   i-tlb-size = <32>;
+   mmu-type = "riscv,sv39";
+   reg = <2>;
+   riscv,isa = "rv64imafdc";
+   status = "okay";
+   tlb-split;
+   cpu2_intc: interrupt-controller {
+   #interrupt-cells = <1>;
+   compatible = "riscv,cpu-intc";
+   interrupt-controller;
+   };
+   };
+   cpu3: cpu@3 {
+   clock-frequency = <0>;
+   compatible = "sifive,u54-mc", "sifive,rocket0";
+   d-cache-block-size = <64>;
+   d-cache-sets = <64>;
+   d-cache-size = <32768>;
+   d-tlb-sets = <1>;
+   d-tlb-size = <32>;
+   device_type = "cpu";
+   i-cache-block-size = <64>;
+   i-cache-sets = <64>;
+

[PATCH 6/7] dt-binding: riscv: sifive: add documentation for FU540-based boards

2018-12-14 Thread Paul Walmsley

Add DT binding documentation for boards based on the SiFive FU540 SoC.
The first board, the HiFive Unleashed A00 (FU540), is described here:

https://static.dev.sifive.com/HiFive-Unleashed-Getting-Started-Guide-v1p1.pdf

Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 .../devicetree/bindings/riscv/sifive/fu540.txt | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/riscv/sifive/fu540.txt 
b/Documentation/devicetree/bindings/riscv/sifive/fu540.txt
index b5cf2b9f9a75..20119cc54964 100644
--- a/Documentation/devicetree/bindings/riscv/sifive/fu540.txt
+++ b/Documentation/devicetree/bindings/riscv/sifive/fu540.txt
@@ -12,3 +12,13 @@ compatible-string values:
 
 - Freedom Unleashed 540 rev. C000, or "FU540"
   compatible = "sifive,fu540-c000", "sifive,fu540"
+
+Boards
+--
+
+Boards based on the FU540 SoC should specify the following
+compatible-string values:
+
+- HiFive-Unleashed rev. A00 board
+  compatible = "sifive,hifive-unleashed-a00-fu540",
+  "sifive,hifive-unleashed-fu540"
-- 
2.20.0

[PATCH 0/7] arch: riscv: add DT file support, starting with the SiFive HiFive-U

2018-12-14 Thread Paul Walmsley

Add support for building flattened DT files from DT source files under
arch/riscv/boot/dts.  Follow existing kernel precedent from other SoC
architectures.

Start by adding initial support for the SiFive FU540 SoC, and the
development board that uses it, the SiFive HiFive Unleashed A00.

Applies on v4.20-rc6.

- Paul

Paul Walmsley (7):
  arch: riscv: add support for building DTB files from DT source data
  dt-bindings: riscv: sifive: add documentation for the SiFive FU540
  dt-bindings: riscv: cpus: add E51 cores to the list of documented CPUs
  dt-bindings: riscv: cpus: add U54 cores to the list of documented CPUs
  riscv: dts: add initial support for the SiFive FU540-C000 SoC
  dt-binding: riscv: sifive: add documentation for FU540-based boards
  riscv: dts: add initial board data for the SiFive HiFive Unleashed

 .../devicetree/bindings/riscv/cpus.txt|   6 +-
 .../bindings/riscv/sifive/fu540.txt   |  24 +++
 MAINTAINERS   |   9 +
 arch/riscv/Kconfig|   2 +
 arch/riscv/Kconfig.platforms  |   8 +
 arch/riscv/boot/dts/Makefile  |   2 +
 arch/riscv/boot/dts/sifive/Makefile   |   4 +
 arch/riscv/boot/dts/sifive/fu540-c000.dtsi| 182 ++
 .../dts/sifive/hifive-unleashed-a00-fu540.dts |  39 
 9 files changed, 274 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/riscv/sifive/fu540.txt
 create mode 100644 arch/riscv/Kconfig.platforms
 create mode 100644 arch/riscv/boot/dts/Makefile
 create mode 100644 arch/riscv/boot/dts/sifive/Makefile
 create mode 100644 arch/riscv/boot/dts/sifive/fu540-c000.dtsi
 create mode 100644 arch/riscv/boot/dts/sifive/hifive-unleashed-a00-fu540.dts

-- 
2.20.0

[PATCH 7/7] riscv: dts: add initial board data for the SiFive HiFive Unleashed

2018-12-14 Thread Paul Walmsley

Add initial board data for the SiFive HiFive Unleashed A00.

Currently the data populated in this DT file describes the board
DRAM configuration and the external clock sources that supply the
PRCI.

Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 arch/riscv/boot/dts/Makefile  |  2 +
 arch/riscv/boot/dts/sifive/Makefile   |  4 ++
 .../dts/sifive/hifive-unleashed-a00-fu540.dts | 39 +++
 3 files changed, 45 insertions(+)
 create mode 100644 arch/riscv/boot/dts/Makefile
 create mode 100644 arch/riscv/boot/dts/sifive/Makefile
 create mode 100644 arch/riscv/boot/dts/sifive/hifive-unleashed-a00-fu540.dts

diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
new file mode 100644
index ..dcc3ada78455
--- /dev/null
+++ b/arch/riscv/boot/dts/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-y += sifive
diff --git a/arch/riscv/boot/dts/sifive/Makefile 
b/arch/riscv/boot/dts/sifive/Makefile
new file mode 100644
index ..e120ccf5649c
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_ARCH_SIFIVE) += hifive-unleashed-a00-fu540.dtb
+
+
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00-fu540.dts 
b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00-fu540.dts
new file mode 100644
index ..0c6afabe69e3
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00-fu540.dts
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2018 SiFive, Inc */
+/* See the file LICENSE for further information */
+
+/dts-v1/;
+
+#include "fu540-c000.dtsi"
+
+/ {
+   #address-cells = <2>;
+   #size-cells = <2>;
+   model = "SiFive HiFive Unleashed A00 (FU540-C000)";
+   compatible = "sifive,hifive-unleashed-a00-fu540",
+   "sifive,hifive-unleashed-fu540";
+
+   chosen {
+   };
+
+   memory@8000 {
+   device_type = "memory";
+   reg = <0x0 0x8000 0x1f 0x8000>;
+   };
+
+   soc {
+   hfclk: hfclk {
+   #clock-cells = <0>;
+   compatible = "fixed-clock";
+   clock-frequency = <>;
+   clock-output-names = "hfclk";
+   };
+   rtcclk: rtcclk {
+   #clock-cells = <0>;
+   compatible = "fixed-clock";
+   clock-frequency = <100>;
+   clock-output-names = "rtcclk";
+   };
+   };
+};
-- 
2.20.0

[PATCH 1/7] arch: riscv: add support for building DTB files from DT source data

2018-12-14 Thread Paul Walmsley

Similar to what's implemented for ARM64, add support for building
DTB files from DT source data for RISC-V boards.

This patch starts with the infrastructure needed for SiFive boards.

Cc: Palmer Dabbelt 
Cc: Albert Ou 
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 arch/riscv/Kconfig   | 2 ++
 arch/riscv/Kconfig.platforms | 8 
 2 files changed, 10 insertions(+)
 create mode 100644 arch/riscv/Kconfig.platforms

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 55da93f4e818..dc9f1afa4ad9 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -121,6 +121,8 @@ config ARCH_RV64I
 
 endchoice
 
+source "arch/riscv/Kconfig.platforms"
+
 # We must be able to map all physical memory into the kernel, but the compiler
 # is still a bit more efficient when generating code if it's setup in a manner
 # such that it can only map 2GiB of memory.
diff --git a/arch/riscv/Kconfig.platforms b/arch/riscv/Kconfig.platforms
new file mode 100644
index ..bd3d2642bcff
--- /dev/null
+++ b/arch/riscv/Kconfig.platforms
@@ -0,0 +1,8 @@
+menu "Platform selection"
+
+config ARCH_SIFIVE
+bool "SiFive platforms"
+help
+  This enables direct support for SiFive SoC platform hardware.
+
+endmenu
-- 
2.20.0

[PATCH 4/7] dt-bindings: riscv: cpus: add U54 cores to the list of documented CPUs

2018-12-14 Thread Paul Walmsley

Add compatible strings for the SiFive U54 family of CPU cores to the
RISC-V CPU compatible string documentation.  The U54 CPU cores are
described in:

https://static.dev.sifive.com/FU540-C000-v1.0.pdf


Cc: Rob Herring 
Cc: Mark Rutland 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: devicet...@vger.kernel.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul Walmsley 
Signed-off-by: Paul Walmsley 
---
 Documentation/devicetree/bindings/riscv/cpus.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/riscv/cpus.txt 
b/Documentation/devicetree/bindings/riscv/cpus.txt
index fb9d4f86f41f..d8d99b6b5386 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.txt
+++ b/Documentation/devicetree/bindings/riscv/cpus.txt
@@ -70,7 +70,8 @@ described below.
 Value type: 
 Definition: must contain "riscv", may contain one or
more of "sifive,rocket0", "sifive,e51",
-   "sifive,e5"
+   "sifive,e5", "sifive,u54-mc", "sifive,u54",
+   "sifive,u5"
 - mmu-type:
 Usage: optional
 Value type: 
-- 
2.20.0

Re: Can we drop upstream Linux x32 support?

2018-12-14 Thread Thorsten Glaser

Andy Lutomirski dixit:

>x32 is not this at all.  The kernel ABI part of x32 isn't ILP32.  It's
>IP32, 32-bit size_t, and *64-bit* long.  The core kernel doesn't

Yeah, I was looking at this from userspace PoV, as I said I’m not
a Linux kernel programmer.

In BSD we have register_t which is probably the equivalent to your
__kernel_long_t? Maybe removing the “long” from the name helps.

But yes, x32 is just a (second to i386) ILP32 userspace API in an
amd64 kernel. This does imply mapping on the userspace (x32) to
kernel (amd64) boundary and back. I would have thought full struct
member mapping, as dalias described, to be the most robust.

>something similar to work using the normal x86_64 syscalls.  And I'm

But those would require the longer structs etc. and therefore
lose all the benefits of x32…

bye,
//mirabilos
-- 
„Cool, /usr/share/doc/mksh/examples/uhr.gz ist ja ein Grund,
mksh auf jedem System zu installieren.“
-- XTaran auf der OpenRheinRuhr, ganz begeistert
(EN: “[…]uhr.gz is a reason to install mksh on every system.”)

[ANNOUNCE] Git v2.20.1

2018-12-14 Thread Junio C Hamano

The latest maintenance release Git v2.20.1 is now available at
the usual places.

The tarballs are found at:

https://www.kernel.org/pub/software/scm/git/

The following public repositories all have a copy of the 'v2.20.1'
tag and the 'maint' branch that the tag points at:

  url = https://kernel.googlesource.com/pub/scm/git/git
  url = git://repo.or.cz/alt-git.git
  url = https://github.com/gitster/git



Git v2.20.1 Release Notes
=========================

This release is primarily to fix brown-paper-bag breakages in the
2.20.0 release.

Fixes since v2.20
-----------------

 * A few newly added tests were not portable and caused minority
   platforms to report false breakages, which have been fixed.

 * Portability fix for a recent update to parse-options API.

 * "git help -a" did not work well when an overly long alias is
   defined, which has been corrected.

 * A recent update accidentally squelched an error message when the
   run_command API failed to run a missing command, which has been
   corrected.



Changes since v2.20.0 are as follows:

Derrick Stolee (1):
  .gitattributes: ensure t/oid-info/* has eol=lf

Johannes Schindelin (4):
  help.h: fix coding style
  help -a: handle aliases with long names gracefully
  t4256: mark support files as LF-only
  t9902: 'send-email' test case requires PERL

Junio C Hamano (2):
  run-command: report exec failure
  Git 2.20.1

Nguyễn Thái Ngọc Duy (1):
  parse-options: fix SunCC compiler warning

Re: [PATCH] fix page_count in ->iomap_migrate_page()

2018-12-14 Thread Gao Xiang



On 2018/12/14 13:56, zhangjun wrote:
> IOMAP uses PG_private a little different with buffer_head based
> filesystem.
> It uses it as marker and when set, the page counter is not incremented,
> migrate_page_move_mapping() assumes that PG_private indicates a counter
> of +1.
> so, we have to pass a extra count of -1 to migrate_page_move_mapping()
> if the flag is set.
> 
> Signed-off-by: zhangjun 
> ---

I found that it fixed in
https://patchwork.kernel.org/patch/10684835/
and has been merged in
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=61c6de667263184125d5ca75e894fcad632b0dd3

It seems it has been corrected by Piotr.

Thanks,
Gao Xiang

[PATCH] fork,memcg: fix crash in free_thread_stack on memcg charge fail

2018-12-14 Thread Rik van Riel

Changeset 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
will result in fork failing if allocating a kernel stack for a task
in dup_task_struct exceeds the kernel memory allowance for that cgroup.

Unfortunately, it also results in a crash.

This is due to the code jumping to free_stack and calling free_thread_stack
when the memcg kernel stack charge fails, but without tsk->stack pointing
at the freshly allocated stack.

This in turn results in the vfree_atomic in free_thread_stack oopsing
with a backtrace like this:

#5 [c900244efc88] die at 8101f0ab
 #6 [c900244efcb8] do_general_protection at 8101cb86
 #7 [c900244efce0] general_protection at 818ff082
[exception RIP: llist_add_batch+7]
RIP: 8150d487  RSP: c900244efd98  RFLAGS: 00010282
RAX:   RBX: 88085ef55980  RCX: 
RDX: 88085ef55980  RSI: 343834343531203a  RDI: 343834343531203a
RBP: c900244efd98   R8: 0001   R9: 8808578c3600
R10:   R11: 0001  R12: 88029f6c21c0
R13: 0286  R14: 880147759b00  R15: 
ORIG_RAX:   CS: 0010  SS: 0018
 #8 [c900244efda0] vfree_atomic at 811df2c7
 #9 [c900244efdb8] copy_process at 81086e37
#10 [c900244efe98] _do_fork at 810884e0
#11 [c900244eff10] sys_vfork at 810887ff
#12 [c900244eff20] do_syscall_64 at 81002a43
RIP: 0049b948  RSP: 7ffcdb307830  RFLAGS: 0246
RAX: ffda  RBX: 00896030  RCX: 0049b948
RDX:   RSI: 7ffcdb307790  RDI: 005d7421
RBP: 0067370f   R8: 7ffcdb3077b0   R9: 0001ed00
R10: 0008  R11: 0246  R12: 0040
R13: 000f  R14:   R15: 0088d018
ORIG_RAX: 003a  CS: 0033  SS: 002b

The simplest fix is to assign tsk->stack right where it is allocated.

Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
Cc: Andrew Morton 
Cc: Shakeel Butt 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Tejun Heo 
Cc: Roman Gushchin 
Signed-off-by: Rik van Riel 
---
 kernel/fork.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 07cddff89c7b..e2a5156bc9c3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -240,8 +240,10 @@ static unsigned long *alloc_thread_stack_node(struct 
task_struct *tsk, int node)
 * free_thread_stack() can be called in interrupt context,
 * so cache the vm_struct.
 */
-   if (stack)
+   if (stack) {
tsk->stack_vm_area = find_vm_area(stack);
+   tsk->stack = stack;
+   }
return stack;
 #else
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -288,7 +290,10 @@ static struct kmem_cache *thread_stack_cache;
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
  int node)
 {
-   return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+   unsigned long *stack;
+   stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+   tsk->stack = stack;
+   return stack;
 }
 
 static void free_thread_stack(struct task_struct *tsk)


-- 
All rights reversed.

Re: Tracing to console

2018-12-14 Thread Steven Rostedt

On Fri, 15 Jun 2018 21:01:53 +
Bart Van Assche  wrote:

> Hello Steven,

Sorry about the 6 month old reply. I just noticed this email buried in
my INBOX (I triage my INBOX to find emails like this that got missed).
And yes I missed your reply as well :-/

> 
> If I run the following commands as root:
> 
> cd /sys/kernel/debug/tracing
> grep pm_ available_events > set_event
> echo function >current_tracer
> grep scsi_ available_filter_functions | while read a b; do echo $a; done 
> >set_ftrace_filter
> echo 1 >events/printk/enable
> echo 1 >tracing_on
> echo 1 >/proc/sys/kernel/tracepoint_printk

Just for some closure. The tracepoint_printk is a special command that
only gets enabled if you pass "tp_printk" on the kernel command line.

> echo - - - >/sys/class/scsi_host/host0/scan
> 
> then the following appears on the console:
> 
> console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 
> 300)
> console: console: console: console: ata1: SATA link up 1.5 Gbps (SStatus 113 
> SControl 300)
> console: console: console: console: console: ata1: SATA link up 1.5 Gbps 
> (SStatus 113 SControl 300)
> console: console: console: console: console: console: ata1: SATA link up 1.5 
> Gbps (SStatus 113 SControl 300)
> console: console: console: console: console: console: console: ata1: SATA 
> link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: console: console: console: console: console: console: ata1: 
> SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: console: console: console: console: console: console: 
> console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: console: console: console: console: console: console: 
> console: console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
> console: console: console: console: console: console: console: console: 
> console: console: console: ata1: SATA link up 1.5 Gbps (SStatus 113 SControl 
> 300)
> [ ... ]
> 
> I assume that the repeated "console: " prefix is a bug and also that the same
> line repeats over and over again is also a bug? I ran into this with commit
> 017b3f8a10ca ("scsi: snic: fix a couple of spelling mistakes: "COMPLETE"").
>

The above behavior is actually expected and not a bug. It's a "Doctor
it hurts me when I do this" issue. Don't do that ;-)

The tracepoint_printk is for cases where the system crashes before you
ever get to a command prompt, and you want to see trace events that you
can enable from the kernel command line. What you did above was like
running "ls -lR / > /dev/mem" in old kernels. It's doomed to fail.

You just told the kernel that you want all trace events to go over
printk, and you also told the kernel, you want all printk to become a
trace event. Thus you created a recursive loop. You're lucky it didn't
crash.

I hope this explains things for you.

Again, sorry that it took 6 months to reply. I believe June was a busy
time for me (vacation and work travel).

-- Steve

Re: [PATCH][next] nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"

2018-12-14 Thread Sagi Grimberg


Reviewed-by: Sagi Grimberg

[Patch net 07/10] net: hns: Avoid net reset caused by pause frames storm

2018-12-14 Thread Peng Li

From: Yonglong Liu 

When there is a large number of MAC pause frames on the net,
the net device hits a tx timeout, and the net device is then
reset to try to recover it. Such a reset is not useful, and it
causes some other problems.

So double ndev->watchdog_timeo each time the device watchdog fires,
until watchdog_timeo reaches 40s, and only then try resetting the
device to recover it.

Dfx information such as hardware registers is collected on tx timeout,
and some counter registers are cleared when read. So move this task
before the net state update, which also reads the counter registers.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 03d959c..8a53c82 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1509,11 +1509,19 @@ static int hns_nic_net_stop(struct net_device *ndev)
 }
 
 static void hns_tx_timeout_reset(struct hns_nic_priv *priv);
+#define HNS_TX_TIMEO_LIMIT (40 * HZ)
 static void hns_nic_net_timeout(struct net_device *ndev)
 {
struct hns_nic_priv *priv = netdev_priv(ndev);
 
-   hns_tx_timeout_reset(priv);
+   if (ndev->watchdog_timeo < HNS_TX_TIMEO_LIMIT) {
+   ndev->watchdog_timeo *= 2;
+   netdev_info(ndev, "watchdog_timo changed to %d.\n",
+   ndev->watchdog_timeo);
+   } else {
+   ndev->watchdog_timeo = HNS_NIC_TX_TIMEOUT;
+   hns_tx_timeout_reset(priv);
+   }
 }
 
 static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
@@ -2076,11 +2084,11 @@ static void hns_nic_service_task(struct work_struct 
*work)
= container_of(work, struct hns_nic_priv, service_task);
struct hnae_handle *h = priv->ae_handle;
 
+   hns_nic_reset_subtask(priv);
hns_nic_update_link_status(priv->netdev);
h->dev->ops->update_led_status(h);
hns_nic_update_stats(priv->netdev);
 
-   hns_nic_reset_subtask(priv);
hns_nic_service_event_complete(priv);
 }
 
-- 
1.9.1

[Patch net 06/10] net: hns: Free irq when exit from abnormal branch

2018-12-14 Thread Peng Li

From: Yonglong Liu 

1.In "hns_nic_init_irq", if requesting an irq fails at index i,
  the function returns directly without releasing the irq resources
  that were already requested.

2.In "hns_nic_net_up" after "hns_nic_init_irq",
  if an exceptional branch is taken, the irqs that were already
  requested are not released.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index c9454c4..03d959c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1284,6 +1284,22 @@ static int hns_nic_init_affinity_mask(int q_num, int 
ring_idx,
return cpu;
 }
 
+static void hns_nic_free_irq(int q_num, struct hns_nic_priv *priv)
+{
+   int i;
+
+   for (i = 0; i < q_num * 2; i++) {
+   if (priv->ring_data[i].ring->irq_init_flag == RCB_IRQ_INITED) {
+   irq_set_affinity_hint(priv->ring_data[i].ring->irq,
+ NULL);
+   free_irq(priv->ring_data[i].ring->irq,
+&priv->ring_data[i]);
+   priv->ring_data[i].ring->irq_init_flag =
+   RCB_IRQ_NOT_INITED;
+   }
+   }
+}
+
 static int hns_nic_init_irq(struct hns_nic_priv *priv)
 {
struct hnae_handle *h = priv->ae_handle;
@@ -1309,7 +1325,7 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
if (ret) {
netdev_err(priv->netdev, "request irq(%d) fail\n",
   rd->ring->irq);
-   return ret;
+   goto out_free_irq;
}
disable_irq(rd->ring->irq);
 
@@ -1324,6 +1340,10 @@ static int hns_nic_init_irq(struct hns_nic_priv *priv)
}
 
return 0;
+
+out_free_irq:
+   hns_nic_free_irq(h->q_num, priv);
+   return ret;
 }
 
 static int hns_nic_net_up(struct net_device *ndev)
@@ -1371,6 +1391,7 @@ static int hns_nic_net_up(struct net_device *ndev)
for (j = i - 1; j >= 0; j--)
hns_nic_ring_close(ndev, j);
 
+   hns_nic_free_irq(h->q_num, priv);
set_bit(NIC_STATE_DOWN, &priv->state);
 
return ret;
-- 
1.9.1

[Patch net 02/10] net: hns: All ports can not work when insmod hns ko after rmmod.

2018-12-14 Thread Peng Li

From: Yonglong Liu 

There are two test cases:
1. Remove the 4 modules:hns_enet_drv/hns_dsaf/hnae/hns_mdio,
   and install them again, must use "ifconfig down/ifconfig up"
   command pair to bring port to work.

   This patch calls phy_stop function when init phy to fix this bug.

2. Remove the 2 modules:hns_enet_drv/hns_dsaf, and install them again,
   all ports can not use anymore, because of the phy devices register
   failed(phy devices already exists).

   Phy devices are registered when hns_dsaf installed, this patch
   removes them when hns_dsaf removed.

The two cases are sometimes related, fixing the second case also requires
fixing the first case, so fix them together.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 15 +++
 drivers/net/ethernet/hisilicon/hns/hns_enet.c |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 3613e40..a97228c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -778,6 +778,17 @@ static int hns_mac_register_phy(struct hns_mac_cb *mac_cb)
return rc;
 }
 
+static void hns_mac_remove_phydev(struct hns_mac_cb *mac_cb)
+{
+   if (!to_acpi_device_node(mac_cb->fw_port) || !mac_cb->phy_dev)
+   return;
+
+   phy_device_remove(mac_cb->phy_dev);
+   phy_device_free(mac_cb->phy_dev);
+
+   mac_cb->phy_dev = NULL;
+}
+
 #define MAC_MEDIA_TYPE_MAX_LEN 16
 
 static const struct {
@@ -1117,7 +1128,11 @@ void hns_mac_uninit(struct dsaf_device *dsaf_dev)
int max_port_num = hns_mac_get_max_port_num(dsaf_dev);
 
for (i = 0; i < max_port_num; i++) {
+   if (!dsaf_dev->mac_cb[i])
+   continue;
+
dsaf_dev->misc_op->cpld_reset_led(dsaf_dev->mac_cb[i]);
+   hns_mac_remove_phydev(dsaf_dev->mac_cb[i]);
dsaf_dev->mac_cb[i] = NULL;
}
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 28e9078..c205a0e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1186,6 +1186,9 @@ int hns_nic_init_phy(struct net_device *ndev, struct 
hnae_handle *h)
if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
phy_dev->autoneg = false;
 
+   if (h->phy_if == PHY_INTERFACE_MODE_SGMII)
+   phy_stop(phy_dev);
+
return 0;
 }
 
-- 
1.9.1

[Patch net 05/10] net: hns: Clean rx fbd when ae stopped.

2018-12-14 Thread Peng Li

From: Yonglong Liu 

If there are packets in hardware when changing the speed or duplex,
the hardware may hang.

This patch adds code to wait for the rx fbd to be cleaned up when the ae
is stopped.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c 
b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index b52029e..ad1779f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -379,6 +379,9 @@ static void hns_ae_stop(struct hnae_handle *handle)
 
hns_ae_ring_enable_all(handle, 0);
 
+   /* clean rx fbd. */
+   hns_rcb_wait_fbd_clean(handle->qs, handle->q_num, RCB_INT_FLAG_RX);
+
(void)hns_mac_vm_config_bc_en(mac_cb, 0, false);
 }
 
-- 
1.9.1

[Patch net 08/10] net: hns: Fix ntuple-filters status error.

2018-12-14 Thread Peng Li

From: Yonglong Liu 

The ntuple-filters feature is forced on by the chip,
but ethtool shows "ntuple-filters: off [fixed]".
This patch corrects it to "ntuple-filters: on [fixed]".

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 8a53c82..6242249 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2374,7 +2374,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
ndev->min_mtu = MAC_MIN_MTU;
switch (priv->enet_ver) {
case AE_VERSION_2:
-   ndev->features |= NETIF_F_TSO | NETIF_F_TSO6;
+   ndev->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_NTUPLE;
ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6;
-- 
1.9.1

[Patch net 09/10] net: hns: Add mac pcs config when enable|disable mac

2018-12-14 Thread Peng Li

From: Yonglong Liu 

In some cases, enabling|disabling the mac and adjusting the link may make
it hard to link (or make the link abnormal) between mac and phy. This patch
adds the code for rx PCS to avoid this bug.

Disable the rx PCS when the driver disables the gmac, and enable the rx PCS
when the driver enables the mac.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 14 ++
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index aaf72c0..1790cda 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -67,11 +67,14 @@ static void hns_gmac_enable(void *mac_drv, enum 
mac_commom_mode mode)
struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
/*enable GE rX/tX */
-   if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+   if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX)
dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 1);
 
-   if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+   if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) {
+   /* enable rx pcs */
+   dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 0);
dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 1);
+   }
 }
 
 static void hns_gmac_disable(void *mac_drv, enum mac_commom_mode mode)
@@ -79,11 +82,14 @@ static void hns_gmac_disable(void *mac_drv, enum 
mac_commom_mode mode)
struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
/*disable GE rX/tX */
-   if ((mode == MAC_COMM_MODE_TX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+   if (mode == MAC_COMM_MODE_TX || mode == MAC_COMM_MODE_RX_AND_TX)
dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_TX_EN_B, 0);
 
-   if ((mode == MAC_COMM_MODE_RX) || (mode == MAC_COMM_MODE_RX_AND_TX))
+   if (mode == MAC_COMM_MODE_RX || mode == MAC_COMM_MODE_RX_AND_TX) {
+   /* disable rx pcs */
+   dsaf_set_dev_bit(drv, GMAC_PCS_RX_EN_REG, 0, 1);
dsaf_set_dev_bit(drv, GMAC_PORT_EN_REG, GMAC_PORT_RX_EN_B, 0);
+   }
 }
 
 /* hns_gmac_get_en - get port enable
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index f171d11..b9733b0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -534,6 +534,7 @@
 #define GMAC_LD_LINK_COUNTER_REG   0x01D0UL
 #define GMAC_LOOP_REG  0x01DCUL
 #define GMAC_RECV_CONTROL_REG  0x01E0UL
+#define GMAC_PCS_RX_EN_REG 0x01E4UL
 #define GMAC_VLAN_CODE_REG 0x01E8UL
 #define GMAC_RX_OVERRUN_CNT_REG0x01ECUL
 #define GMAC_RX_LENGTHFIELD_ERR_CNT_REG0x01F4UL
-- 
1.9.1

[Patch net 00/10] net: hns: Code improvements & fixes for HNS driver

2018-12-14 Thread Peng Li

This patchset introduces some code improvements and fixes
for the identified problems in the HNS driver.

Every patch is independent.

Yonglong Liu (10):
  net: hns: Incorrect offset address used for some registers.
  net: hns: All ports can not work when insmod hns ko after rmmod.
  net: hns: some registers use wrong address according to the datasheet.
  net: hns: fixed bug that netdev was opened twice
  net: hns: clean rx fbd when ae stopped.
  net: hns: free irq when exit from abnormal branch
  net: hns: Avoid net reset caused by pause frames storm
  net: hns: Fix ntuple-filters status error.
  net: hns: Add mac pcs config when enable|disable mac
  net: hns: fix ping failed when use net bridge and send multicast

 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |   3 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c |  14 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  |  15 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 503 ++---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  13 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c  |  43 +-
 6 files changed, 413 insertions(+), 178 deletions(-)

-- 
1.9.1

[Patch net 04/10] net: hns: Fixed bug that netdev was opened twice

2018-12-14 Thread Peng Li

From: Yonglong Liu 

After resetting dsaf to try to repair a chip error such as an ecc error,
the net device will be opened if the net interface is up. But if a user
sets the net device up with the ifconfig command at this time,
the net device will be opened twice consecutively.

Function napi_enable is called when the device is opened, and a kernel panic
will occur if it is called twice consecutively, as follows:
static inline void napi_enable(struct napi_struct *n)
{
 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 smp_mb__before_clear_bit();
 clear_bit(NAPI_STATE_SCHED, &n->state);
}

[37255.571996] Kernel panic - not syncing: BUG!
[37255.595234] Call trace:
[37255.597694] [] dump_backtrace+0x0/0x1a0
[37255.603114] [] show_stack+0x20/0x28
[37255.608187] [] dump_stack+0x98/0xb8
[37255.613258] [] panic+0x10c/0x26c
[37255.618070] [] hns_nic_net_up+0x30c/0x4e0
[37255.623664] [] hns_nic_net_open+0x94/0x12c
[37255.629346] [] __dev_open+0xf4/0x168
[37255.634504] [] __dev_change_flags+0x98/0x15c
[37255.640359] [] dev_change_flags+0x2c/0x68
[37255.769580] [] devinet_ioctl+0x650/0x704
[37255.775086] [] inet_ioctl+0x98/0xb4
[37255.780159] [] sock_do_ioctl+0x44/0x84
[37255.785490] [] sock_ioctl+0x248/0x30c
[37255.790737] [] do_vfs_ioctl+0x480/0x618
[37255.796156] [] SyS_ioctl+0x90/0xa4
[37255.801139] SMP: stopping secondary CPUs
[37255.805079] kbox: catch panic event.
[37255.809586] collected_len = 128928, LOG_BUF_LEN_LOCAL = 131072
[37255.816103] flush cache 0x80003f00  size 0x80
[37255.822192] flush cache 0x80003f00  size 0x80
[37255.828289] flush cache 0x80003f00  size 0x80
[37255.834378] kbox: no notify die func register. no need to notify
[37255.840413] ---[ end Kernel panic - not syncing: BUG!

This patch fixes this bug by checking the NIC_STATE_DOWN flag.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c 
b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index c205a0e..c9454c4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1333,6 +1333,9 @@ static int hns_nic_net_up(struct net_device *ndev)
int i, j;
int ret;
 
+   if (!test_bit(NIC_STATE_DOWN, &priv->state))
+   return 0;
+
ret = hns_nic_init_irq(priv);
if (ret != 0) {
netdev_err(ndev, "hns init irq failed! ret=%d\n", ret);
-- 
1.9.1

[Patch net 01/10] net: hns: Incorrect offset address used for some registers.

2018-12-14 Thread Peng Li

From: Yonglong Liu 

According to the hip06 Datasheet:
1. The offset of INGRESS_SW_VLAN_TAG_DISC should be 0x1A00+4*all_chn_num
2. The offset of INGRESS_IN_DATA_STP_DISC should be 0x1A50+4*all_chn_num

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index e557a4e..dc9215a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -2166,9 +2166,9 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, 
u32 node_num)
DSAF_INODE_LOCAL_ADDR_FALSE_NUM_0_REG + 0x80 * (u64)node_num);
 
hw_stats->vlan_drop += dsaf_read_dev(dsaf_dev,
-   DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 0x80 * (u64)node_num);
+   DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + 4 * (u64)node_num);
hw_stats->stp_drop += dsaf_read_dev(dsaf_dev,
-   DSAF_INODE_IN_DATA_STP_DISC_0_REG + 0x80 * (u64)node_num);
+   DSAF_INODE_IN_DATA_STP_DISC_0_REG + 4 * (u64)node_num);
 
/* pfc pause frame statistics stored in dsaf inode*/
if ((node_num < DSAF_SERVICE_NW_NUM) && !is_ver1) {
-- 
1.9.1

[Patch net 10/10] net: hns: Fix ping failed when use net bridge and send multicast

2018-12-14 Thread Peng Li

From: Yonglong Liu 

Create a net bridge, add eth and vnet to the bridge. The vnet is used
by a virtual machine. When pinging the virtual machine from an outside
host while the virtual machine sends multicast at the same time, the ping
packets will be lost.

The multicast packet is sent to the eth, and the eth sends it to the bridge
too, so the bridge learns the mac of the eth. When the outside host pings the
virtual machine, the packet matches the promisc entry of the eth, which is not
expected, and the bridge sends it to the eth instead of the vnet, so the ping
is lost.

So this patch changes the promisc tcam entry position to the END of the 512
tcam entries, which indicates lower priority, and separates the one promisc
entry into two, mc & uc, to avoid packets matching the wrong tcam entry.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 257 +
 1 file changed, 216 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 97d62de..3b9e74b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -935,6 +935,62 @@ static void hns_dsaf_tcam_mc_cfg(
 }
 
 /**
+ * hns_dsaf_tcam_uc_cfg_vague - INT
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address,
+ * @ptbl_tcam_data,
+ */
+static void hns_dsaf_tcam_uc_cfg_vague(struct dsaf_device *dsaf_dev,
+  u32 address,
+  struct dsaf_tbl_tcam_data *tcam_data,
+  struct dsaf_tbl_tcam_data *tcam_mask,
+  struct dsaf_tbl_tcam_ucast_cfg *tcam_uc)
+{
+   spin_lock_bh(&dsaf_dev->tcam_lock);
+   hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address);
+   hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data);
+   hns_dsaf_tbl_tcam_ucast_cfg(dsaf_dev, tcam_uc);
+   hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+   hns_dsaf_tbl_tcam_data_ucast_pul(dsaf_dev);
+
+   /*Restore Match Data*/
+   tcam_mask->tbl_tcam_data_high = 0x;
+   tcam_mask->tbl_tcam_data_low = 0x;
+   hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+
+   spin_unlock_bh(&dsaf_dev->tcam_lock);
+}
+
+/**
+ * hns_dsaf_tcam_mc_cfg_vague - INT
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address,
+ * @ptbl_tcam_data,
+ * @ptbl_tcam_mask
+ * @ptbl_tcam_mcast
+ */
+static void hns_dsaf_tcam_mc_cfg_vague(struct dsaf_device *dsaf_dev,
+  u32 address,
+  struct dsaf_tbl_tcam_data *tcam_data,
+  struct dsaf_tbl_tcam_data *tcam_mask,
+  struct dsaf_tbl_tcam_mcast_cfg *tcam_mc)
+{
+   spin_lock_bh(&dsaf_dev->tcam_lock);
+   hns_dsaf_tbl_tcam_addr_cfg(dsaf_dev, address);
+   hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, tcam_data);
+   hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, tcam_mc);
+   hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+   hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev);
+
+   /*Restore Match Data*/
+   tcam_mask->tbl_tcam_data_high = 0x;
+   tcam_mask->tbl_tcam_data_low = 0x;
+   hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, tcam_mask);
+
+   spin_unlock_bh(&dsaf_dev->tcam_lock);
+}
+
+/**
  * hns_dsaf_tcam_mc_invld - INT
  * @dsaf_id: dsa fabric id
  * @address
@@ -1493,6 +1549,27 @@ static u16 hns_dsaf_find_empty_mac_entry(struct 
dsaf_device *dsaf_dev)
 }
 
 /**
+ * hns_dsaf_find_empty_mac_entry_reverse
+ * search dsa fabric soft empty-entry from the end
+ * @dsaf_dev: dsa fabric device struct pointer
+ */
+static u16 hns_dsaf_find_empty_mac_entry_reverse(struct dsaf_device *dsaf_dev)
+{
+   struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+   struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+   int i;
+
+   soft_mac_entry = priv->soft_mac_tbl + (DSAF_TCAM_SUM - 1);
+   for (i = (DSAF_TCAM_SUM - 1); i > 0; i--) {
+   /* search all entry from end to start.*/
+   if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX)
+   return i;
+   soft_mac_entry--;
+   }
+   return DSAF_INVALID_ENTRY_IDX;
+}
+
+/**
  * hns_dsaf_set_mac_key - set mac key
  * @dsaf_dev: dsa fabric device struct pointer
  * @mac_key: tcam key pointer
@@ -2673,58 +2750,156 @@ int hns_dsaf_get_regs_count(void)
return DSAF_DUMP_REGS_NUM;
 }
 
-/* Reserve the last TCAM entry for promisc support */
-#define dsaf_promisc_tcam_entry(port) \
-   (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port))
-void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
-  u32 port, bool enable)
+static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
 {
+   struct dsaf_tbl_tcam_ucast_cfg tbl_tcam_ucast = {0, 1, 0, 0, 0x80};
+   struct dsaf_tbl_tcam_data

[Patch net 03/10] net: hns: Some registers use wrong address according to the datasheet.

2018-12-14 Thread Peng Li

From: Yonglong Liu 

According to the hip06 datasheet:
1.Six registers use wrong address:
  RCB_COM_SF_CFG_INTMASK_RING
  RCB_COM_SF_CFG_RING_STS
  RCB_COM_SF_CFG_RING
  RCB_COM_SF_CFG_INTMASK_BD
  RCB_COM_SF_CFG_BD_RINT_STS
  DSAF_INODE_VC1_IN_PKT_NUM_0_REG
2.The offset of DSAF_INODE_VC1_IN_PKT_NUM_0_REG should be
  0x103C + 0x80 * all_chn_num
3.The offset to show the value of DSAF_INODE_IN_DATA_STP_DISC_0_REG
  is wrong, so the value of DSAF_INODE_SW_VLAN_TAG_DISC_0_REG will be
  overwritten

These registers are only used in "ethtool -d", so this did not cause the
ndev to malfunction.

Signed-off-by: Yonglong Liu 
Signed-off-by: Peng Li 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 242 ++---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  12 +-
 2 files changed, 127 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index dc9215a..97d62de 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -2285,237 +2285,237 @@ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 
port, void *data)
DSAF_INODE_BD_ORDER_STATUS_0_REG + j * 4);
p[223 + i] = dsaf_read_dev(ddev,
DSAF_INODE_SW_VLAN_TAG_DISC_0_REG + j * 4);
-   p[224 + i] = dsaf_read_dev(ddev,
+   p[226 + i] = dsaf_read_dev(ddev,
DSAF_INODE_IN_DATA_STP_DISC_0_REG + j * 4);
}
 
-   p[227] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4);
+   p[229] = dsaf_read_dev(ddev, DSAF_INODE_GE_FC_EN_0_REG + port * 4);
 
for (i = 0; i < DSAF_INODE_NUM / DSAF_COMM_CHN; i++) {
j = i * DSAF_COMM_CHN + port;
-   p[228 + i] = dsaf_read_dev(ddev,
+   p[230 + i] = dsaf_read_dev(ddev,
DSAF_INODE_VC0_IN_PKT_NUM_0_REG + j * 4);
}
 
-   p[231] = dsaf_read_dev(ddev,
-   DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 4);
+   p[233] = dsaf_read_dev(ddev,
+   DSAF_INODE_VC1_IN_PKT_NUM_0_REG + port * 0x80);
 
/* dsaf inode registers */
for (i = 0; i < HNS_DSAF_SBM_NUM(ddev) / DSAF_COMM_CHN; i++) {
j = i * DSAF_COMM_CHN + port;
-   p[232 + i] = dsaf_read_dev(ddev,
+   p[234 + i] = dsaf_read_dev(ddev,
DSAF_SBM_CFG_REG_0_REG + j * 0x80);
-   p[235 + i] = dsaf_read_dev(ddev,
+   p[237 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CFG_0_XGE_REG_0_REG + j * 0x80);
-   p[238 + i] = dsaf_read_dev(ddev,
+   p[240 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CFG_1_REG_0_REG + j * 0x80);
-   p[241 + i] = dsaf_read_dev(ddev,
+   p[243 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CFG_2_XGE_REG_0_REG + j * 0x80);
-   p[244 + i] = dsaf_read_dev(ddev,
+   p[246 + i] = dsaf_read_dev(ddev,
DSAF_SBM_FREE_CNT_0_0_REG + j * 0x80);
-   p[245 + i] = dsaf_read_dev(ddev,
+   p[249 + i] = dsaf_read_dev(ddev,
DSAF_SBM_FREE_CNT_1_0_REG + j * 0x80);
-   p[248 + i] = dsaf_read_dev(ddev,
+   p[252 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CNT_0_0_REG + j * 0x80);
-   p[251 + i] = dsaf_read_dev(ddev,
+   p[255 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CNT_1_0_REG + j * 0x80);
-   p[254 + i] = dsaf_read_dev(ddev,
+   p[258 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CNT_2_0_REG + j * 0x80);
-   p[257 + i] = dsaf_read_dev(ddev,
+   p[261 + i] = dsaf_read_dev(ddev,
DSAF_SBM_BP_CNT_3_0_REG + j * 0x80);
-   p[260 + i] = dsaf_read_dev(ddev,
+   p[264 + i] = dsaf_read_dev(ddev,
DSAF_SBM_INER_ST_0_REG + j * 0x80);
-   p[263 + i] = dsaf_read_dev(ddev,
+   p[267 + i] = dsaf_read_dev(ddev,
DSAF_SBM_MIB_REQ_FAILED_TC_0_REG + j * 0x80);
-   p[266 + i] = dsaf_read_dev(ddev,
+   p[270 + i] = dsaf_read_dev(ddev,
DSAF_SBM_LNK_INPORT_CNT_0_REG + j * 0x80);
-   p[269 + i] = dsaf_read_dev(ddev,
+   p[273 + i] = dsaf_read_dev(ddev,
DSAF_SBM_LNK_DROP_CNT_0_REG + j * 0x80);
-   p[272 + i] = dsaf_read_dev(ddev,
+   p[276 + i] = dsaf_read_dev(ddev,
DSAF_SBM_INF_OUTPORT_CNT_0_REG + j * 0x80);
-   p[275 + i] = dsaf_read_dev(ddev,
+

[PATCH 2/2] ARM: dts: Add support for 96Boards Chameleon96 board

2018-12-14 Thread Manivannan Sadhasivam

Add devicetree support for 96Boards Chameleon96 board from Novtech, Inc.
based on Altera CycloneV SoC FPGA. This board is one of the Consumer
Edition boards of the 96Boards family and has the following key features:

* SoC - Intel Cyclone V SoC FPGA
* GPU - Graphics based on Intel Video Suite for FPGA
* RAM - 512MB DDR3L
* USB - 2x USB2.0 Host, 1x USB2.0 OTG
* Wireless - Wifi, BT

More information about this board can be found in 96Boards product
page: https://www.96boards.org/product/chameleon96/

Signed-off-by: Manivannan Sadhasivam 
---
 arch/arm/boot/dts/Makefile|   1 +
 .../boot/dts/socfpga_cyclone5_chameleon96.dts | 130 ++
 2 files changed, 131 insertions(+)
 create mode 100644 arch/arm/boot/dts/socfpga_cyclone5_chameleon96.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index b0e966d625b9..d6cf081ec325 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -897,6 +897,7 @@ dtb-$(CONFIG_ARCH_SOCFPGA) += \
socfpga_arria10_socdk_nand.dtb \
socfpga_arria10_socdk_qspi.dtb \
socfpga_arria10_socdk_sdmmc.dtb \
+   socfpga_cyclone5_chameleon96.dtb \
socfpga_cyclone5_mcvevk.dtb \
socfpga_cyclone5_socdk.dtb \
socfpga_cyclone5_de0_nano_soc.dtb \
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_chameleon96.dts 
b/arch/arm/boot/dts/socfpga_cyclone5_chameleon96.dts
new file mode 100644
index 000000000000..f6561766d83f
--- /dev/null
+++ b/arch/arm/boot/dts/socfpga_cyclone5_chameleon96.dts
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Device Tree file for the Chameleon96
+ *
+ * Copyright (c) 2018 Manivannan Sadhasivam 
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+
+#include "socfpga_cyclone5.dtsi"
+
+/ {
+   model = "Novetech Chameleon96";
+   compatible = "novtech,chameleon96", "altr,socfpga-cyclone5", 
"altr,socfpga";
+
+   chosen {
+   bootargs = "earlyprintk";
+   stdout-path = "serial0:115200n8";
+   };
+
+   memory@0 {
+   name = "memory";
+   device_type = "memory";
+   reg = <0x0 0x20000000>; /* 512MB */
+   };
+
+   regulator_3_3v: 3-3-v-regulator {
+   compatible = "regulator-fixed";
+   regulator-name = "3.3V";
+   regulator-min-microvolt = <3300000>;
+   regulator-max-microvolt = <3300000>;
+   };
+
+   leds {
+   compatible = "gpio-leds";
+
+   user_led1 {
+   label = "green:user1";
+   gpios = < 14 GPIO_ACTIVE_LOW>;
+   linux,default-trigger = "heartbeat";
+   };
+
+   user_led2 {
+   label = "green:user2";
+   gpios = < 22 GPIO_ACTIVE_LOW>;
+   linux,default-trigger = "mmc0";
+   };
+
+   user_led3 {
+   label = "green:user3";
+   gpios = < 25 GPIO_ACTIVE_LOW>;
+   linux,default-trigger = "none";
+   };
+
+   user_led4 {
+   label = "green:user4";
+   gpios = < 3 GPIO_ACTIVE_LOW>;
+   panic-indicator;
+   linux,default-trigger = "none";
+   };
+   };
+};
+
+ {
+   status = "okay";
+};
+
+ {
+   status = "okay";
+};
+
+ {
+   /* On Low speed expansion */
+   label = "LS-I2C0";
+   status = "okay";
+};
+
+ {
+   /* On Low speed expansion */
+   label = "LS-I2C1";
+   status = "okay";
+};
+
+ {
+   status = "okay";
+};
+
+ {
+   /* On High speed expansion */
+   label = "HS-I2C2";
+   status = "okay";
+};
+
+ {
+   vmmc-supply = <&regulator_3_3v>;
+   vqmmc-supply = <&regulator_3_3v>;
+   status = "okay";
+};
+
+ {
+   /* On High speed expansion */
+   label = "HS-SPI1";
+   status = "okay";
+};
+
+ {
+   /* On Low speed expansion */
+   label = "LS-SPI0";
+   status = "okay";
+};
+
+ {
+   /* On Low speed expansion */
+   label = "LS-UART1";
+   status = "okay";
+};
+
+ {
+   /* On Low speed expansion */
+   label = "LS-UART0";
+   status = "okay";
+};
+
+ {
+   status = "okay";
+};
+
+ {
+   status = "okay";
+};
-- 
2.17.1

[PATCH 0/2] Add board support for Chameleon96 Board

2018-12-14 Thread Manivannan Sadhasivam

Hello,

This patchset adds board support for Chameleon96 board from Novtech
based on Intel Cyclone V SoC FPGA. This board is one of the Consumer
Edition boards of the 96Boards family and has the following key features:

* SoC - Intel Cyclone V SoC FPGA
* GPU - Graphics based on Intel Video Suite for FPGA
* RAM - 512MB DDR3L
* USB - 2x USB2.0 Host, 1x USB2.0 OTG
* Wireless - Wifi, BT

More information about this board can be found in 96Boards product
page: https://www.96boards.org/product/chameleon96/

This patchset has been tested on Chameleon96 board and the board boots
into a distro on SD card with USB ports working.

Thanks,
Mani

Manivannan Sadhasivam (2):
  dt-bindings: vendor-prefixes: Add Novtech Vendor Prefix
  ARM: dts: Add support for 96Boards Chameleon96 board

 .../devicetree/bindings/vendor-prefixes.txt   |   1 +
 arch/arm/boot/dts/Makefile|   1 +
 .../boot/dts/socfpga_cyclone5_chameleon96.dts | 130 ++
 3 files changed, 132 insertions(+)
 create mode 100644 arch/arm/boot/dts/socfpga_cyclone5_chameleon96.dts

-- 
2.17.1

[PATCH 1/2] dt-bindings: vendor-prefixes: Add Novtech Vendor Prefix

2018-12-14 Thread Manivannan Sadhasivam

Add vendor prefix for NovTech, Inc.

Signed-off-by: Manivannan Sadhasivam 
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt 
b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 4b1a2a8fcc16..ac182966249c 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -270,6 +270,7 @@ nintendoNintendo
 nltNLT Technologies, Ltd.
 nokia  Nokia
 nordic Nordic Semiconductor
+novtech NovTech, Inc.
 nutsboard  NutsBoard
 nuvotonNuvoton Technology Corporation
 nvdNew Vision Display
-- 
2.17.1

Re: [PATCH 6/6] arm64: Use ftrace_graph_get_ret_stack() instead of curr_ret_stack

2018-12-14 Thread Steven Rostedt

On Thu, 13 Dec 2018 17:09:35 +
James Morse  wrote:

> Hi Steven,
> 

> I gave this branch a spin, but I hit the WARN_ON() fairly easily:

Thanks for testing!

Can you see if this patch fixes it for you?

-- Steve

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d4f04f0ca646..8dfd5021b933 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -246,10 +246,10 @@ unsigned long ftrace_return_to_handler(unsigned long 
frame_pointer)
 struct ftrace_ret_stack *
 ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
 {
-   idx = current->curr_ret_stack - idx;
+   idx = task->curr_ret_stack - idx;
 
if (idx >= 0 && idx <= task->curr_ret_stack)
-   return &current->ret_stack[idx];
+   return &task->ret_stack[idx];
 
return NULL;
 }

Re: [RFC 2/4] mm: separate memory allocation and actual work in alloc_vmap_area()

2018-12-14 Thread Joe Perches

On Fri, 2018-12-14 at 11:45 -0800, Matthew Wilcox wrote:
> On Fri, Dec 14, 2018 at 11:40:45AM -0800, Joe Perches wrote:
> > On Fri, 2018-12-14 at 10:13 -0800, Matthew Wilcox wrote:
> > > On Fri, Dec 14, 2018 at 10:07:18AM -0800, Roman Gushchin wrote:
> > > > +/*
> > > > + * Allocate a region of KVA of the specified size and alignment, 
> > > > within the
> > > > + * vstart and vend.
> > > > + */
> > > > +static struct vmap_area *alloc_vmap_area(unsigned long size,
> > > > +unsigned long align,
> > > > +unsigned long vstart,
> > > > +unsigned long vend,
> > > > +int node, gfp_t gfp_mask)
> > > > +{
> > > > +   struct vmap_area *va;
> > > > +   int ret;
> > > > +
> > > > +   va = kmalloc_node(sizeof(struct vmap_area),
> > > > +   gfp_mask & GFP_RECLAIM_MASK, node);
> > > > +   if (unlikely(!va))
> > > > +   return ERR_PTR(-ENOMEM);
> > > > +
> > > > +   ret = init_vmap_area(va, size, align, vstart, vend, node, 
> > > > gfp_mask);
> > > > +   if (ret) {
> > > > +   kfree(va);
> > > > +   return ERR_PTR(ret);
> > > > +   }
> > > > +
> > > > +   return va;
> > > >  }
> > > >  
> > > > +
> > > 
> > > Another spurious blank line?

[wrong spacing noticed]

> Umm ... this blank line changed the file from having one blank line
> after the function to having two blank lines after the function.

right. thanks.

Re: [PATCH 4.4 00/88] 4.4.168-stable review

2018-12-14 Thread Guenter Roeck


On 12/14/18 3:59 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.4.168 release.
There are 88 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Dec 16 11:56:41 UTC 2018.
Anything received after that time might be too late.


Build results:
total: 170 pass: 145 fail: 25
Failed builds:
arm:allnoconfig
arm:tinyconfig
arm:efm32_defconfig
blackfin:defconfig
blackfin:BF561-EZKIT-SMP_defconfig
c6x:dsk6455_defconfig
c6x:evmc6457_defconfig
c6x:evmc6678_defconfig
h8300:allnoconfig
h8300:tinyconfig
h8300:edosk2674_defconfig
h8300:h8300h-sim_defconfig
h8300:h8s-sim_defconfig
m68k:allnoconfig
m68k:tinyconfig
m68k:m5272c3_defconfig
m68k:m5307c3_defconfig
m68k:m5249evb_defconfig
m68k:m5407c3_defconfig
microblaze:nommu_defconfig
microblaze:allnoconfig
microblaze:tinyconfig
sh:defconfig
sh:allnoconfig
sh:tinyconfig
Qemu test results:
total: 288 pass: 288 fail: 0

mm/nommu.c: In function '__get_user_pages_unlocked':
mm/nommu.c:211:49: error: 'write' undeclared (first use in this function)
mm/nommu.c:211:56: error: 'force' undeclared (first use in this function)
mm/nommu.c:212:9: warning: passing argument 7 of 'get_user_pages' from 
incompatible pointer type [enabled by default]
mm/nommu.c:185:6: note: expected 'struct vm_area_struct **' but argument is of 
type 'struct page **'
mm/nommu.c:212:9: error: too many arguments to function 'get_user_pages'
mm/nommu.c:185:6: note: declared here

Details are available at https://kerneltests.org/builders/.

Guenter

Re: [PATCH 4.9 00/51] 4.9.146-stable review

2018-12-14 Thread Guenter Roeck


On 12/14/18 4:00 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.9.146 release.
There are 51 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Dec 16 11:56:52 UTC 2018.
Anything received after that time might be too late.



Build results:
total: 171 pass: 171 fail: 0
Qemu test results:
total: 311 pass: 311 fail: 0

Details are available at https://kerneltests.org/builders/.

Guenter

Re: [PATCH 4.14 00/89] 4.14.89-stable review

2018-12-14 Thread Guenter Roeck


On 12/14/18 3:59 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.14.89 release.
There are 89 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Dec 16 11:57:01 UTC 2018.
Anything received after that time might be too late.



Build results:
total: 171 pass: 171 fail: 0
Qemu test results:
total: 322 pass: 322 fail: 0

Details are available at https://kerneltests.org/builders/.

Guenter

Re: [PATCH 4.19 000/142] 4.19.10-stable review

2018-12-14 Thread Guenter Roeck


On 12/14/18 3:58 AM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 4.19.10 release.
There are 142 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Dec 16 11:57:12 UTC 2018.
Anything received after that time might be too late.



Build results:
total: 155 pass: 155 fail: 0
Qemu test results:
total: 337 pass: 337 fail: 0

Details are available at https://kerneltests.org/builders/.

Guenter

[PATCH v5 2/5] acpi/numa: Set the memory-side-cache size in memblocks

2018-12-14 Thread Dan Williams

From: Keith Busch 

Add memblock based enumeration of memory-side-cache of System RAM.
Detect the capability in early init through HMAT tables, and set the
size in the address range memblocks if a direct mapped side cache is
present.

Cc: 
Cc: "Rafael J. Wysocki" 
Cc: Dave Hansen 
Cc: Andy Lutomirski 
Cc: Peter Zijlstra 
Cc: Mike Rapoport 
Signed-off-by: Keith Busch 
Signed-off-by: Dan Williams 
---
 arch/x86/Kconfig |1 +
 drivers/acpi/numa.c  |   32 
 include/linux/memblock.h |   36 
 mm/Kconfig   |3 +++
 mm/memblock.c|   20 
 5 files changed, 92 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8689e794a43c..3f9c413d8eb5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,6 +171,7 @@ config X86
select HAVE_KVM
select HAVE_LIVEPATCH   if X86_64
select HAVE_MEMBLOCK_NODE_MAP
+   select HAVE_MEMBLOCK_CACHE_INFO if ACPI_NUMA
select HAVE_MIXED_BREAKPOINTS_REGS
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index f5e09c39ff22..ec7e849f1c19 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -40,6 +40,12 @@ static int pxm_to_node_map[MAX_PXM_DOMAINS]
 static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
 
+struct mem_cacheinfo {
+   phys_addr_t size;
+   bool direct_mapped;
+};
+static struct mem_cacheinfo side_cached_pxms[MAX_PXM_DOMAINS] __initdata;
+
 unsigned char acpi_srat_revision __initdata;
 int acpi_numa __initdata;
 
@@ -262,6 +268,8 @@ acpi_numa_memory_affinity_init(struct 
acpi_srat_mem_affinity *ma)
u64 start, end;
u32 hotpluggable;
int node, pxm;
+   u64 cache_size;
+   bool direct;
 
if (srat_disabled())
goto out_err;
@@ -308,6 +316,13 @@ acpi_numa_memory_affinity_init(struct 
acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem 
%#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1);
 
+   cache_size = side_cached_pxms[pxm].size;
+   direct = side_cached_pxms[pxm].direct_mapped;
+   if (cache_size &&
+   memblock_set_sidecache(start, ma->length, cache_size, direct))
+   pr_warn("SRAT: Failed to mark side cached range [mem 
%#010Lx-%#010Lx] in memblock\n",
+   (unsigned long long)start, (unsigned long long)end - 1);
+
max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
 
return 0;
@@ -411,6 +426,18 @@ acpi_parse_memory_affinity(union acpi_subtable_headers * 
header,
return 0;
 }
 
+static int __init
+acpi_parse_cache(union acpi_subtable_headers *header, const unsigned long end)
+{
+   struct acpi_hmat_cache *c = (void *)header;
+   u32 attrs = (c->cache_attributes & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8;
+
+   if (attrs == ACPI_HMAT_CA_DIRECT_MAPPED)
+   side_cached_pxms[c->memory_PD].direct_mapped = true;
+   side_cached_pxms[c->memory_PD].size += c->cache_size;
+   return 0;
+}
+
 static int __init acpi_parse_srat(struct acpi_table_header *table)
 {
struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
@@ -460,6 +487,11 @@ int __init acpi_numa_init(void)
sizeof(struct acpi_table_srat),
srat_proc, ARRAY_SIZE(srat_proc), 0);
 
+   acpi_table_parse_entries(ACPI_SIG_HMAT,
+sizeof(struct acpi_table_hmat),
+ACPI_HMAT_TYPE_CACHE,
+acpi_parse_cache, 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity, 0);
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aee299a6aa76..169ed3dd456d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -60,6 +60,10 @@ struct memblock_region {
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid;
 #endif
+#ifdef CONFIG_HAVE_MEMBLOCK_CACHE_INFO
+   phys_addr_t cache_size;
+   bool direct_mapped;
+#endif
 };
 
 /**
@@ -317,6 +321,38 @@ static inline int memblock_get_region_node(const struct 
memblock_region *r)
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+#ifdef CONFIG_HAVE_MEMBLOCK_CACHE_INFO
+int memblock_set_sidecache(phys_addr_t base, phys_addr_t size,
+  phys_addr_t cache_size, bool direct_mapped);
+
+static inline bool memblock_sidecache_direct_mapped(struct memblock_region *m)
+{
+   return m->direct_mapped;
+}
+
+static inline phys_addr_t memblock_sidecache_size(struct memblock_region *m)
+{
+   return m->cache_size;
+}
+#else

[PATCH v5 0/5] mm: Randomize free memory

2018-12-14 Thread Dan Williams

Changes since v4: [1]
* Default the randomization to off and enable it dynamically based on
  the detection of a memory side cache advertised by platform firmware.
  In the case of x86 this enumeration comes from the ACPI HMAT. (Michal
  and Mel)
* Improve the changelog of the patch that introduces the shuffling to
  clarify the motivation and better explain the tradeoffs. (Michal and
  Mel)
* Include the required HMAT enabling in the series.

[1]: 
https://lkml.kernel.org/r/153922180166.838512.8260339805733812034.st...@dwillia2-desk3.amr.corp.intel.com

---

Quote patch 3:

Randomization of the page allocator improves the average utilization of
a direct-mapped memory-side-cache. Memory side caching is a platform
capability that Linux has been previously exposed to in HPC
(high-performance computing) environments on specialty platforms. In
that instance it was a smaller pool of high-bandwidth-memory relative to
higher-capacity / lower-bandwidth DRAM. Now, this capability is going to
be found on general purpose server platforms where DRAM is a cache in
front of higher latency persistent memory [2].

Robert offered an explanation of the state of the art of Linux
interactions with memory-side-caches [3], and I copy it here:

It's been a problem in the HPC space:

http://www.nersc.gov/research-and-development/knl-cache-mode-performance-coe/

A kernel module called zonesort is available to try to help:
https://software.intel.com/en-us/articles/xeon-phi-software

and this abandoned patch series proposed that for the kernel:
https://lkml.org/lkml/2017/8/23/195

Dan's patch series doesn't attempt to ensure buffers won't conflict, but
also reduces the chance that the buffers will. This will make performance
more consistent, albeit slower than "optimal" (which is near impossible
to attain in a general-purpose kernel).  That's better than forcing
users to deploy remedies like:
"To eliminate this gradual degradation, we have added a Stream
 measurement to the Node Health Check that follows each job;
 nodes are rebooted whenever their measured memory bandwidth
 falls below 300 GB/s."

A replacement for zonesort was merged upstream in commit cc9aec03e58f
"x86/numa_emulation: Introduce uniform split capability". With this
numa_emulation capability, memory can be split into cache sized
("near-memory" sized) numa nodes. A bind operation to such a node, and
disabling workloads on other nodes, enables full cache performance.
However, once the workload exceeds the cache size then cache conflicts
are unavoidable. While HPC environments might be able to tolerate
time-scheduling of cache sized workloads, for general purpose server
platforms, the oversubscribed cache case will be the common case.

The worst case scenario is that a server system owner benchmarks a
workload at boot with an un-contended cache only to see that performance
degrade over time, even below the average cache performance due to
excessive conflicts. Randomization clips the peaks and fills in the
valleys of cache utilization to yield steady average performance.

See patch 3 for more details.

[2]: 
https://itpeernetwork.intel.com/intel-optane-dc-persistent-memory-operating-modes/
[3]: https://lkml.org/lkml/2018/9/22/54

--- 
   

   
Dan Williams (3):
  mm: Shuffle initial free memory to improve memory-side-cache utilization
  mm: Move buddy list manipulations into helpers
  mm: Maintain randomization of page free lists

Keith Busch (2):
  acpi: Create subtable parsing infrastructure
  acpi/numa: Set the memory-side-cache size in memblocks


 arch/ia64/kernel/acpi.c   |   12 +
 arch/x86/Kconfig  |1 
 arch/x86/kernel/acpi/boot.c   |   36 ++--
 drivers/acpi/numa.c   |   48 +
 drivers/acpi/scan.c   |4 
 drivers/acpi/tables.c |   67 ++--
 drivers/irqchip/irq-gic-v2m.c |2 
 drivers/irqchip/irq-gic-v3-its-pci-msi.c  |2 
 drivers/irqchip/irq-gic-v3-its-platform-msi.c |2 
 drivers/irqchip/irq-gic-v3-its.c  |6 -
 drivers/irqchip/irq-gic-v3.c  |8 -
 drivers/irqchip/irq-gic.c |4 
 drivers/mailbox/pcc.c |2 
 include/linux/acpi.h  |5 -
 include/linux/list.h  |   17 ++
 include/linux/memblock.h  |   36 
 include/linux/mm.h|   53 ++
 include/linux/mm_types.h  |3 
 include/linux/mmzone.h|   65 +++
 init/Kconfig  |   36 
 mm/Kconfig

[PATCH v5 3/5] mm: Shuffle initial free memory to improve memory-side-cache utilization

2018-12-14 Thread Dan Williams

Randomization of the page allocator improves the average utilization of
a direct-mapped memory-side-cache. Memory side caching is a platform
capability that Linux has been previously exposed to in HPC
(high-performance computing) environments on specialty platforms. In
that instance it was a smaller pool of high-bandwidth-memory relative to
higher-capacity / lower-bandwidth DRAM. Now, this capability is going to
be found on general purpose server platforms where DRAM is a cache in
front of higher latency persistent memory [1].

Robert offered an explanation of the state of the art of Linux
interactions with memory-side-caches [2], and I copy it here:

It's been a problem in the HPC space:

http://www.nersc.gov/research-and-development/knl-cache-mode-performance-coe/

A kernel module called zonesort is available to try to help:
https://software.intel.com/en-us/articles/xeon-phi-software

and this abandoned patch series proposed that for the kernel:
https://lkml.org/lkml/2017/8/23/195

Dan's patch series doesn't attempt to ensure buffers won't conflict, but
also reduces the chance that the buffers will. This will make performance
more consistent, albeit slower than "optimal" (which is near impossible
to attain in a general-purpose kernel).  That's better than forcing
users to deploy remedies like:
"To eliminate this gradual degradation, we have added a Stream
 measurement to the Node Health Check that follows each job;
 nodes are rebooted whenever their measured memory bandwidth
 falls below 300 GB/s."

A replacement for zonesort was merged upstream in commit cc9aec03e58f
"x86/numa_emulation: Introduce uniform split capability". With this
numa_emulation capability, memory can be split into cache sized
("near-memory" sized) numa nodes. A bind operation to such a node, and
disabling workloads on other nodes, enables full cache performance.
However, once the workload exceeds the cache size then cache conflicts
are unavoidable. While HPC environments might be able to tolerate
time-scheduling of cache sized workloads, for general purpose server
platforms, the oversubscribed cache case will be the common case.

The worst case scenario is that a server system owner benchmarks a
workload at boot with an un-contended cache only to see that performance
degrade over time, even below the average cache performance due to
excessive conflicts. Randomization clips the peaks and fills in the
valleys of cache utilization to yield steady average performance.

Here are some performance impact details of the patches:

1/ An Intel internal synthetic memory bandwidth measurement tool saw a
3X speedup in a contrived case that tries to force cache conflicts. The
contrived case used the numa_emulation capability to force an instance
of the benchmark to be run in two of the near-memory sized numa nodes.
If both instances were placed on the same emulated node they would fit and
cause zero conflicts.  While on separate emulated nodes without
randomization they underutilized the cache and conflicted unnecessarily
due to the in-order allocation per node.

2/ A well known Java server application benchmark was run with a heap
size that exceeded cache size by 3X. The cache conflict rate was 8% for
the first run and degraded to 21% after page allocator aging. With
randomization enabled the rate levelled out at 11%.

3/ A MongoDB workload did not observe measurable difference in
cache-conflict rates, but the overall throughput dropped by 7% with
randomization in one case.

4/ Mel Gorman ran his suite of performance workloads with randomization
enabled on platforms without a memory-side-cache and saw a mix of some
improvements and some losses [3].

While there is potentially significant improvement for applications that
depend on low latency access across a wide working-set, the performance
may be negligible to negative for other workloads. For this reason the
shuffle capability defaults to off unless a direct-mapped
memory-side-cache is detected. Even then, the page_alloc.shuffle=0
parameter can be specified to disable the randomization on those
systems.

Outside of memory-side-cache utilization concerns there is potentially
security benefit from randomization. Some data exfiltration and
return-oriented-programming attacks rely on the ability to infer the
location of sensitive data objects. The kernel page allocator,
especially early in system boot, has predictable first-in-first out
behavior for physical pages. Pages are freed in physical address order
when first onlined.

Quoting Kees:
"While we already have a base-address randomization
 (CONFIG_RANDOMIZE_MEMORY), attacks against the same hardware and
 memory layouts would certainly be using the predictability of
 allocation ordering (i.e. for attacks where the base address isn't
 important: only the relative positions between allocated memory).
 This is common in lots of heap-style attacks.

[PATCH v5 5/5] mm: Maintain randomization of page free lists

2018-12-14 Thread Dan Williams

When freeing a page with an order >= shuffle_page_order randomly select
the front or back of the list for insertion.

While the mm tries to defragment physical pages into huge pages this can
tend to make the page allocator more predictable over time. Inject the
front-back randomness to preserve the initial randomness established by
shuffle_free_memory() when the kernel was booted.

The overhead of this manipulation is constrained by only being applied
for MAX_ORDER sized pages by default.

Cc: Michal Hocko 
Cc: Kees Cook 
Cc: Dave Hansen 
Signed-off-by: Dan Williams 
---
 include/linux/mm.h |   12 
 include/linux/mmzone.h |   10 ++
 mm/page_alloc.c|   11 +--
 mm/shuffle.c   |   16 
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43e5a449caaf..8299267c028a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,6 +2088,13 @@ static inline void shuffle_zone(struct zone *z, unsigned 
long start_pfn,
return;
__shuffle_zone(z, start_pfn, end_pfn);
 }
+
+static inline bool is_shuffle_order(int order)
+{
+   if (!static_branch_unlikely(&page_alloc_shuffle_key))
+return false;
+   return order >= CONFIG_SHUFFLE_PAGE_ORDER;
+}
 #else
 static inline void shuffle_free_memory(pg_data_t *pgdat, unsigned long 
start_pfn,
unsigned long end_pfn)
@@ -2102,6 +2109,11 @@ static inline void shuffle_zone(struct zone *z, unsigned 
long start_pfn,
 static inline void page_alloc_shuffle_enable(void)
 {
 }
+
+static inline bool is_shuffle_order(int order)
+{
+   return false;
+}
 #endif
 
 /* Free the reserved page into the buddy system, so it gets managed. */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 35cc33af87f2..338929647eea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -98,6 +98,8 @@ extern int page_group_by_mobility_disabled;
 struct free_area {
struct list_headfree_list[MIGRATE_TYPES];
unsigned long   nr_free;
+   u64 rand;
+   u8  rand_bits;
 };
 
 /* Used for pages not on another list */
@@ -116,6 +118,14 @@ static inline void add_to_free_area_tail(struct page 
*page, struct free_area *ar
area->nr_free++;
 }
 
+#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR
+/* Used to preserve page allocation order entropy */
+void add_to_free_area_random(struct page *page, struct free_area *area,
+   int migratetype);
+#else
+#define add_to_free_area_random add_to_free_area
+#endif
+
 /* Used for pages which are on another list */
 static inline void move_to_free_area(struct page *page, struct free_area *area,
 int migratetype)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d2f7b050bc13..62a40ad07593 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -850,7 +851,8 @@ static inline void __free_one_page(struct page *page,
 * so it's less likely to be used soon and more likely to be merged
 * as a higher order page
 */
-   if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) {
+   if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)
+   && !is_shuffle_order(order)) {
struct page *higher_page, *higher_buddy;
combined_pfn = buddy_pfn & pfn;
higher_page = page + (combined_pfn - pfn);
@@ -864,7 +866,12 @@ static inline void __free_one_page(struct page *page,
}
}
 
-   add_to_free_area(page, &zone->free_area[order], migratetype);
+   if (is_shuffle_order(order))
+   add_to_free_area_random(page, &zone->free_area[order],
+   migratetype);
+   else
+   add_to_free_area(page, &zone->free_area[order], migratetype);
+
 }
 
 /*
diff --git a/mm/shuffle.c b/mm/shuffle.c
index 621fde268d01..5850a0761d10 100644
--- a/mm/shuffle.c
+++ b/mm/shuffle.c
@@ -204,3 +204,19 @@ void __meminit __shuffle_free_memory(pg_data_t *pgdat, 
unsigned long start_pfn,
for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
shuffle_zone(z, start_pfn, end_pfn);
 }
+
+void add_to_free_area_random(struct page *page, struct free_area *area,
+   int migratetype)
+{
+   if (area->rand_bits == 0) {
+   area->rand_bits = 64;
+   area->rand = get_random_u64();
+   }
+
+   if (area->rand & 1)
+   add_to_free_area(page, area, migratetype);
+   else
+   add_to_free_area_tail(page, area, migratetype);
+   area->rand_bits--;
+   area->rand >>= 1;
+}

[PATCH v5 4/5] mm: Move buddy list manipulations into helpers

2018-12-14 Thread Dan Williams

In preparation for runtime randomization of the zone lists, take all
(well, most of) the list_*() functions in the buddy allocator and put
them in helper functions. Provide a common control point for injecting
additional behavior when freeing pages.

Cc: Michal Hocko 
Cc: Dave Hansen 
Signed-off-by: Dan Williams 
---
 include/linux/mm.h   |3 --
 include/linux/mm_types.h |3 ++
 include/linux/mmzone.h   |   51 ++
 mm/compaction.c  |4 +--
 mm/page_alloc.c  |   70 ++
 5 files changed, 84 insertions(+), 47 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9647779e82b..43e5a449caaf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -473,9 +473,6 @@ static inline void vma_set_anonymous(struct vm_area_struct 
*vma)
 struct mmu_gather;
 struct inode;
 
-#define page_private(page) ((page)->private)
-#define set_page_private(page, v)  ((page)->private = (v))
-
 #if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static inline int pmd_devmap(pmd_t pmd)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..72f37ea6dedb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -209,6 +209,9 @@ struct page {
 #define PAGE_FRAG_CACHE_MAX_SIZE   __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER  get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
+#define page_private(page) ((page)->private)
+#define set_page_private(page, v)  ((page)->private = (v))
+
 struct page_frag_cache {
void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index eafa66d66232..35cc33af87f2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -18,6 +18,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 /* Free memory management - zoned buddy allocator.  */
@@ -98,6 +100,55 @@ struct free_area {
unsigned long   nr_free;
 };
 
+/* Used for pages not on another list */
+static inline void add_to_free_area(struct page *page, struct free_area *area,
+int migratetype)
+{
+   list_add(&page->lru, &area->free_list[migratetype]);
+   area->nr_free++;
+}
+
+/* Used for pages not on another list */
+static inline void add_to_free_area_tail(struct page *page, struct free_area 
*area,
+ int migratetype)
+{
+   list_add_tail(&page->lru, &area->free_list[migratetype]);
+   area->nr_free++;
+}
+
+/* Used for pages which are on another list */
+static inline void move_to_free_area(struct page *page, struct free_area *area,
+int migratetype)
+{
+   list_move(&page->lru, &area->free_list[migratetype]);
+}
+
+static inline struct page *get_page_from_free_area(struct free_area *area,
+   int migratetype)
+{
+   return list_first_entry_or_null(&area->free_list[migratetype],
+   struct page, lru);
+}
+
+static inline void rmv_page_order(struct page *page)
+{
+   __ClearPageBuddy(page);
+   set_page_private(page, 0);
+}
+
+static inline void del_page_from_free_area(struct page *page,
+   struct free_area *area, int migratetype)
+{
+   list_del(&page->lru);
+   rmv_page_order(page);
+   area->nr_free--;
+}
+
+static inline bool free_area_empty(struct free_area *area, int migratetype)
+{
+   return list_empty(&area->free_list[migratetype]);
+}
+
 struct pglist_data;
 
 /*
diff --git a/mm/compaction.c b/mm/compaction.c
index 7c607479de4a..44adbfa073b3 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1359,13 +1359,13 @@ static enum compact_result __compact_finished(struct 
zone *zone,
bool can_steal;
 
/* Job done if page is free of the right migratetype */
-   if (!list_empty(&area->free_list[migratetype]))
+   if (!free_area_empty(area, migratetype))
return COMPACT_SUCCESS;
 
 #ifdef CONFIG_CMA
/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
if (migratetype == MIGRATE_MOVABLE &&
-   !list_empty(&area->free_list[MIGRATE_CMA]))
+   !free_area_empty(area, MIGRATE_CMA))
return COMPACT_SUCCESS;
 #endif
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a51031cf16fe..d2f7b050bc13 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -704,12 +704,6 @@ static inline void set_page_order(struct page *page, 
unsigned int order)
__SetPageBuddy(page);
 }
 
-static inline void rmv_page_order(struct page *page)
-{
-   __ClearPageBuddy(page);
-   set_page_private(page, 0);
-}
-
 /*
  * This function checks whether a page is free && is the buddy
  * we can coalesce a page and its buddy if
@@ -810,13 +804,11 @@ static inline void

[PATCH v5 1/5] acpi: Create subtable parsing infrastructure

2018-12-14 Thread Dan Williams

From: Keith Busch 

Parsing entries in an ACPI table had assumed a generic header
structure. There is no standard ACPI header, though, so less common
layouts with different field sizes required custom parsers to go through
their subtable entry list.

Create the infrastructure for adding different table types so that the
entry-parsing code can be reused for all ACPI system tables and the
common code doesn't need to be duplicated.

Reviewed-by: Rafael J. Wysocki 
Signed-off-by: Keith Busch 
Signed-off-by: Dan Williams 
---
 arch/ia64/kernel/acpi.c   |   12 ++--
 arch/x86/kernel/acpi/boot.c   |   36 +++--
 drivers/acpi/numa.c   |   16 +++---
 drivers/acpi/scan.c   |4 +
 drivers/acpi/tables.c |   67 +
 drivers/irqchip/irq-gic-v2m.c |2 -
 drivers/irqchip/irq-gic-v3-its-pci-msi.c  |2 -
 drivers/irqchip/irq-gic-v3-its-platform-msi.c |2 -
 drivers/irqchip/irq-gic-v3-its.c  |6 +-
 drivers/irqchip/irq-gic-v3.c  |8 +--
 drivers/irqchip/irq-gic.c |4 +
 drivers/mailbox/pcc.c |2 -
 include/linux/acpi.h  |5 +-
 13 files changed, 108 insertions(+), 58 deletions(-)

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 41eb281709da..3973d2c2a9b0 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -177,7 +177,7 @@ struct acpi_table_madt *acpi_madt __initdata;
 static u8 has_8259;
 
 static int __init
-acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header,
  const unsigned long end)
 {
struct acpi_madt_local_apic_override *lapic;
@@ -216,7 +216,7 @@ acpi_parse_lsapic(struct acpi_subtable_header * header, 
const unsigned long end)
 }
 
 static int __init
-acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long 
end)
+acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long 
end)
 {
struct acpi_madt_local_apic_nmi *lacpi_nmi;
 
@@ -230,7 +230,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, 
const unsigned long e
 }
 
 static int __init
-acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long 
end)
+acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long 
end)
 {
struct acpi_madt_io_sapic *iosapic;
 
@@ -245,7 +245,7 @@ acpi_parse_iosapic(struct acpi_subtable_header * header, 
const unsigned long end
 static unsigned int __initdata acpi_madt_rev;
 
 static int __init
-acpi_parse_plat_int_src(struct acpi_subtable_header * header,
+acpi_parse_plat_int_src(union acpi_subtable_headers * header,
const unsigned long end)
 {
struct acpi_madt_interrupt_source *plintsrc;
@@ -329,7 +329,7 @@ unsigned int get_cpei_target_cpu(void)
 }
 
 static int __init
-acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
   const unsigned long end)
 {
struct acpi_madt_interrupt_override *p;
@@ -350,7 +350,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
 }
 
 static int __init
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long 
end)
+acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long 
end)
 {
struct acpi_madt_nmi_source *nmi_src;
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 06635fbca81c..58561b4df09d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 
enabled)
 }
 
 static int __init
-acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
 {
struct acpi_madt_local_x2apic *processor = NULL;
 #ifdef CONFIG_X86_X2APIC
@@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
 
-   acpi_table_print_madt_entry(header);
+   acpi_table_print_madt_entry(&header->common);
 
 #ifdef CONFIG_X86_X2APIC
apic_id = processor->local_apic_id;
@@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
 }
 
 static int __init
-acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
 {
struct acpi_madt_local_apic *processor = NULL;
 
@@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, 
const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return

Re: [PATCH v2] arm64: invalidate TLB just before turning MMU on

2018-12-14 Thread Qian Cai

On 12/14/18 2:23 AM, Ard Biesheuvel wrote:
> On Fri, 14 Dec 2018 at 05:08, Qian Cai  wrote:
>> Also tried to move the local TLB flush part around a bit inside
>> __cpu_setup(), although it did complete kdump some times, it did trigger
>> "Synchronous Exception" in EFI after a cold-reboot fairly often that
>> seems no way to recover remotely without reinstalling the OS.
> 
> This doesn't make any sense to me. If the system gets into a weird
> state out of cold reboot, how could this code be the culprit? Please
> check your firmware, and try to reproduce the issue on a system that
> doesn't have such defects.
> 

I'll continue investigating those "Synchronous Exception" errors, although it is
hard because I don't have any of the firmware's source code to confirm whether it
is buggy or not.

I did manage to reproduce this kdump issue on around 5 of those servers running a
fairly recent version of the firmware (07/01/2018). I don't have access to other
large CPU machines.

[PATCH] PCI: Remove unused attr variable in pci_dma_configure

2018-12-14 Thread Nathan Chancellor

Clang warns:

drivers/pci/pci-driver.c:1603:21: error: unused variable 'attr'
[-Werror,-Wunused-variable]

Commit e5361ca29f2f ("ACPI / scan: Refactor _CCA enforcement") removed
attr's use and replaced it with its assigned value so it is no longer
needed.

Signed-off-by: Nathan Chancellor 
---

The commit that causes this warning is in Christoph's dma-mapping tree
so I assume this will go there too (roll it into it if need be).

 drivers/pci/pci-driver.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 1b58e058b13f..ea55444e6ead 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1600,7 +1600,6 @@ static int pci_dma_configure(struct device *dev)
ret = of_dma_configure(dev, bridge->parent->of_node, true);
} else if (has_acpi_companion(bridge)) {
struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
-   enum dev_dma_attr attr = acpi_get_dma_attr(adev);
 
ret = acpi_dma_configure(dev, acpi_get_dma_attr(adev));
}
-- 
2.20.0

Re: [PATCH] thermal: stm32: read factory settings inside stm_thermal_prepare

2018-12-14 Thread Eduardo Valentin



On Thu, Dec 06, 2018 at 01:23:32PM +, David HERNANDEZ SANCHEZ wrote:
> Calling stm_thermal_read_factory_settings before clocking
> internal peripheral causes bad register values and makes
> temperature computation wrong.
> 
> Calling stm_thermal_read_factory_settings inside
> stm_thermal_prepare fixes this problem as internal
> peripheral is well clocked at this stage.
> 
> Signed-off-by: David Hernandez Sanchez 
> Fixes: 1d693155 ("thermal: add stm32 thermal driver")

Please append patch version in the title next time to help tracking it.

> 
> diff --git a/drivers/thermal/st/stm_thermal.c 
> b/drivers/thermal/st/stm_thermal.c
> index 47623da..daa1257 100644
> --- a/drivers/thermal/st/stm_thermal.c
> +++ b/drivers/thermal/st/stm_thermal.c
> @@ -532,6 +532,10 @@ static int stm_thermal_prepare(struct stm_thermal_sensor 
> *sensor)
>   if (ret)
>   return ret;
>  
> + ret = stm_thermal_read_factory_settings(sensor);
> + if (ret)
> + goto thermal_unprepare;
> +
>   ret = stm_thermal_calibration(sensor);
>   if (ret)
>   goto thermal_unprepare;
> @@ -636,10 +640,6 @@ static int stm_thermal_probe(struct platform_device 
> *pdev)
>   /* Populate sensor */
>   sensor->base = base;
>  
> - ret = stm_thermal_read_factory_settings(sensor);
> - if (ret)
> - return ret;
> -
>   sensor->clk = devm_clk_get(&pdev->dev, "pclk");
>   if (IS_ERR(sensor->clk)) {
>   dev_err(&pdev->dev, "%s: failed to fetch PCLK clock\n",
> -- 
> 2.7.4

Re: gtucker/kernelci-stable boot bisection: v4.19.9 on meson-gxbb-p200

2018-12-14 Thread Kevin Hilman

"kernelci.org bot"  writes:

> gtucker/kernelci-stable boot bisection: v4.19.9 on meson-gxbb-p200
>
> Summary:
>   Start:  be53d23e68c2 Linux 4.19.9
>   Details:https://kernelci.org/boot/id/5c13e85d59b5144a340a819d
>   Plain log:  
> https://storage.kernelci.org//gtucker/kernelci-stable/v4.19.9/arm64/defconfig/lab-baylibre/boot-meson-gxbb-p200.txt
>   HTML log:   
> https://storage.kernelci.org//gtucker/kernelci-stable/v4.19.9/arm64/defconfig/lab-baylibre/boot-meson-gxbb-p200.html
>   Result: be53d23e68c2 Linux 4.19.9
>
> Checks:
>   revert: PASS
>   verify: PASS
>
> Parameters:
>   Tree:   gtucker
>   URL:https://gitlab.collabora.com/gtucker/linux.git
>   Branch: kernelci-stable
>   Target: meson-gxbb-p200
>   CPU arch:   arm64
>   Lab:lab-baylibre
>   Config: defconfig
>   Test suite: boot
>
> Breaking commit found:
>
> ---
> commit be53d23e68c29900da6b6ce486b5ab8507de94b1
> Author: Greg Kroah-Hartman 
> Date:   Thu Dec 13 09:16:23 2018 +0100
>
> Linux 4.19.9
>
> diff --git a/Makefile b/Makefile
> index 34bc4c752c49..8717f34464d5 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1,7 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  VERSION = 4
>  PATCHLEVEL = 19
> -SUBLEVEL = 8
> +SUBLEVEL = 9
>  EXTRAVERSION =
>  NAME = "People's Front"
> ---

This is pretty clearly a bug in the automation tooling.

If this patch broke the kernel on that board (which I maintain, BTW) I
would be very impressed.

Kevin

Re: [PATCH v2 6/8] socket: Add SO_TIMESTAMP[NS]_NEW

2018-12-14 Thread Deepa Dinamani

> > diff --git a/arch/alpha/include/uapi/asm/socket.h 
> > b/arch/alpha/include/uapi/asm/socket.h
> > index 00e45c80e574..352e3dc0b3d9 100644
> > --- a/arch/alpha/include/uapi/asm/socket.h
> > +++ b/arch/alpha/include/uapi/asm/socket.h
> > @@ -3,6 +3,7 @@
> >  #define _UAPI_ASM_SOCKET_H
> >
> >  #include 
> > +#include 
> >
> >  /* For setsockopt(2) */
> >  /*
> > @@ -110,12 +111,22 @@
> >
> >  #define SO_TIMESTAMP_OLD 29
> >  #define SO_TIMESTAMPNS_OLD   35
> > +
> >  #define SO_TIMESTAMPING_OLD  37
> >
> > +#define SO_TIMESTAMP_NEW 62
> > +#define SO_TIMESTAMPNS_NEW   63
> > +
> >  #if !defined(__KERNEL__)
> >
> > -#define SO_TIMESTAMP   SO_TIMESTAMP_OLD
> > -#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD
> > +#if __BITS_PER_LONG == 64
> > +#define SO_TIMESTAMP   SO_TIMESTAMP_OLD
> > +#define SO_TIMESTAMPNS SO_TIMESTAMPNS_OLD
> > +#else
> > +#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? 
> > SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
> > +#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? 
> > SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
> > +#endif
> > +
>
> This is not platform specific. Perhaps it can be deduplicated. The
> interface expects callers to include <linux/socket.h>, not
> <asm/socket.h> directly. So perhaps it can go there?

I'm not following what you are saying here.

Are you talking about in kernel users or userspace interface?

Userspace should always include sys/socket.h according to the man page.
I'm not sure if userspace can even include linux/socket.h directly.
On my distribution this includes bits/socket.h which in turn includes
asm/socket.h.

Which file gets installed as asm/socket.h is defined per architecture
in the kbuild file such as
arch/ia64/include/uapi/asm/Kbuild (without series applied):

 generic-y += poll.h
 generic-y += sembuf.h
 generic-y += shmbuf.h
 generic-y += socket.h

Also the new timestamp numbers being added are not the same for all
architectures.

So I'm not sure how this can be moved to linux/socket.h.

> This did not address yet the previous comments on consistency and
> unnecessary code churn.
>
> The existing logic to differentiate SO_TIMESTAMP from SO_TIMESTAMPNS
> in both tcp_recv_timestamp and __sock_recv_timestamp is
>
>   if (sock_flag(sk, SOCK_RCVTSTAMP)) {
>   if (sock_flag(sk, SOCK_RCVTSTAMPNS))
>   /* timespec case */
>   else
>   /* timeval case */
>   }
>
> A new level of nesting needs to be added to differentiate .._OLD from .._NEW.
>
> Even if massively changing the original functions, please do so
> consistently, either
>
>   if (sock_flag(sk, SOCK_RCVTSTAMP)) {
>   if (sock_flag(sk, SOCK_TSTAMP_NEW)) {
>   /* new code */
>   } else {
>   if (sock_flag(sk, SOCK_RCVTSTAMPNS))
>   /* timespec case */
>   else
>   /* timeval case */
>  }
>   }
>
> or
>
>   if (sock_flag(sk, SOCK_RCVTSTAMP)) {
>   if (sock_flag(sk, SOCK_TSTAMP_NEW)) {
>   if (sock_flag(sk, SOCK_RCVTSTAMPNS))
>   /* new timespec case */
>   else
>   /* new timeval case */
>   } else {
>if (sock_flag(sk, SOCK_RCVTSTAMPNS))
>/* timespec case */
>else
>/* timeval case */
>   }
>   }
>
> But not one variant in one function and one in the other.
>
> Deep nesting is hard to follow and, once again, massive code changes
> (even indentations) make git blame harder to use. So where possible,
> try to avoid both and just insert a branch to a new function for the
> .._NEW cases instead:
>
>   if (sock_flag(sk, SOCK_RCVTSTAMP)) {
> +  if (sock_flag(sk, SOCK_TSTAMP_NEW))
> +  __sock_recv_timestamp_new(..);
> -  if (sock_flag(sk, SOCK_RCVTSTAMPNS))
> +  else if (sock_flag(sk, SOCK_RCVTSTAMPNS))
>   /* timespec case */
>   else
>   /* timeval case */
>   }
>
> and leave the rest of the function unmodified.

Ok, I will keep the functions consistent.

-Deepa

[PATCH v10 3/6] PCI/ACPI: Allow ACPI to be built without CONFIG_PCI set

2018-12-14 Thread Sinan Kaya

We are compiling PCI code today for systems with ACPI and no PCI
device present. Remove the useless code and reduce the tight
dependency.

Signed-off-by: Sinan Kaya 
Acked-by: Bjorn Helgaas  # PCI parts
---
 arch/x86/include/asm/pci_x86.h | 7 +++
 drivers/acpi/Kconfig   | 1 -
 drivers/acpi/Makefile  | 2 +-
 drivers/acpi/internal.h| 5 +
 drivers/pci/Makefile   | 2 +-
 include/acpi/acpi_drivers.h| 7 +++
 include/linux/acpi.h   | 7 +++
 include/linux/pci.h| 4 
 8 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 959d618dbb17..73bb404f4d2a 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -121,7 +121,14 @@ extern void __init dmi_check_pciprobe(void);
 extern void __init dmi_check_skip_isa_align(void);
 
 /* some common used subsys_initcalls */
+#ifdef CONFIG_PCI
 extern int __init pci_acpi_init(void);
+#else
+static inline int  __init pci_acpi_init(void)
+{
+   return -EINVAL;
+}
+#endif
 extern void __init pcibios_irq_init(void);
 extern int __init pcibios_init(void);
 extern int pci_legacy_init(void);
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 7cea769c37df..a0abcb3bd673 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -9,7 +9,6 @@ config ARCH_SUPPORTS_ACPI
 menuconfig ACPI
bool "ACPI (Advanced Configuration and Power Interface) Support"
depends on ARCH_SUPPORTS_ACPI
-   depends on PCI
select PNP
default y if X86
help
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index edc039313cd6..7c6afc111d76 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -39,7 +39,7 @@ acpi-y+= processor_core.o
 acpi-$(CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC) += processor_pdc.o
 acpi-y += ec.o
 acpi-$(CONFIG_ACPI_DOCK)   += dock.o
-acpi-y += pci_root.o pci_link.o pci_irq.o
+acpi-$(CONFIG_PCI) += pci_root.o pci_link.o pci_irq.o
 obj-$(CONFIG_ACPI_MCFG)+= pci_mcfg.o
 acpi-y += acpi_lpss.o acpi_apd.o
 acpi-y += acpi_platform.o
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 530a3f675490..b7060dae2789 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -25,8 +25,13 @@ int acpi_osi_init(void);
 acpi_status acpi_os_initialize1(void);
 void init_acpi_device_notify(void);
 int acpi_scan_init(void);
+#ifdef CONFIG_PCI
 void acpi_pci_root_init(void);
 void acpi_pci_link_init(void);
+#else
+static inline void acpi_pci_root_init(void) {}
+static inline void acpi_pci_link_init(void) {}
+#endif
 void acpi_processor_init(void);
 void acpi_platform_init(void);
 void acpi_pnp_init(void);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index f2bda77a2df1..657d642fcc67 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -11,6 +11,7 @@ ifdef CONFIG_PCI
 obj-$(CONFIG_PROC_FS)  += proc.o
 obj-$(CONFIG_SYSFS)+= slot.o
 obj-$(CONFIG_OF)   += of.o
+obj-$(CONFIG_ACPI) += pci-acpi.o
 endif
 
 obj-$(CONFIG_PCI_QUIRKS)   += quirks.o
@@ -20,7 +21,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o
 obj-$(CONFIG_PCI_ATS)  += ats.o
 obj-$(CONFIG_PCI_IOV)  += iov.o
 obj-$(CONFIG_PCI_BRIDGE_EMUL)  += pci-bridge-emul.o
-obj-$(CONFIG_ACPI) += pci-acpi.o
 obj-$(CONFIG_PCI_LABEL)+= pci-label.o
 obj-$(CONFIG_X86_INTEL_MID)+= pci-mid.o
 obj-$(CONFIG_PCI_SYSCALL)  += syscall.o
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 14499757338f..de1804aeaf69 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -88,7 +88,14 @@ int acpi_pci_link_free_irq(acpi_handle handle);
 
 struct pci_bus;
 
+#ifdef CONFIG_PCI
 struct pci_dev *acpi_get_pci_dev(acpi_handle);
+#else
+static inline struct pci_dev *acpi_get_pci_dev(acpi_handle handle)
+{
+   return NULL;
+}
+#endif
 
 /* Arch-defined function to add a bus to the system */
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ed80f147bd50..eb1fdf4c196a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -340,7 +340,14 @@ struct pci_dev;
 int acpi_pci_irq_enable (struct pci_dev *dev);
 void acpi_penalize_isa_irq(int irq, int active);
 bool acpi_isa_irq_available(int irq);
+#ifdef CONFIG_PCI
 void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
+#else
+static inline void acpi_penalize_sci_irq(int irq, int trigger,
+   int polarity)
+{
+}
+#endif
 void acpi_pci_irq_disable (struct pci_dev *dev);
 
 extern int ec_read(u8 addr, u8 *val);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 11c71c4ecf75..51a5a5217667 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1960,7 +1960,11 @@ int

[PATCH v10 4/6] ACPICA: Remove PCI bits from ACPICA when CONFIG_PCI is unset

2018-12-14 Thread Sinan Kaya

Now that we allow CONFIG_PCI to be unset, remove useless code from ACPICA
too.

Signed-off-by: Sinan Kaya 
---
 drivers/acpi/acpica/Makefile| 2 +-
 drivers/acpi/acpica/achware.h   | 9 +
 include/acpi/platform/aclinux.h | 4 
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index b14621da5413..59700433a96e 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -77,13 +77,13 @@ acpi-y +=   \
hwacpi.o\
hwesleep.o  \
hwgpe.o \
-   hwpci.o \
hwregs.o\
hwsleep.o   \
hwvalid.o   \
hwxface.o   \
hwxfsleep.o
 
+acpi-$(CONFIG_PCI) += hwpci.o
 acpi-$(ACPI_FUTURE_USAGE) += hwtimer.o
 
 acpi-y +=  \
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index 43ce67a9da1f..ef99e2fc37f8 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -106,11 +106,20 @@ acpi_hw_enable_runtime_gpe_block(struct 
acpi_gpe_xrupt_info *gpe_xrupt_info,
 struct acpi_gpe_block_info *gpe_block,
 void *context);
 
+#ifdef ACPI_PCI_CONFIGURED
 /*
  * hwpci - PCI configuration support
  */
 acpi_status
 acpi_hw_derive_pci_id(struct acpi_pci_id *pci_id,
  acpi_handle root_pci_device, acpi_handle pci_region);
+#else
+static inline acpi_status
+acpi_hw_derive_pci_id(struct acpi_pci_id *pci_id, acpi_handle root_pci_device,
+ acpi_handle pci_region)
+{
+   return AE_SUPPORT;
+}
+#endif
 
 #endif /* __ACHWARE_H__ */
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 7451b3bca83a..e3d21d014fcc 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -33,6 +33,10 @@
 
 /* Kernel specific ACPICA configuration */
 
+#ifdef CONFIG_PCI
+#define ACPI_PCI_CONFIGURED
+#endif
+
 #ifdef CONFIG_ACPI_REDUCED_HARDWARE_ONLY
 #define ACPI_REDUCED_HARDWARE 1
 #endif
-- 
2.19.0

[PATCH v10 6/6] ACPI/IORT: Stub out ACS functions when CONFIG_PCI is not set

2018-12-14 Thread Sinan Kaya

Compile out the PCI-dependent code in iort.c when CONFIG_PCI is not defined.

Signed-off-by: Sinan Kaya 
---
 drivers/acpi/arm64/iort.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 70f4e80b9246..d0f68607efe6 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1437,6 +1437,7 @@ static int __init iort_add_platform_device(struct 
acpi_iort_node *node,
 
 static bool __init iort_enable_acs(struct acpi_iort_node *iort_node)
 {
+#ifdef CONFIG_PCI
if (iort_node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
struct acpi_iort_node *parent;
struct acpi_iort_id_mapping *map;
@@ -1462,6 +1463,7 @@ static bool __init iort_enable_acs(struct acpi_iort_node 
*iort_node)
}
}
}
+#endif
 
return false;
 }
-- 
2.19.0

[PATCH v10 5/6] arm64: select ACPI PCI code only when both features are enabled

2018-12-14 Thread Sinan Kaya

ACPI and PCI are no longer coupled to each other. Specify requirements
for both when pulling in code.

Signed-off-by: Sinan Kaya 
---
 arch/arm64/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ea2ab0330e3a..bcb6262044d8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -5,7 +5,7 @@ config ARM64
select ACPI_GTDT if ACPI
select ACPI_IORT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
-   select ACPI_MCFG if ACPI
+   select ACPI_MCFG if (ACPI && PCI)
select ACPI_SPCR_TABLE if ACPI
select ACPI_PPTT if ACPI
select ARCH_CLOCKSOURCE_DATA
@@ -163,7 +163,7 @@ config ARM64
select OF
select OF_EARLY_FLATTREE
select OF_RESERVED_MEM
-   select PCI_ECAM if ACPI
+   select PCI_ECAM if (ACPI && PCI)
select POWER_RESET
select POWER_SUPPLY
select REFCOUNT_FULL
-- 
2.19.0

[PATCH v10 2/6] ACPI / OSL: Stub out acpi_os_(read/write)_pci_configurations()

2018-12-14 Thread Sinan Kaya

Getting ready to allow CONFIG_PCI to be unset with ACPI enabled. Stub out
acpi_os_read_pci_configuration and acpi_os_write_pci_configuration
functions when CONFIG_PCI is not defined.

Signed-off-by: Sinan Kaya 
---
 drivers/acpi/osl.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index b48874b8e1ea..524fd5f33ea4 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -773,6 +773,7 @@ acpi_status
 acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, u32 reg,
   u64 *value, u32 width)
 {
+#ifdef CONFIG_PCI
int result, size;
u32 value32;
 
@@ -799,12 +800,19 @@ acpi_os_read_pci_configuration(struct acpi_pci_id * 
pci_id, u32 reg,
*value = value32;
 
return (result ? AE_ERROR : AE_OK);
+#else
+   int rc;
+
+   rc = pr_warn_once("PCI configuration space access is not supported\n");
+   return rc ? AE_SUPPORT : AE_OK;
+#endif
 }
 
 acpi_status
 acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id, u32 reg,
u64 value, u32 width)
 {
+#ifdef CONFIG_PCI
int result, size;
 
switch (width) {
@@ -826,6 +834,12 @@ acpi_os_write_pci_configuration(struct acpi_pci_id * 
pci_id, u32 reg,
reg, size, value);
 
return (result ? AE_ERROR : AE_OK);
+#else
+   int rc;
+
+   rc = pr_warn_once("PCI configuration space access is not supported\n");
+   return rc ? AE_SUPPORT : AE_OK;
+#endif
 }
 
 static void acpi_os_execute_deferred(struct work_struct *work)
-- 
2.19.0

[PATCH v10 1/6] ACPI: Allow CONFIG_PCI to be unset for reboot

2018-12-14 Thread Sinan Kaya

Make PCI reboot conditional on PCI support being present in the kernel
configuration.

Signed-off-by: Sinan Kaya 
---
 drivers/acpi/reboot.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c
index 6fa9c2a4cfe9..d75e637ee36a 100644
--- a/drivers/acpi/reboot.c
+++ b/drivers/acpi/reboot.c
@@ -7,8 +7,6 @@
 void acpi_reboot(void)
 {
struct acpi_generic_address *rr;
-   struct pci_bus *bus0;
-   unsigned int devfn;
u8 reset_value;
 
if (acpi_disabled)
@@ -33,6 +31,11 @@ void acpi_reboot(void)
 * on a device on bus 0. */
switch (rr->space_id) {
case ACPI_ADR_SPACE_PCI_CONFIG:
+   {
+#ifdef CONFIG_PCI
+   unsigned int devfn;
+   struct pci_bus *bus0;
+
/* The reset register can only live on bus 0. */
bus0 = pci_find_bus(0, 0);
if (!bus0)
@@ -44,8 +47,9 @@ void acpi_reboot(void)
/* Write the value that resets us. */
pci_bus_write_config_byte(bus0, devfn,
(rr->address & 0xffff), reset_value);
+#endif
break;
-
+   }
case ACPI_ADR_SPACE_SYSTEM_MEMORY:
case ACPI_ADR_SPACE_SYSTEM_IO:
printk(KERN_DEBUG "ACPI MEMORY or I/O RESET_REG.\n");
-- 
2.19.0

Re: [PATCH] kbuild, x86: revert macros in extended asm workarounds

2018-12-14 Thread Masahiro Yamada

Hi Peter,

On Thu, Dec 13, 2018 at 7:53 PM Peter Zijlstra  wrote:
>
> On Thu, Dec 13, 2018 at 06:17:41PM +0900, Masahiro Yamada wrote:
> > Revert the following commits:
> >
> > - 5bdcd510c2ac9efaf55c4cbd8d46421d8e2320cd
> >   ("x86/jump-labels: Macrofy inline assembly code to work around GCC 
> > inlining bugs")
> >
> > - d5a581d84ae6b8a4a740464b80d8d9cf1e7947b2
> >   ("x86/cpufeature: Macrofy inline assembly code to work around GCC 
> > inlining bugs")
> >
> > - 0474d5d9d2f7f3b11262f7bf87d0e7314ead9200.
> >   ("x86/extable: Macrofy inline assembly code to work around GCC inlining 
> > bugs")
> >
> > - 494b5168f2de009eb80f198f668da374295098dd.
> >   ("x86/paravirt: Work around GCC inlining bugs when compiling paravirt 
> > ops")
> >
> > - f81f8ad56fd1c7b99b2ed1c314527f7d9ac447c6.
> >   ("x86/bug: Macrofy the BUG table section handling, to work around GCC 
> > inlining bugs")
> >
> > - 77f48ec28e4ccff94d2e5f4260a83ac27a7f3099.
> >   ("x86/alternatives: Macrofy lock prefixes to work around GCC inlining 
> > bugs")
> >
> > - 9e1725b410594911cc5981b6c7b4cea4ec054ca8.
> >   ("x86/refcount: Work around GCC inlining bug")
> >   (Conflicts: arch/x86/include/asm/refcount.h)
> >
> > - c06c4d8090513f2974dfdbed2ac98634357ac475.
> >   ("x86/objtool: Use asm macros to work around GCC inlining bugs")
> >
> > - 77b0bf55bc675233d22cd5df97605d516d64525e.
> >   ("kbuild/Makefile: Prepare for using macros in inline assembly code to 
> > work around asm() related GCC inlining bugs")
> >
>
> I don't think we want to blindly revert all that. Some of them actually
> made sense and did clean up things irrespective of the asm-inline issue.
>
> In particular I like the jump-label one.

[1] The #error message is unnecessary.

[2] keep STATIC_BRANCH_NOP/JMP instead of STATIC_JUMP_IF_TRUE/FALSE



In v2, I will make sure to not re-add [1].
I am not sure about [2].


Do you mean only [1],
or both of them?



> The cpufeature one OTOH, yeah,
> I'd love to get that reverted.
>
> And as a note; the normal commit quoting style is:
>
>   d5a581d84ae6 ("x86/cpufeature: Macrofy inline assembly code to work around 
> GCC inlining bugs")


OK. I will do so in v2.


--
Best Regards
Masahiro Yamada

Re: [PATCH 1/2] mm: introduce put_user_page*(), placeholder versions

2018-12-14 Thread John Hubbard

On 12/14/18 12:03 PM, Matthew Wilcox wrote:
> On Fri, Dec 14, 2018 at 11:53:31AM -0800, Dave Hansen wrote:
>> On 12/14/18 11:48 AM, Matthew Wilcox wrote:
>>> I think we can do better than a proxy object with bit 0 set.  I'd go
>>> for allocating something like this:
>>>
>>> struct dynamic_page {
>>> struct page;
>>> unsigned long vaddr;
>>> unsigned long pfn;
>>> ...
>>> };
>>>
>>> and use a bit in struct page to indicate that this is a dynamic page.
>>
>> That might be fun.  We'd just need a fast/static and slow/dynamic path
>> in page_to_pfn()/pfn_to_page().  We'd also need some kind of auxiliary
>> pfn-to-page structure since we could not fit that^ structure in vmemmap[].
> 
> Yes; working on the pfn-to-page structure right now as it happens ...
> in the meantime, an XArray for it probably wouldn't be _too_ bad.
> 

OK, this looks great. And as Dan pointed out, we get a nice side effect of
type safety for the gup/dma call site conversion. After doing partial 
conversions, the need for type safety (some of the callers really are 
complex) really seems worth the extra work, so that's a big benefit.

Next steps: I want to go try this dynamic_page approach out right away. 
If there are pieces such as page_to_pfn and related, that are already in
progress, I'd definitely like to work on top of that. Also, any up front
advice or pitfalls to avoid is always welcome, of course. :)

thanks,
-- 
John Hubbard
NVIDIA

Re: [RFC v2 14/20] iommu: introduce device fault data

2018-12-14 Thread Jacob Pan

On Wed, 12 Dec 2018 09:21:43 +0100
Auger Eric  wrote:

> Hi Jacob,
> 
> On 9/21/18 12:06 AM, Jacob Pan wrote:
> > On Tue, 18 Sep 2018 16:24:51 +0200
> > Eric Auger  wrote:
> >   
> >> From: Jacob Pan 
> >>
> >> Device faults detected by IOMMU can be reported outside IOMMU
> >> subsystem for further processing. This patch intends to provide
> >> a generic device fault data such that device drivers can be
> >> communicated with IOMMU faults without model specific knowledge.
> >>
> >> The proposed format is the result of discussion at:
> >> https://lkml.org/lkml/2017/11/10/291
> >> Part of the code is based on Jean-Philippe Brucker's patchset
> >> (https://patchwork.kernel.org/patch/9989315/).
> >>
> >> The assumption is that model specific IOMMU driver can filter and
> >> handle most of the internal faults if the cause is within IOMMU
> >> driver control. Therefore, the fault reasons that can be reported are
> >> grouped and generalized based on common specifications such as PCI
> >> ATS.
> >>
> >> Signed-off-by: Jacob Pan 
> >> Signed-off-by: Jean-Philippe Brucker
> >>  Signed-off-by: Liu, Yi L
> >>  Signed-off-by: Ashok Raj
> >>  Signed-off-by: Eric Auger
> >>  [moved part of the iommu_fault_event
> >> struct in the uapi, enriched the fault reasons to be able to map
> >> unrecoverable SMMUv3 errors]  
> > Sounds good to me.
> > There are also other "enrichment" we need to do to support mdev or
> > finer granularity fault reporting below physical device. e.g. PASID
> > level.
> > 
> > The current scheme works for PCIe physical device level, where each
> > device registers a single handler only once. When device fault is
> > detected by the IOMMU, it will find the matching handler and private
> > data to report back. However, for devices partitioned by PASID and
> > represented by mdev this may not work. Since IOMMU is not mdev aware
> > and only works at physical device level.
> > So I am thinking we should allow multiple registration of fault
> > handler with different data and ID. i.e.
> > 
> > int iommu_register_device_fault_handler(struct device *dev,
> > iommu_dev_fault_handler_t
> > handler, int id,
> > void *data)
> > 
> > where the new "id field" is
> >  * @id: Identification of the handler private data, will be used by
> > fault
> >  *  reporting code to match the handler data to be returned.
> > For page
> >  *  request, this can be the PASID. ID must be unique per
> > device, i.e.
> >  *  each ID can only be registered once per device.
> >  *  - IOMMU_DEV_FAULT_ID_UNRECOVERY (~0U) is reserved for fault
> > reporting
> >  *  w/o ID. e.g. unrecoverable faults.
> > 
> > I am still testing, but just wanted to have feedback on this idea.  
> 
> I am currently respinning this series. Do you have a respin for this
> patch including iommu_register_device_fault_handler with the @id param
> as you suggested above? Otherwise 2 solutions: I keep the code as is
> or I do the modification myself implementing a list of fault_params?
> 
you can keep the code as is if it fits your current needs. Yi and I
have thought of some new cases for supporting mdev. We are thinking to
support many to many handler vs PASID relationship. i.e. allow
registration of many fault handlers per device, each associated with an
ID and data. The use case is that a physical device may register a
fault handler for its own PASID or non-PASID related faults. Such
physical device can also be partitioned into sub-device, e.g. mdev, but
fault handler registration is at physical device level in that IOMMU is
not mdev aware.
Anyway, we still need some work to flesh out the details.
> Besides, do you have plans for a "[PATCH v5 00/23] IOMMU and VT-d driver
> support for Shared Virtual Address (SVA)" respin - hope I didn't miss
> anything? - ?
> 
You did not miss anything. Yes, we are still working on some internal
integration issues. It should not affect the common interface much. Or
I can send out a common API spin first once we have the functionality
tested.

Thanks for checking.

> Thanks
> 
> Eric
> > 
> > Thanks,
> > 
> > Jacob
> > 
> >   
> >> ---
> >>  include/linux/iommu.h  | 55 -
> >>  include/uapi/linux/iommu.h | 83
> >> ++ 2 files changed, 136
> >> insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> >> index 9bd3e63d562b..7529c14ff506 100644
> >> --- a/include/linux/iommu.h
> >> +++ b/include/linux/iommu.h
> >> @@ -49,13 +49,17 @@ struct bus_type;
> >>  struct device;
> >>  struct iommu_domain;
> >>  struct notifier_block;
> >> +struct iommu_fault_event;
> >>  
> >>  /* iommu fault flags */
> >> -#define IOMMU_FAULT_READ  0x0
> >> -#define IOMMU_FAULT_WRITE 0x1
> >> +#define IOMMU_FAULT_READ  (1 << 0)
> >> +#define IOMMU_FAULT_WRITE (1 << 1)
> >> +#define IOMMU_FAULT_EXEC  (1 << 2)
> >> +#define IOMMU_FAULT_PRIV  (1 <<

Re: [PATCH v2 1/1] mm, memory_hotplug: Initialize struct pages for the full memory section

2018-12-14 Thread Wei Yang

On Fri, Dec 14, 2018 at 11:19:59AM +0100, Michal Hocko wrote:
>[Your From address seems to have a typo (linux.bm.com) - fixed]
>
>On Fri 14-12-18 10:33:55, Zaslonko Mikhail wrote:
>[...]
>> Yes, it might still trigger PF_POISONED_CHECK if the first page 
>> of the pageblock is left uninitialized (poisoned).
>> But in order to cover these exceptional cases we would need to 
>> adjust memory_hotplug sysfs handler functions with similar 
>> checks (as in the for loop of memmap_init_zone()). And I guess 
>> that is what we were trying to avoid (adding special cases to 
>> memory_hotplug paths).
>
>is_mem_section_removable should test pfn_valid_within at least.
>But that would require some care because next_active_pageblock expects
>aligned pages. Ble, this code is just horrible. I would just remove it
>altogether. I strongly suspect that nobody is using it for anything
>reasonable anyway. The only reliable way to check whether a block is
>removable is to remove it. Everything else is just racy.
>

Sounds reasonable.

The result returned from the removable sysfs file is transient. If no user
relies on it, removing it is the better way.

>-- 
>Michal Hocko
>SUSE Labs

-- 
Wei Yang
Help you, Help me

[PATCH V1] mmc: tegra: HW Command Queue Support for Tegra SDMMC

2018-12-14 Thread Sowjanya Komatineni

This patch adds HW Command Queue for supported Tegra SDMMC
controllers.

Tegra SDHCI with Quirk SDHCI_QUIRK2_BROKEN_64_BIT_DMA disables the
use of 64_BIT DMA to disable 64-bit addressing mode access to the
system memory and sdhci_cqe_enable using flag SDHCI_USE_64_BIT_DMA
for ADMA32/ADMA2 Vs ADMA64/ADMA3 DMA selection.

CQE needs to use ADMA3 as it needs to fetch the task descriptor along
with the transfer descriptor, so this patch forces DMA Select to be
ADMA3 for CQE.

Note that this patch depends on below patches
[0] DMA Config prior to CQE
https://lkml.org/lkml/2018/12/14/1062
[1] SDMMC address range
https://lkml.org/lkml/2018/12/14/1323
[2] Fix sdhci_do_enable_v4_mode
https://lkml.org/lkml/2018/12/14/72

Signed-off-by: Sowjanya Komatineni 
---
 drivers/mmc/host/sdhci-tegra.c | 76 +-
 drivers/mmc/host/sdhci.c   | 18 +-
 drivers/mmc/host/sdhci.h   |  1 +
 3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 7b95d088fdef..eaebe5e22183 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -33,6 +33,7 @@
 #include 
 
 #include "sdhci-pltfm.h"
+#include "cqhci.h"
 
 /* Tegra SDHOST controller vendor register definitions */
 #define SDHCI_TEGRA_VENDOR_CLOCK_CTRL  0x100
@@ -89,6 +90,10 @@
 #define NVQUIRK_NEEDS_PAD_CONTROL  BIT(7)
 #define NVQUIRK_DIS_CARD_CLK_CONFIG_TAP BIT(8)
 
+/* SDMMC CQE Base Address for Tegra Host Ver 4.1 and Higher */
+#define SDHCI_TEGRA_CQE_BASE_ADDR  0xF000
+
+
 struct sdhci_tegra_soc_data {
const struct sdhci_pltfm_data *pdata;
u32 nvquirks;
@@ -128,6 +133,7 @@ struct sdhci_tegra {
u32 default_tap;
u32 default_trim;
u32 dqs_trim;
+   bool enable_hwcq;
 };
 
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -836,6 +842,17 @@ static void tegra_sdhci_voltage_switch(struct sdhci_host 
*host)
tegra_host->pad_calib_required = true;
 }
 
+static void sdhci_tegra_dumpregs(struct mmc_host *mmc)
+{
+   sdhci_dumpregs(mmc_priv(mmc));
+}
+
+static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = {
+   .enable = sdhci_cqe_enable,
+   .disable = sdhci_cqe_disable,
+   .dumpregs = sdhci_tegra_dumpregs,
+};
+
 static const struct sdhci_ops tegra_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
@@ -989,6 +1006,7 @@ static const struct sdhci_ops tegra186_sdhci_ops = {
.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
.voltage_switch = tegra_sdhci_voltage_switch,
.get_max_clock = tegra_sdhci_get_max_clock,
+   .irq = sdhci_cqhci_irq,
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra186_pdata = {
@@ -1030,6 +1048,55 @@ static const struct of_device_id sdhci_tegra_dt_match[] 
= {
 };
 MODULE_DEVICE_TABLE(of, sdhci_tegra_dt_match);
 
+static int sdhci_tegra_add_host(struct sdhci_host *host)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+   struct cqhci_host *cq_host;
+   bool dma64;
+   int ret;
+
+   if (!tegra_host->enable_hwcq)
+   return sdhci_add_host(host);
+
+   host->v4_mode = true;
+
+   ret = sdhci_setup_host(host);
+   if (ret)
+   return ret;
+
+   host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD;
+
+   cq_host = devm_kzalloc(host->mmc->parent,
+   sizeof(*cq_host), GFP_KERNEL);
+   if (!cq_host) {
+   ret = -ENOMEM;
+   goto cleanup;
+   }
+
+   cq_host->mmio = host->ioaddr + SDHCI_TEGRA_CQE_BASE_ADDR;
+   cq_host->ops = &sdhci_tegra_cqhci_ops;
+
+   dma64 = host->flags & SDHCI_USE_64_BIT_DMA;
+   if (dma64)
+   cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
+
+   ret = cqhci_init(cq_host, host->mmc, dma64);
+   if (ret)
+   goto cleanup;
+
+   ret = __sdhci_add_host(host);
+   if (ret)
+   goto cleanup;
+
+   return 0;
+
+cleanup:
+   sdhci_cleanup_host(host);
+   return ret;
+
+}
+
 static int sdhci_tegra_probe(struct platform_device *pdev)
 {
const struct of_device_id *match;
@@ -1039,6 +1106,7 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
struct sdhci_tegra *tegra_host;
struct clk *clk;
int rc;
+   struct resource *iomem;
 
match = of_match_device(sdhci_tegra_dt_match, &pdev->dev);
if (!match)
@@ -1056,6 +1124,12 @@ static int sdhci_tegra_probe(struct platform_device 
*pdev)
tegra_host->pad_control_available = false;
tegra_host->soc_data = soc_data;
 
+   iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+   if (resource_size(iomem) > SDHCI_TEGRA_CQE_BASE_ADDR)
+

[RFC PATCH 02/10] CHROMIUM: wilco_ec: Add new driver for Wilco EC

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

This EC is an incompatible variant of the typical Chrome OS embedded
controller.  It uses the same low-level communication and a similar
protocol with some significant differences.  The EC firmware does
not support the same mailbox commands so it is not registered as a
cros_ec device type.

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Kconfig|  14 +-
 drivers/platform/chrome/Makefile   |   3 +
 drivers/platform/chrome/wilco_ec.h |  97 +
 drivers/platform/chrome/wilco_ec_mailbox.c | 395 +
 4 files changed, 508 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/wilco_ec.h
 create mode 100644 drivers/platform/chrome/wilco_ec_mailbox.c

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 16b1615958aa..4168d5e6bedc 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -49,6 +49,18 @@ config CHROMEOS_TBMC
  To compile this driver as a module, choose M here: the
  module will be called chromeos_tbmc.
 
+config WILCO_EC
+   tristate "ChromeOS Wilco Embedded Controller"
+   depends on ACPI && (X86 || COMPILE_TEST)
+   select CROS_EC_LPC_MEC
+   help
+ If you say Y here, you get support for talking to the Chrome OS
+ Wilco EC over an eSPI bus. This uses a simple byte-level protocol
+ with a checksum.
+
+ To compile this driver as a module, choose M here: the
+ module will be called wilco_ec.
+
 config CROS_EC_CTL
 tristate
 
@@ -86,7 +98,7 @@ config CROS_EC_LPC
 
 config CROS_EC_LPC_MEC
bool "ChromeOS Embedded Controller LPC Microchip EC (MEC) variant"
-   depends on CROS_EC_LPC
+   depends on CROS_EC_LPC || WILCO_EC
default n
help
  If you say Y here, a variant LPC protocol for the Microchip EC
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index cd591bf872bb..b132ba5b3e3d 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -12,4 +12,7 @@ cros_ec_lpcs-objs := cros_ec_lpc.o 
cros_ec_lpc_reg.o
 cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC) += cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)  += cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)+= cros_ec_proto.o
+
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)   += cros_kbd_led_backlight.o
+wilco_ec-objs  := wilco_ec_mailbox.o
+obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec.h 
b/drivers/platform/chrome/wilco_ec.h
new file mode 100644
index 000000000000..ba16fcff87c4
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * wilco_ec - Chrome OS Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef WILCO_EC_H
+#define WILCO_EC_H
+
+#include 
+#include 
+
+#define WILCO_EC_FLAG_NO_RESPONSE  BIT(0) /* EC does not respond */
+#define WILCO_EC_FLAG_EXTENDED_DATA BIT(1) /* EC returns 256 data bytes */
+#define WILCO_EC_FLAG_RAW_REQUEST  BIT(2) /* Do not trim request data */
+#define WILCO_EC_FLAG_RAW_RESPONSE BIT(3) /* Do not trim response data */
+#define WILCO_EC_FLAG_RAW  (WILCO_EC_FLAG_RAW_REQUEST | \
+WILCO_EC_FLAG_RAW_RESPONSE)
+
+/**
+ * enum wilco_ec_msg_type - Message type to select a set of command codes.
+ * @WILCO_EC_MSG_LEGACY: Legacy EC messages for standard EC behavior.
+ * @WILCO_EC_MSG_PROPERTY: Get/Set/Sync EC controlled NVRAM property.
+ * @WILCO_EC_MSG_TELEMETRY: Telemetry data provided by the EC.
+ */
+enum wilco_ec_msg_type {
+   WILCO_EC_MSG_LEGACY = 0x00f0,
+   WILCO_EC_MSG_PROPERTY = 0x00f2,
+   WILCO_EC_MSG_TELEMETRY = 0x00f5,
+};
+
+/**
+ * struct wilco_ec_device - Wilco Embedded Controller handle.
+ * @dev: Device handle.
+ * @mailbox_lock: Mutex to ensure one mailbox command at a time.
+ * @io_command: I/O port for mailbox command.  Provided by ACPI.
+ * @io_data: I/O port for mailbox data.  Provided by ACPI.
+ * @io_packet: I/O port for mailbox packet data.  Provided by ACPI.
+ * @data_buffer: Buffer used for EC communication.  The same buffer
+ *   is used to hold the request and the response.
+ * @data_size: Size of the data buffer used for EC communication.
+ */
+struct wilco_ec_device {
+   struct device *dev;
+   struct

[RFC PATCH 06/10] CHROMIUM: wilco_ec: Add event handling

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

The Wilco Embedded Controller can return extended events that
are not handled by standard ACPI objects.  These events can
include hotkeys which map to standard functions like brightness
controls, or information about EC controlled features like the
charger or battery.

These events are triggered with an ACPI Notify(0x90) and the
event data buffer is read through an ACPI method provided by
the BIOS which reads the event buffer from EC RAM.

These events are then processed, with hotkey events being sent
to the input subsystem and other events put into a queue which
can be read by a userspace daemon via a sysfs attribute.

> evtest /dev/input/event6
Input driver version is 1.0.1
Input device ID: bus 0x19 vendor 0x0 product 0x0 version 0x0
Input device name: "Wilco EC hotkeys"
Supported events:
  Event type 0 (EV_SYN)
  Event type 1 (EV_KEY)
Event code 224 (KEY_BRIGHTNESSDOWN)
Event code 225 (KEY_BRIGHTNESSUP)
Event code 240 (KEY_UNKNOWN)
  Event type 4 (EV_MSC)
Event code 4 (MSC_SCAN)
Properties:
Testing ... (interrupt to exit)
Event: type 4 (EV_MSC), code 4 (MSC_SCAN), value 57
Event: type 1 (EV_KEY), code 224 (KEY_BRIGHTNESSDOWN), value 1
Event: -- SYN_REPORT 
Event: type 1 (EV_KEY), code 224 (KEY_BRIGHTNESSDOWN), value 0
Event: -- SYN_REPORT 
Event: type 4 (EV_MSC), code 4 (MSC_SCAN), value 58
Event: type 1 (EV_KEY), code 225 (KEY_BRIGHTNESSUP), value 1
Event: -- SYN_REPORT 
Event: type 1 (EV_KEY), code 225 (KEY_BRIGHTNESSUP), value 0
Event: -- SYN_REPORT 

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile   |   4 +-
 drivers/platform/chrome/wilco_ec.h |  34 ++
 drivers/platform/chrome/wilco_ec_event.c   | 343 +
 drivers/platform/chrome/wilco_ec_mailbox.c |  11 +
 4 files changed, 390 insertions(+), 2 deletions(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_event.c

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 5ca484c2d0d7..60b19190dba1 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -13,6 +13,6 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)+= 
cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)  += cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)+= cros_ec_proto.o
 
-wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_rtc.o \
-  wilco_ec_sysfs.o
+wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_event.o \
+  wilco_ec_rtc.o wilco_ec_sysfs.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec.h 
b/drivers/platform/chrome/wilco_ec.h
index eee5c514e720..6f084188faa1 100644
--- a/drivers/platform/chrome/wilco_ec.h
+++ b/drivers/platform/chrome/wilco_ec.h
@@ -18,7 +18,9 @@
 #define WILCO_EC_H
 
 #include 
+#include 
 #include 
+#include 
 #include 
 
 /* Normal commands have a maximum 32 bytes of data */
@@ -46,6 +48,22 @@ enum wilco_ec_msg_type {
WILCO_EC_MSG_TELEMETRY = 0x00f5,
 };
 
+/**
+ * struct wilco_ec_event - EC extended events.
+ * @lock: Mutex to guard the list of events.
+ * @list: Queue of EC events to be provided to userspace.
+ * @attr: Sysfs attribute for userspace to read events.
+ * @count: Count of events in the queue.
+ * @input: Input device for hotkey events.
+ */
+struct wilco_ec_event {
+   struct mutex lock;
+   struct list_head list;
+   struct bin_attribute attr;
+   size_t count;
+   struct input_dev *input;
+};
+
 /**
  * struct wilco_ec_device - Wilco Embedded Controller handle.
  * @dev: Device handle.
@@ -57,6 +75,7 @@ enum wilco_ec_msg_type {
  *   is used to hold the request and the response.
  * @data_size: Size of the data buffer used for EC communication.
  * @rtc: RTC device handler.
+ * @event: EC extended event handler.
  */
 struct wilco_ec_device {
struct device *dev;
@@ -67,6 +86,7 @@ struct wilco_ec_device {
void *data_buffer;
size_t data_size;
struct rtc_device *rtc;
+   struct wilco_ec_event event;
 };
 
 /**
@@ -143,4 +163,18 @@ int wilco_ec_rtc_write(struct device *dev, struct rtc_time 
*tm);
  */
 int wilco_ec_rtc_sync(struct device *dev);
 
+/**
+ * wilco_ec_event_init() - Prepare to handle EC events.
+ * @ec: EC device.
+ *
+ * Return: 0 for success or negative error code on failure.
+ */
+int wilco_ec_event_init(struct wilco_ec_device *ec);
+
+/**
+ * wilco_ec_event_remove() - Remove EC event handler.
+ * @ec: EC device.
+ */
+void wilco_ec_event_remove(struct wilco_ec_device *ec);
+
 #endif /* WILCO_EC_H */
diff --git a/drivers/platform/chrome/wilco_ec_event.c 
b/drivers/platform/chrome/wilco_ec_event.c
new file mode 100644
index 000000000000..248f2ec095ab
--- /dev/null
+++

[RFC PATCH 08/10] CHROMIUM: wilco_ec: Add EC properties

2018-12-14 Thread Nick Crews

A Property is typically a data item that is stored to NVRAM.
Each of these data items has an index associated with it
known as the Property ID (PID). The Property ID is
used by the system BIOS (and EC) to refer to the Property.
Properties may have variable lengths. Many features are
implemented primarily by EC Firmware with system BIOS
just supporting user configuration via BIOS SETUP and/or
SMBIOS changes. In order to implement many of these types of
features the user configuration information is saved to and
retrieved from the EC. The EC stores this configuration
information to NVRAM and then can use it while the system
BIOS is not running or during early boot. Although this
is a typical scenario there may be other reasons to store
information in the EC NVRAM instead of the System NVRAM.
Most of the property services do not have a valid failure
condition, so this field can be ignored. For items that
are write once, a failure is returned when a second
write is attempted.

Add a get and set interface for EC properties.
Properties live within the "properties" directory.
Most of the added properties are boolean, but this also
provides the interface for non-boolean properties,
which will be used later for scheduling power routines.

The wilco_ec_sysfs_util.h stuff will be used for
future attributes as well.

> cd /sys/bus/platform/devices/GOOG000C\:00/
> echo 0 > properties/global_mic_mute_led
[mic mute led on keyboard turns off]
> cat properties/global_mic_mute_led
0
> echo 1 > properties/global_mic_mute_led
[mic mute led on keyboard turns on]
> cat properties/global_mic_mute_led
1
> cat properties/wireless_sw_wlan
cat: wireless_sw_wlan: Permission denied
[Good, that is supposed to be write-only]
> echo 0 > properties/wireless_sw_wlan

Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile  |   1 +
 drivers/platform/chrome/wilco_ec_properties.c | 327 ++
 drivers/platform/chrome/wilco_ec_properties.h | 163 +
 drivers/platform/chrome/wilco_ec_sysfs.c  |  66 +++-
 drivers/platform/chrome/wilco_ec_sysfs_util.h |  47 +++
 5 files changed, 598 insertions(+), 6 deletions(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_properties.c
 create mode 100644 drivers/platform/chrome/wilco_ec_properties.h
 create mode 100644 drivers/platform/chrome/wilco_ec_sysfs_util.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 56c39de8e5f5..eefb75e5e69c 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -15,5 +15,6 @@ obj-$(CONFIG_CROS_EC_PROTO)   += cros_ec_proto.o
 
 wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_event.o \
   wilco_ec_rtc.o wilco_ec_legacy.o \
+  wilco_ec_properties.o \
   wilco_ec_sysfs.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec_properties.c 
b/drivers/platform/chrome/wilco_ec_properties.c
new file mode 100644
index 000000000000..7131bd79aa61
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_properties.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_properties - set/get properties of Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include "wilco_ec_properties.h"
+#include "wilco_ec.h"
+#include "wilco_ec_sysfs_util.h"
+
+/* Payload length for get/set properties */
+#define PROPERTY_DATA_MAX_LENGTH 4
+
+struct ec_property_get_request {
+   u32 property_id;
+   u8 length;
+} __packed;
+
+struct ec_property_set_request {
+   u32 property_id;
+   u8 length;
+   u8 data[PROPERTY_DATA_MAX_LENGTH];
+} __packed;
+
+struct ec_property_response {
+   u8 status;
+   u8 sub_function;
+   u32 property_id;
+   u8 length;
+   u8 data[PROPERTY_DATA_MAX_LENGTH];
+} __packed;
+
+/* Store a 32 bit property ID into an array or a field in a struct, LSB first 
*/
+static inline void fill_property_id(u32 property_id, u8 field[])
+{
+   field[0] =  property_id& 0xff;
+   field[1] = (property_id >> 8)  & 0xff;
+   field[2] = (property_id >> 16) & 0xff;
+   field[3] = (property_id >> 24) & 0xff;
+}
+
+/* Extract 32 bit property ID from an array or a field in a struct, LSB first 
*/
+static inline u32 extract_property_id(u8 field[])
+{
+   return (uint32_t)field[0]   |
+  (uint32_t)field[1] << 8  |
+  (uint32_t)field[2]

[RFC PATCH 10/10] CHROMIUM: wilco_ec: Add binary telemetry attributes

2018-12-14 Thread Nick Crews

The Wilco Embedded Controller is able to send telemetry data
which is useful for enterprise applications. A daemon running on
the OS sends a command (possibly with args) to the EC via
this sysfs interface, and the EC responds over the sysfs interface
with the response. Both the request and the response are in an opaque
binary format so that information which is proprietary to the
enterprise service provider is secure.

At this point, the Wilco EC does not implement this telemetry
functionality, so any request using the WILCO_EC_MSG_TELEMETRY
command returns an error. This is just an initial change for
comments, until the EC code is implemented.

Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile |  3 +-
 drivers/platform/chrome/wilco_ec_sysfs.c | 17 -
 drivers/platform/chrome/wilco_ec_telemetry.c | 66 
 drivers/platform/chrome/wilco_ec_telemetry.h | 42 +
 4 files changed, 126 insertions(+), 2 deletions(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_telemetry.c
 create mode 100644 drivers/platform/chrome/wilco_ec_telemetry.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index c9d3d44098f9..62444887ce18 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -17,5 +17,6 @@ wilco_ec-objs := wilco_ec_mailbox.o 
wilco_ec_event.o \
   wilco_ec_rtc.o wilco_ec_legacy.o \
   wilco_ec_sysfs.o \
   wilco_ec_properties.o \
-  wilco_ec_adv_power.o
+  wilco_ec_adv_power.o  \
+  wilco_ec_telemetry.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec_sysfs.c 
b/drivers/platform/chrome/wilco_ec_sysfs.c
index 81dded951058..0019d17c0cf9 100644
--- a/drivers/platform/chrome/wilco_ec_sysfs.c
+++ b/drivers/platform/chrome/wilco_ec_sysfs.c
@@ -19,6 +19,7 @@
 #include "wilco_ec_legacy.h"
 #include "wilco_ec_properties.h"
 #include "wilco_ec_adv_power.h"
+#include "wilco_ec_telemetry.h"
 
 #define WILCO_EC_ATTR_RO(_name)
\
 __ATTR(_name, 0444, wilco_ec_##_name##_show, NULL)
@@ -46,7 +47,21 @@ static struct attribute *wilco_ec_toplevel_attrs[] = {
NULL
 };
 
-ATTRIBUTE_GROUPS(wilco_ec_toplevel);
+static struct bin_attribute telem_attr = TELEMETRY_BIN_ATTR(telemetry);
+static struct bin_attribute *telem_attrs[] = {
+   &telem_attr,
+   NULL
+};
+
+static const struct attribute_group wilco_ec_toplevel_group = {
+   .attrs = wilco_ec_toplevel_attrs,
+   .bin_attrs = telem_attrs,
+};
+
+static const struct attribute_group *wilco_ec_toplevel_groups[] = {
+   &wilco_ec_toplevel_group,
+   NULL,
+};
 
 /* Make property attributes, which will live inside GOOG000C:00/properties/  */
 
diff --git a/drivers/platform/chrome/wilco_ec_telemetry.c 
b/drivers/platform/chrome/wilco_ec_telemetry.c
new file mode 100644
index 000000000000..5b8168754b43
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_telemetry.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_telemetry - Telemetry sysfs attributes for Wilco EC
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "wilco_ec.h"
+#include "wilco_ec_sysfs_util.h"
+
+/* Data buffer for holding EC's response for telemetry data */
+static u8 telemetry_data[EC_MAILBOX_DATA_SIZE_EXTENDED];
+
+ssize_t wilco_ec_telem_write(struct file *filp, struct kobject *kobj,
+struct bin_attribute *attr, char *buf, loff_t loff,
+size_t count)
+{
+   struct wilco_ec_message msg;
+   int ret;
+   struct device *dev = device_from_kobject(kobj);
+   struct wilco_ec_device *ec = dev_get_drvdata(dev);
+
+   if (count < 1 || count > EC_MAILBOX_DATA_SIZE_EXTENDED)
+   return -EINVAL;
+
+   /* Clear response data buffer */
+   memset(telemetry_data, 0, EC_MAILBOX_DATA_SIZE_EXTENDED);
+
+   msg.type = WILCO_EC_MSG_TELEMETRY;
+   msg.flags = WILCO_EC_FLAG_RAW | WILCO_EC_FLAG_EXTENDED_DATA;
+   msg.command = buf[0];
+   msg.request_data = buf + 1;
+   msg.request_size = EC_MAILBOX_DATA_SIZE;
+   msg.response_data = &telemetry_data;
+   msg.response_size =

Re: [rfd] saving old mice -- button glitching/debouncing

2018-12-14 Thread Dmitry Torokhov

Hi Pavel,

On Fri, Dec 14, 2018 at 3:24 PM Pavel Machek  wrote:
>
>
> I believe I have hardware problem, but I'm kind of hoping software
> could help me...?
>
> Mouse wheel on my machine started glitching, generating
> double-clicks when I click it once. Which unfortunately is quite
> annoying: texts are pasted twice, two tabs are closed instead of one,
> 
>
> Event: time 1544733054.903129, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
> Event: time 1544733054.903129, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 1
> Event: time 1544733054.903129, -- EV_SYN 
> Event: time 1544733054.967251, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
> Event: time 1544733054.967251, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 0
> Event: time 1544733054.967251, -- EV_SYN 
> Event: time 1544733054.975144, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
> Event: time 1544733054.975144, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 1
> Event: time 1544733054.975144, -- EV_SYN 
> Event: time 1544733065.127190, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
> Event: time 1544733065.127190, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 0
> Event: time 1544733065.127190, -- EV_SYN 
>
> Now, I could just buy a new mouse, but it seems that most optical mice
> die like this... so maybe it would be nice to have an option to
> debounce the buttons, so that the useful life of mice is extended a
> bit?
>
> (So... I have two mice with that fault -- cheap to replace, but button
> in thinkpad X220 started doing that, too. That one will not be so
> cheap to fix :-( ).
>
> It is possible that some X versions already do something like this.
>
> Patch is obviously not ready; but:
>
> a) would it be useful to people
>
> b) would it be acceptable if done properly? (cmd line option to
> enable, avoiding duplicate/wrong events?)

I'd say if you are attached to failing hardware, solve it in
userspace. Have a utility/daemon that you run (from udev?) that:

- "grabs" input device with EVIOCGRAB
- does the debouncing/filtering/adjusting for the dirty sensor
- reinject events back into kernel with /dev/uinput

It will add some latency, but should be workable.

Thanks.

-- 
Dmitry

[RFC PATCH 07/10] CHROMIUM: wilco_ec: Move legacy attributes to separate file

2018-12-14 Thread Nick Crews

Legacy attributes are EC properties that are non-chromebook specific,
ones which existed before the EC was modified for Chromebooks (as I
understand it at least). This adds no new behavior, but just refactors
the existing legacy attributes so adding more attributes in the future
will work in an elegant way.

wilco_ec_sysfs.c should just contain the information you need to understand
how to use the sysfs interface. It will only specify the names of the
attributes and their location within the directory structure.
The API and the implementation for all the attributes will be located
in individual external files, so if you want to see how to use an
attribute, you can look at that specific file. This should help
with the problem of this sysfs file becoming way too big to understand.

With this new structure, it should be easy to add new attributes by
making new implementation files, #include-ing them in wilco_ec_sysfs.c,
and then defining where to actually place the attributes in
wilco_ec_sysfs.c

Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile  |   3 +-
 drivers/platform/chrome/wilco_ec_legacy.c | 204 ++
 drivers/platform/chrome/wilco_ec_legacy.h |  96 +
 drivers/platform/chrome/wilco_ec_sysfs.c  | 248 --
 4 files changed, 343 insertions(+), 208 deletions(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_legacy.c
 create mode 100644 drivers/platform/chrome/wilco_ec_legacy.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 60b19190dba1..56c39de8e5f5 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -14,5 +14,6 @@ obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)+= cros_ec_proto.o
 
 wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_event.o \
-  wilco_ec_rtc.o wilco_ec_sysfs.o
+  wilco_ec_rtc.o wilco_ec_legacy.o \
+  wilco_ec_sysfs.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec_legacy.c 
b/drivers/platform/chrome/wilco_ec_legacy.c
new file mode 100644
index ..6110117af024
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_legacy.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_legacy - Legacy sysfs attributes for Wilco EC
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+
+#include "wilco_ec.h"
+#include "wilco_ec_legacy.h"
+
+#ifdef CONFIG_WILCO_EC_SYSFS_RAW
+
+/* Raw data buffer, large enough to hold extended responses */
+static size_t raw_response_size;
+static u8 raw_response_data[EC_MAILBOX_DATA_SIZE_EXTENDED];
+
+ssize_t wilco_ec_raw_store(struct device *dev, struct device_attribute *attr,
+  const char *buf, size_t count)
+{
+   struct wilco_ec_device *ec = dev_get_drvdata(dev);
+   struct wilco_ec_message msg;
+   u8 raw_request_data[EC_MAILBOX_DATA_SIZE];
+   int in_offset = 0;
+   int out_offset = 0;
+   int ret;
+
+   while (in_offset < count) {
+   char word_buf[EC_MAILBOX_DATA_SIZE];
+   u8 byte;
+   int start_offset = in_offset;
+   int end_offset;
+
+   /* Find the start of the byte */
+   while (buf[start_offset] && isspace(buf[start_offset]))
+   start_offset++;
+   if (!buf[start_offset])
+   break;
+
+   /* Find the start of the next byte, if any */
+   end_offset = start_offset;
+   while (buf[end_offset] && !isspace(buf[end_offset]))
+   end_offset++;
+   if (start_offset > count || end_offset > count)
+   break;
+   if (start_offset > EC_MAILBOX_DATA_SIZE ||
+   end_offset > EC_MAILBOX_DATA_SIZE)
+   break;
+
+   /* Copy to a new NUL-terminated string */
+   memcpy(word_buf, buf + start_offset, end_offset - start_offset);
+   word_buf[end_offset - start_offset] = '\0';
+
+   /* Convert from hex string */
+   ret = kstrtou8(word_buf, 16, &byte);
+   if (ret)
+   break;
+
+   /* Fill this byte into the request buffer */
+   raw_request_data[out_offset++] = byte;
+

[RFC PATCH 05/10] CHROMIUM: wilco_ec: Add RTC class driver

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

This Embedded Controller has an internal RTC that is exposed
as a standard RTC class driver with read/write functionality.

> hwclock --show --rtc /dev/rtc1
2007-12-31 16:01:20.460959-08:00
> hwclock --systohc --rtc /dev/rtc1
> hwclock --show --rtc /dev/rtc1
2018-11-29 17:08:00.780793-08:00

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile   |   3 +-
 drivers/platform/chrome/wilco_ec.h |  29 
 drivers/platform/chrome/wilco_ec_mailbox.c |  15 ++
 drivers/platform/chrome/wilco_ec_rtc.c | 163 +
 4 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_rtc.c

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index e8603bc5b095..5ca484c2d0d7 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -13,5 +13,6 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)+= 
cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)  += cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)+= cros_ec_proto.o
 
-wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_sysfs.o
+wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_rtc.o \
+  wilco_ec_sysfs.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec.h 
b/drivers/platform/chrome/wilco_ec.h
index 0b3dec4e2830..eee5c514e720 100644
--- a/drivers/platform/chrome/wilco_ec.h
+++ b/drivers/platform/chrome/wilco_ec.h
@@ -19,6 +19,7 @@
 
 #include 
 #include 
+#include 
 
 /* Normal commands have a maximum 32 bytes of data */
 #define EC_MAILBOX_DATA_SIZE   32
@@ -55,6 +56,7 @@ enum wilco_ec_msg_type {
  * @data_buffer: Buffer used for EC communication.  The same buffer
  *   is used to hold the request and the response.
  * @data_size: Size of the data buffer used for EC communication.
+ * @rtc: RTC device handler.
  */
 struct wilco_ec_device {
struct device *dev;
@@ -64,6 +66,7 @@ struct wilco_ec_device {
struct resource *io_packet;
void *data_buffer;
size_t data_size;
+   struct rtc_device *rtc;
 };
 
 /**
@@ -114,4 +117,30 @@ int wilco_ec_sysfs_init(struct wilco_ec_device *ec);
  */
 void wilco_ec_sysfs_remove(struct wilco_ec_device *ec);
 
+/**
+ * wilco_ec_rtc_read() - Fill RTC time structure with values from the EC.
+ * @dev: EC device.
+ * @tm: Returns RTC time from EC.
+ *
+ * Return: 0 for success or negative error code on failure.
+ */
+int wilco_ec_rtc_read(struct device *dev, struct rtc_time *tm);
+
+/**
+ * wilco_ec_rtc_write() - Write EC time/date from RTC time structure.
+ * @dev: EC device.
+ * @tm: RTC time to write to EC.
+ *
+ * Return: 0 for success or negative error code on failure.
+ */
+int wilco_ec_rtc_write(struct device *dev, struct rtc_time *tm);
+
+/**
+ * wilco_ec_rtc_sync() - Write EC time/date from system time.
+ * @dev: EC device.
+ *
+ * Return: 0 for success or negative error code on failure.
+ */
+int wilco_ec_rtc_sync(struct device *dev);
+
 #endif /* WILCO_EC_H */
diff --git a/drivers/platform/chrome/wilco_ec_mailbox.c 
b/drivers/platform/chrome/wilco_ec_mailbox.c
index 1cb34b7280fd..2f093a281a30 100644
--- a/drivers/platform/chrome/wilco_ec_mailbox.c
+++ b/drivers/platform/chrome/wilco_ec_mailbox.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "wilco_ec.h"
 
 /* Version of mailbox interface */
@@ -325,6 +326,11 @@ static struct resource *wilco_get_resource(struct 
platform_device *pdev,
return res;
 }
 
+static const struct rtc_class_ops wilco_ec_rtc_ops = {
+   .read_time = wilco_ec_rtc_read,
+   .set_time = wilco_ec_rtc_write,
+};
+
 static int wilco_ec_probe(struct platform_device *pdev)
 {
struct device *dev = &pdev->dev;
@@ -355,6 +361,15 @@ static int wilco_ec_probe(struct platform_device *pdev)
cros_ec_lpc_mec_init(ec->io_packet->start,
 ec->io_packet->start + EC_MAILBOX_DATA_SIZE);
 
+   /* Install RTC driver */
+   ec->rtc = devm_rtc_device_register(ec->dev, "wilco_ec",
+  &wilco_ec_rtc_ops, THIS_MODULE);
+   if (IS_ERR(ec->rtc)) {
+   dev_err(dev, "Failed to install RTC driver\n");
+   cros_ec_lpc_mec_destroy();
+   return PTR_ERR(ec->rtc);
+   }
+
/* Create sysfs attributes for userspace interaction */
if (wilco_ec_sysfs_init(ec) < 0) {
dev_err(dev, "Failed to create sysfs attributes\n");
diff --git a/drivers/platform/chrome/wilco_ec_rtc.c 
b/drivers/platform/chrome/wilco_ec_rtc.c
new file mode 100644
index ..3e36403d0cb8
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_rtc.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_rtc - RTC interface for Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+

[RFC PATCH 09/10] CHROMIUM: wilco_ec: Add peakshift and adv_batt_charging

2018-12-14 Thread Nick Crews

Create "peakshift" and "advanced_battery_charging" directories
within the "properties" directory, and create the relevant
attributes within these. These properties have to do with
configuring some of the advanced power management options that
prolong battery health and reduce energy use at peak hours
of the day.

Scheduling events uses a 24 hour clock, and only supports time
intervals of 15 minutes. For example, to set
advanced_battery_charging to start at 4:15pm and to last for
6 hours and 45 minutes, you would use the argument "16 15 6 45".

> cd /sys/bus/platform/devices/GOOG000C\:00
> cat properties/peakshift/peakshift_battery_threshold
> 015
[means 15 percent]
> cat properties/peakshift/peakshift_monday
16 00 20 30 00 00
[starts at 4:00 pm, ends at 8:30, charging resumes at midnight]
> echo "16 00 20 31 00 00" > properties/peakshift/peakshift_monday
-bash: echo: write error: Invalid argument
> dmesg | tail -n1
[40.34534] wilco_ec GOOG000C:00: minutes must be at the quarter hour
> echo "16 0 20 45 0 0" > properties/peakshift/peakshift_monday
> cat properties/peakshift/peakshift_monday
16 00 20 45 00 00

Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile |   3 +-
 drivers/platform/chrome/wilco_ec_adv_power.c | 533 +++
 drivers/platform/chrome/wilco_ec_adv_power.h | 193 +++
 drivers/platform/chrome/wilco_ec_sysfs.c | 103 
 4 files changed, 831 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_adv_power.c
 create mode 100644 drivers/platform/chrome/wilco_ec_adv_power.h

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index eefb75e5e69c..c9d3d44098f9 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_CROS_EC_PROTO)   += cros_ec_proto.o
 
 wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_event.o \
   wilco_ec_rtc.o wilco_ec_legacy.o \
+  wilco_ec_sysfs.o \
   wilco_ec_properties.o \
-  wilco_ec_sysfs.o
+  wilco_ec_adv_power.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec_adv_power.c 
b/drivers/platform/chrome/wilco_ec_adv_power.c
new file mode 100644
index ..30120e2dd007
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_adv_power.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_adv_power - peakshift and adv_batt_charging config of Wilco EC
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include "wilco_ec_adv_power.h"
+#include "wilco_ec_properties.h"
+#include "wilco_ec_sysfs_util.h"
+#include "wilco_ec.h"
+
+struct adv_batt_charging_data {
+   int duration_hours;
+   int duration_minutes;
+   int start_hours;
+   int start_minutes;
+};
+
+struct peakshift_data {
+   int start_hours;
+   int start_minutes;
+   int end_hours;
+   int end_minutes;
+   int charge_start_hours;
+   int charge_start_minutes;
+};
+
+/**
+ * struct time_bcd_format - spec for binary coded decimal time format
+ * @hour_position: how many bits left within the byte is the hour
+ * @minute_position: how many bits left within the byte is the minute
+ *
+ * Date and hour information is passed to/from the EC using packed bytes,
+ * where each byte represents an hour and a minute that some event occurs.
+ * The minute field always happens at quarter-hour intervals, so either
+ * 0, 15, 30, or 45. This allows this info to be packed within 2 bits.
+ * Along with the 5 bits of hour info [0-23], this gives us 7 used bits
+ * within each packed byte. The annoying thing is that the PEAKSHIFT and
+ * ADVANCED_BATTERY_CHARGING properties pack these 7 bits differently,
+ * hence this struct.
+ */
+struct time_bcd_format {
+   u8 hour_position;
+   u8 minute_position;
+};
+
+const struct time_bcd_format PEAKSHIFT_BCD_FORMAT = {
+// bit[0] is unused
+   .hour_position = 1,  // bits[1:6]
+   .minute_position = 6 // bits[6:8]
+};
+
+const struct time_bcd_format ADV_BATT_CHARGING_BCD_FORMAT = {
+   .minute_position = 0, // bits[0:2]
+   .hour_position = 2// bits[2:7]
+ // bit[7] is unused
+};
+
+/**
+ * struct peakshift_payload - The formatted peakshift time

[RFC PATCH 03/10] CHROMIUM: wilco_ec: Add sysfs attributes

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

Add some sample sysfs attributes for the Wilco EC that show how
the mailbox interface works.

> cat /sys/bus/platform/devices/GOOG000C\:00/version
Label: 99.99.99
SVN Revision : 738ed.99
Model Number : 08;8
Build Date   : 08/30/18

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Makefile   |   3 +-
 drivers/platform/chrome/wilco_ec.h |  14 +++
 drivers/platform/chrome/wilco_ec_mailbox.c |  12 ++
 drivers/platform/chrome/wilco_ec_sysfs.c   | 121 +
 4 files changed, 148 insertions(+), 2 deletions(-)
 create mode 100644 drivers/platform/chrome/wilco_ec_sysfs.c

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index b132ba5b3e3d..e8603bc5b095 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -13,6 +13,5 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)+= 
cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)  += cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)+= cros_ec_proto.o
 
-obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)   += cros_kbd_led_backlight.o
-wilco_ec-objs  := wilco_ec_mailbox.o
+wilco_ec-objs  := wilco_ec_mailbox.o wilco_ec_sysfs.o
 obj-$(CONFIG_WILCO_EC) += wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec.h 
b/drivers/platform/chrome/wilco_ec.h
index ba16fcff87c4..699f4cf744dc 100644
--- a/drivers/platform/chrome/wilco_ec.h
+++ b/drivers/platform/chrome/wilco_ec.h
@@ -94,4 +94,18 @@ struct wilco_ec_message {
  */
 int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg);
 
+/**
+ * wilco_ec_sysfs_init() - Create sysfs attributes.
+ * @ec: EC device.
+ *
+ * Return: 0 for success or negative error code on failure.
+ */
+int wilco_ec_sysfs_init(struct wilco_ec_device *ec);
+
+/**
+ * wilco_ec_sysfs_remove() - Remove sysfs attributes.
+ * @ec: EC device.
+ */
+void wilco_ec_sysfs_remove(struct wilco_ec_device *ec);
+
 #endif /* WILCO_EC_H */
diff --git a/drivers/platform/chrome/wilco_ec_mailbox.c 
b/drivers/platform/chrome/wilco_ec_mailbox.c
index 6613c18c2a82..414ea0a8ad03 100644
--- a/drivers/platform/chrome/wilco_ec_mailbox.c
+++ b/drivers/platform/chrome/wilco_ec_mailbox.c
@@ -361,11 +361,23 @@ static int wilco_ec_probe(struct platform_device *pdev)
cros_ec_lpc_mec_init(ec->io_packet->start,
 ec->io_packet->start + EC_MAILBOX_DATA_SIZE);
 
+   /* Create sysfs attributes for userspace interaction */
+   if (wilco_ec_sysfs_init(ec) < 0) {
+   dev_err(dev, "Failed to create sysfs attributes\n");
+   cros_ec_lpc_mec_destroy();
+   return -ENODEV;
+   }
+
return 0;
 }
 
 static int wilco_ec_remove(struct platform_device *pdev)
 {
+   struct wilco_ec_device *ec = platform_get_drvdata(pdev);
+
+   /* Remove sysfs attributes */
+   wilco_ec_sysfs_remove(ec);
+
/* Teardown cros_ec interface */
cros_ec_lpc_mec_destroy();
 
diff --git a/drivers/platform/chrome/wilco_ec_sysfs.c 
b/drivers/platform/chrome/wilco_ec_sysfs.c
new file mode 100644
index ..f9ae6cef6169
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec_sysfs.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * wilco_ec_sysfs - Sysfs attributes for Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include "wilco_ec.h"
+
+#define EC_COMMAND_EC_INFO 0x38
+#define EC_INFO_SIZE9
+#define EC_COMMAND_STEALTH_MODE0xfc
+
+struct ec_info {
+   u8 index;
+   const char *label;
+};
+
+static ssize_t wilco_ec_show_info(struct wilco_ec_device *ec, char *buf,
+ ssize_t count, struct ec_info *info)
+{
+   char result[EC_INFO_SIZE];
+   struct wilco_ec_message msg = {
+   .type = WILCO_EC_MSG_LEGACY,
+   .command = EC_COMMAND_EC_INFO,
+   .request_data = &info->index,
+   .request_size = sizeof(info->index),
+   .response_data = result,
+   .response_size = EC_INFO_SIZE,
+   };
+   int ret;
+
+   ret = wilco_ec_mailbox(ec, &msg);
+   if (ret != EC_INFO_SIZE)
+   return scnprintf(buf + count, PAGE_SIZE - count,
+"%-12s : ERROR %d\n", info->label, ret);
+
+   return scnprintf(buf + count, PAGE_SIZE - count,
+

[RFC PATCH 00/10] cros_ec: Add support for Wilco EC

2018-12-14 Thread Nick Crews

The Chromebook named wilco contains a different Embedded Controller
than the rest of the chromebook series, and thus the kernel requires
a different driver than the already existing and generalized
cros_ec_* drivers. Specifically, this driver adds support for getting
and setting the RTC on the EC, adding a binary sysfs attribute
that receives ACPI events from the EC, adding a binary sysfs
attribute to request telemetry data from the EC (useful for enterprise
applications), and adding normal sysfs attributes to get/set various
other properties on the EC. The core of the communication with the EC
is implemented in wilco_ec_mailbox.c, using a simple byte-level protocol
with a checksum, transmitted over an eSPI bus. For debugging purposes,
a raw attribute is also provided which can write/read arbitrary
bytes to/from the eSPI bus.

We attempted to adhere to the sysfs principles of "one piece of data per
attribute" as much as possible, and mostly succeeded. However, with the
wilco_ec_adv_power.h attributes, which deal with scheduling power usage,
we found it most elegant to bundle setting event times for an entire day
into a single attribute, so at most you are using attributes formatted
as "%d %d %d %d %d %d". With the telemetry attribute, we had to use a
binary attribute, instead of the preferable human-readable ascii, in
order to keep secure the information which is proprietary to the
enterprise service provider. This opaque binary data will be read and
sent using a proprietary daemon running on the OS. Finally, the
"version" attribute returns a formatted result that looks something
like:
> cat /sys/bus/platform/devices/GOOG000C\:00/version
Label: 95.00.06
SVN Revision : 5960a.06
Model Number : 08;8
Build Date   : 11/29/18

The RTC driver is exposed as a standard RTC class driver with
read/write functionality.

For event notification, the Wilco EC can return extended events that
are not handled by standard ACPI objects. These events can
include hotkeys which map to standard functions like brightness
controls, or information about EC controlled features like the
charger or battery. These events are triggered with an
ACPI Notify(0x90) and the event data buffer is read through an ACPI
method provided by the BIOS which reads the event buffer from EC RAM.
These events are then processed, with hotkey events being sent
to the input subsystem and other events put into a queue which
can be read by a userspace daemon via a sysfs attribute.

The rest of the attributes are categorized as either "properties" or
"legacy". "legacy" implies that the attribute existed on the EC before it
was modified for ChromeOS, and "properties" implies that the attribute
exposes functionality that was added to the EC specifically for
ChromeOS. They are mostly boolean flags or percentages.

A full thread of the development of these patches can be found at
https://chromium-review.googlesource.com/c/1371034. This thread contains
comments and revisions that could be helpful in understanding how the
driver arrived at the state it is in now. The thread also contains some
ChromeOS specific patches that actually enable the driver. If you want
to test the patch yourself, you would have to install the ChromeOS SDK
and cherry pick in these patches.

I also wrote some integration tests using the Tast testing framework that
ChromeOS uses. It would require a full ChromeOS SDK to actually run the
tests, but the source of the tests, written in Go, are useful for
understanding what the desired behavior is. You can view the tests here:
https://chromium-review.googlesource.com/c/1372575

This is still an initial version of the driver, and we are sending it
upstream for comments now, so that we can incorporate any requested
changes such that it eventually can be merged. Thank you for your
comments!


Duncan Laurie (6):
  CHROMIUM: cros_ec: Remove cros_ec dependency in lpc_mec
  CHROMIUM: wilco_ec: Add new driver for Wilco EC
  CHROMIUM: wilco_ec: Add sysfs attributes
  CHROMIUM: wilco_ec: Add support for raw commands in sysfs
  CHROMIUM: wilco_ec: Add RTC class driver
  CHROMIUM: wilco_ec: Add event handling

Nick Crews (4):
  CHROMIUM: wilco_ec: Move legacy attributes to separate file
  CHROMIUM: wilco_ec: Add EC properties
  CHROMIUM: wilco_ec: Add peakshift and adv_batt_charging
  CHROMIUM: wilco_ec: Add binary telemetry attributes

 drivers/platform/chrome/Kconfig   |  24 +-
 drivers/platform/chrome/Makefile  |   9 +-
 drivers/platform/chrome/cros_ec_lpc_mec.c |  54 +-
 drivers/platform/chrome/cros_ec_lpc_mec.h |  45 +-
 drivers/platform/chrome/cros_ec_lpc_reg.c |  43 +-
 drivers/platform/chrome/wilco_ec.h| 180 ++
 drivers/platform/chrome/wilco_ec_adv_power.c  | 533 ++
 drivers/platform/chrome/wilco_ec_adv_power.h  | 193 +++
 drivers/platform/chrome/wilco_ec_event.c  | 343 +++
 drivers/platform/chrome/wilco_ec_legacy.c | 204 +++

[RFC PATCH 01/10] CHROMIUM: cros_ec: Remove cros_ec dependency in lpc_mec

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

In order to allow this code to be re-used, remove the dependency
on the rest of the cros_ec code from the cros_ec_lpc_mec functions.

Instead of using a hardcoded register base address of 0x800 have
this be passed in to cros_ec_lpc_mec_init().  The existing cros_ec
use case now passes in the 0x800 base address this way.

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/cros_ec_lpc_mec.c | 54 +++
 drivers/platform/chrome/cros_ec_lpc_mec.h | 45 +++
 drivers/platform/chrome/cros_ec_lpc_reg.c | 43 +-
 3 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c 
b/drivers/platform/chrome/cros_ec_lpc_mec.c
index c4edfa83e493..18bd9f82be6c 100644
--- a/drivers/platform/chrome/cros_ec_lpc_mec.c
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -34,6 +35,7 @@
  * EC mutex because memmap data may be accessed without it being held.
  */
 static struct mutex io_mutex;
+static u16 mec_emi_base, mec_emi_end;
 
 /*
  * cros_ec_lpc_mec_emi_write_address
@@ -46,10 +48,39 @@ static struct mutex io_mutex;
 static void cros_ec_lpc_mec_emi_write_address(u16 addr,
enum cros_ec_lpc_mec_emi_access_mode access_type)
 {
-   /* Address relative to start of EMI range */
-   addr -= MEC_EMI_RANGE_START;
-   outb((addr & 0xfc) | access_type, MEC_EMI_EC_ADDRESS_B0);
-   outb((addr >> 8) & 0x7f, MEC_EMI_EC_ADDRESS_B1);
+   outb((addr & 0xfc) | access_type, MEC_EMI_EC_ADDRESS_B0(mec_emi_base));
+   outb((addr >> 8) & 0x7f, MEC_EMI_EC_ADDRESS_B1(mec_emi_base));
+}
+
+/*
+ * cros_ec_lpc_mec_in_range
+ *
+ * Determine if requested addresses are in MEC EMI range.
+ *
+ * @offset:  Address offset
+ * @length: Number of bytes to check
+ *
+ * @return 1 if in range, 0 if not, and -1 if there is an error
+ * such as the mec range not being initialized
+ */
+int cros_ec_lpc_mec_in_range(unsigned int offset, unsigned int length)
+{
+   if (length == 0)
+   return -1;
+
+   if (WARN_ON(mec_emi_base == 0 || mec_emi_end == 0))
+   return -1;
+
+   if (offset >= mec_emi_base && offset < mec_emi_end) {
+   if (WARN_ON(offset + length - 1 >= mec_emi_end))
+   return -1;
+   return 1;
+   }
+
+   if (WARN_ON(offset + length > mec_emi_base && offset < mec_emi_end))
+   return -1;
+
+   return 0;
 }
 
 /*
@@ -71,6 +102,11 @@ u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type 
io_type,
u8 sum = 0;
enum cros_ec_lpc_mec_emi_access_mode access, new_access;
 
+   /* Return checksum of 0 if window is not initialized */
+   WARN_ON(mec_emi_base == 0 || mec_emi_end == 0);
+   if (mec_emi_base == 0 || mec_emi_end == 0)
+   return 0;
+
/*
 * Long access cannot be used on misaligned data since reading B0 loads
 * the data register and writing B3 flushes.
@@ -86,9 +122,9 @@ u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type 
io_type,
cros_ec_lpc_mec_emi_write_address(offset, access);
 
/* Skip bytes in case of misaligned offset */
-   io_addr = MEC_EMI_EC_DATA_B0 + (offset & 0x3);
+   io_addr = MEC_EMI_EC_DATA_B0(mec_emi_base) + (offset & 0x3);
while (i < length) {
-   while (io_addr <= MEC_EMI_EC_DATA_B3) {
+   while (io_addr <= MEC_EMI_EC_DATA_B3(mec_emi_base)) {
if (io_type == MEC_IO_READ)
buf[i] = inb(io_addr++);
else
@@ -118,7 +154,7 @@ u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type 
io_type,
}
 
/* Access [B0, B3] on each loop pass */
-   io_addr = MEC_EMI_EC_DATA_B0;
+   io_addr = MEC_EMI_EC_DATA_B0(mec_emi_base);
}
 
 done:
@@ -128,9 +164,11 @@ u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type 
io_type,
 }
 EXPORT_SYMBOL(cros_ec_lpc_io_bytes_mec);
 
-void cros_ec_lpc_mec_init(void)
+void cros_ec_lpc_mec_init(unsigned int base, unsigned int end)
 {
mutex_init(&io_mutex);
+   mec_emi_base = base;
+   mec_emi_end = end;
 }
 EXPORT_SYMBOL(cros_ec_lpc_mec_init);
 
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.h 
b/drivers/platform/chrome/cros_ec_lpc_mec.h
index 105068c0e919..a81cc6a8b621 100644
--- a/drivers/platform/chrome/cros_ec_lpc_mec.h
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.h
@@ -24,8 +24,6 @@
 #ifndef __CROS_EC_LPC_MEC_H
 #define __CROS_EC_LPC_MEC_H
 
-#include 
-
 enum cros_ec_lpc_mec_emi_access_mode {
/* 8-bit access */
ACCESS_TYPE_BYTE = 0x0,
@@ -45,35 +43,46 @@ enum cros_ec_lpc_mec_io_type {
MEC_IO_WRITE,
 };
 
-/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
-#define MEC_EMI_RANGE_START

[RFC PATCH 04/10] CHROMIUM: wilco_ec: Add support for raw commands in sysfs

2018-12-14 Thread Nick Crews

From: Duncan Laurie 

Add a sysfs attribute that allows sending raw commands to the EC.
This is useful for development and debug but should not be enabled
in a production environment.

> echo 00 f0 38 00 03 00 > /sys/bus/platform/devices/GOOG000C\:00/raw
> cat /sys/bus/platform/devices/GOOG000C\:00/raw
00 37 33 38 65 64 00...

Signed-off-by: Duncan Laurie 
Signed-off-by: Nick Crews 
---

 drivers/platform/chrome/Kconfig|  10 ++
 drivers/platform/chrome/wilco_ec.h |   6 +
 drivers/platform/chrome/wilco_ec_mailbox.c |   6 -
 drivers/platform/chrome/wilco_ec_sysfs.c   | 126 +
 4 files changed, 142 insertions(+), 6 deletions(-)

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 4168d5e6bedc..05c6d9a00395 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -61,6 +61,16 @@ config WILCO_EC
  To compile this driver as a module, choose M here: the
  module will be called wilco_ec.
 
+config WILCO_EC_SYSFS_RAW
+   bool "Enable raw access to EC via sysfs"
+   depends on WILCO_EC
+   default n
+   help
+ If you say Y here, you get support for sending raw commands to
+ the Wilco EC via sysfs.  These commands do not do any byte
+ manipulation and allow for testing arbitrary commands.  This
+ interface is intended for debug only and is disabled by default.
+
 config CROS_EC_CTL
 tristate
 
diff --git a/drivers/platform/chrome/wilco_ec.h 
b/drivers/platform/chrome/wilco_ec.h
index 699f4cf744dc..0b3dec4e2830 100644
--- a/drivers/platform/chrome/wilco_ec.h
+++ b/drivers/platform/chrome/wilco_ec.h
@@ -20,6 +20,12 @@
 #include 
 #include 
 
+/* Normal commands have a maximum 32 bytes of data */
+#define EC_MAILBOX_DATA_SIZE   32
+
+/* Extended commands have 256 bytes of response data */
+#define EC_MAILBOX_DATA_SIZE_EXTENDED  256
+
 #define WILCO_EC_FLAG_NO_RESPONSE  BIT(0) /* EC does not respond */
 #define WILCO_EC_FLAG_EXTENDED_DATABIT(1) /* EC returns 256 data bytes */
 #define WILCO_EC_FLAG_RAW_REQUEST  BIT(2) /* Do not trim request data */
diff --git a/drivers/platform/chrome/wilco_ec_mailbox.c 
b/drivers/platform/chrome/wilco_ec_mailbox.c
index 414ea0a8ad03..1cb34b7280fd 100644
--- a/drivers/platform/chrome/wilco_ec_mailbox.c
+++ b/drivers/platform/chrome/wilco_ec_mailbox.c
@@ -44,12 +44,6 @@
 /* Version of EC protocol */
 #define EC_MAILBOX_PROTO_VERSION   3
 
-/* Normal commands have a maximum 32 bytes of data */
-#define EC_MAILBOX_DATA_SIZE   32
-
-/* Extended commands have 256 bytes of response data */
-#define EC_MAILBOX_DATA_SIZE_EXTENDED  256
-
 /* Number of header bytes to be counted as data bytes */
 #define EC_MAILBOX_DATA_EXTRA  2
 
diff --git a/drivers/platform/chrome/wilco_ec_sysfs.c 
b/drivers/platform/chrome/wilco_ec_sysfs.c
index f9ae6cef6169..eeebd4ba4a39 100644
--- a/drivers/platform/chrome/wilco_ec_sysfs.c
+++ b/drivers/platform/chrome/wilco_ec_sysfs.c
@@ -23,6 +23,126 @@
 #define EC_INFO_SIZE9
 #define EC_COMMAND_STEALTH_MODE0xfc
 
+#ifdef CONFIG_WILCO_EC_SYSFS_RAW
+
+/* Raw data buffer, large enough to hold extended responses */
+static size_t raw_response_size;
+static u8 raw_response_data[EC_MAILBOX_DATA_SIZE_EXTENDED];
+
+/*
+ * raw: write a raw command and return the result
+ *
+ * Bytes 0-1 indicate the message type:
+ *  00 F0 = Execute Legacy Command
+ *  00 F2 = Read/Write NVRAM Property
+ * Byte 2 provides the command code
+ * Bytes 3+ consist of the data passed in the request
+ *
+ * example: read the EC info type 1:
+ *  # echo 00 f0 38 00 01 00 > raw
+ *  # cat raw
+ *  00 38 31 34 34 66 00 00 00 00 00 00 00 00 00 00 00...
+ */
+
+static ssize_t raw_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   ssize_t count = 0;
+
+   if (raw_response_size) {
+   int i;
+
+   for (i = 0; i < raw_response_size; ++i)
+   count += scnprintf(buf + count, PAGE_SIZE - count,
+  "%02x ", raw_response_data[i]);
+
+   count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
+
+   /* Only return response the first time it is read */
+   raw_response_size = 0;
+   }
+
+   return count;
+}
+
+static ssize_t raw_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t count)
+{
+   struct wilco_ec_device *ec = dev_get_drvdata(dev);
+   struct wilco_ec_message msg;
+   u8 raw_request_data[EC_MAILBOX_DATA_SIZE];
+   int in_offset = 0;
+   int out_offset = 0;
+   int ret;
+
+   while (in_offset < count) {
+   char word_buf[EC_MAILBOX_DATA_SIZE];
+   u8 byte;
+   int start_offset = in_offset;
+   int end_offset;
+
+   /*

Re: [RFC RESEND PATCH] kvm: arm64: export memory error recovery capability to user space

2018-12-14 Thread gengdongjiu

> 
> On Fri, 14 Dec 2018 at 13:56, James Morse  wrote:
> >
> > Hi Dongjiu Geng,
> >
> > On 14/12/2018 10:15, Dongjiu Geng wrote:
> > > When user space do memory recovery, it will check whether KVM and 
> > > guest support the error recovery, only when both of them support, 
> > > user space will do the error recovery. This patch exports this 
> > > capability of KVM to user space.
> >
> > I can understand user-space only wanting to do the work if host and 
> > guest support the feature. But 'error recovery' isn't a KVM feature, 
> > its a Linux kernel feature.
> >
> > KVM will send it's user-space a SIGBUS with MCEERR code whenever its 
> > trying to map a page at stage2 that the kernel-mm code refuses this because 
> > its poisoned.
> > (e.g. check_user_page_hwpoison(), get_user_pages() returns 
> > -EHWPOISON)
> >
> > This is exactly the same as happens to a normal user-space process.
> >
> > I think you really want to know if the host kernel was built with 
> > CONFIG_MEMORY_FAILURE.
> 
> Does userspace need to care about that? Presumably if the host kernel 
> wasn't built with that support then it will simply never deliver any memory 
> failure events to QEMU, which is fine.
> 
> The point I was trying to make in the email Dongjiu references
> (https://patchwork.codeaurora.org/patch/652261/) is simply that "QEMU gets 
> memory-failure notifications from the host kernel"
> does not imply "the guest is prepared to receive memory failure 
> notifications", and so the code path which handles the SIGBUS must do 
> some kind of check for whether the guest CPU is a type which expects them and 
> that the board code set up the ACPI tables that it wants to fill in.

Thanks for Peter's explanation. Frankly speaking, I agree with Peter's suggestion.

To James, let me explain in more detail. As Peter said, QEMU needs to check
whether the guest CPU is a type which can handle the error through the guest
ACPI table. Let us look at the x86 QEMU logic:
1. Before the vCPU is created, it sets a default env->mcg_cap value with the
MCE_CAP_DEF flag; MCG_SER_P means it expects the guest CPU model to support RAS
error recovery.[1]
2. When the vCPU is initialized, it checks whether the host kernel supports this
feature[2]. Only when both the host kernel and the default env->mcg_cap value
expect this feature will it set up the vCPU to support RAS error recovery[3].
So I added this IOCTL "KVM_CAP_ARM_MEMORY_ERROR_RECOVERY" to let QEMU check
whether the host/KVM supports RAS error detection and recovery; only when it is
supported will QEMU do the error recovery for the guest memory.

[1]
#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF |
(cpu->enable_lmce ? MCG_LMCE_P : 0);

[2] ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);

[3]
env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK;
ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap);

-For James's 
comments-
> KVM doesn't detect these errors.
> The hardware detects them and notifies the OS via one of a number of 
> mechanisms.
> This gets plumbed into memory_failure(), which sets a flag that the mm 
> code uses to prevent the page being used again.

> KVM is only involved when it tries to map a page at stage2 and the mm 
> code rejects it with -EHWPOISON. This is the same as the architectures
> do_page_fault() checking for (fault & VM_FAULT_HWPOISON) out of 
> handle_mm_fault(). We don't have a KVM cap for this, nor do we need one.
--
James, regarding your comments above, I completely understand, but KVM also
delivers the SIGBUS, which means KVM supports guest memory RAS error recovery,
so maybe we need to tell user space about this capability.

-- For James's comments 
---
> The CPU RAS Extensions are not at all relevant here. It is perfectly 
> possible to support memory-failure without them, AMD-Seattle and 
> APM-X-Gene do this. These systems would report not-supported here, but the 
> kernel does support this stuff.
> Just because the CPU supports this, doesn't mean the kernel was built 
> with CONFIG_MEMORY_FAILURE. The CPU reports may be ignored, or upgraded to 
> SIGKILL.
--
James, regarding your comments above, if you think we should not check
"cpus_have_const_cap(ARM64_HAS_RAS_EXTN)", what would you prefer we check instead?
The x86 KVM code uses a hard-coded value to tell user space that the host/KVM
supports RAS error software recovery[4]. If KVM does not check
"cpus_have_const_cap(ARM64_HAS_RAS_EXTN)", we have to use a hard-coded value, as
x86 does.

[4]:
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P |

Re: [PATCH v1 9/9] mm: better document PG_reserved

2018-12-14 Thread Randy Dunlap

On 12/14/18 3:10 AM, David Hildenbrand wrote:
> The usage of PG_reserved and how PG_reserved pages are to be treated is
> buried deep down in different parts of the kernel. Let's shine some light
> onto these details by documenting current users and expected
> behavior.
> 
> Especially, clarify on the "Some of them might not even exist" case.
> These are physical memory gaps that will never be dumped as they
> are not marked as IORESOURCE_SYSRAM. PG_reserved does in general not
> hinder anybody from dumping or swapping. In some cases, these pages
> will not be stored in the hibernation image.

Hi,
Thanks for the doc update.
Comments below.

> Cc: Andrew Morton 
> Cc: Stephen Rothwell 
> Cc: Pavel Tatashin 
> Cc: Michal Hocko 
> Cc: Alexander Duyck 
> Cc: Matthew Wilcox 
> Cc: Anthony Yznaga 
> Cc: Miles Chen 
> Cc: yi.z.zh...@linux.intel.com
> Cc: Dan Williams 
> Signed-off-by: David Hildenbrand 
> ---
>  include/linux/page-flags.h | 33 +++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 808b4183e30d..9de2e941cbd5 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -17,8 +17,37 @@
>  /*
>   * Various page->flags bits:
>   *
> - * PG_reserved is set for special pages, which can never be swapped out. Some
> - * of them might not even exist...
> + * PG_reserved is set for special pages. The "struct page" of such a page
> + * should in general not be touched (e.g. set dirty) except by their owner.

   by its owner.

> + * Pages marked as PG_reserved include:
> + * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS,
> + *   initrd, HW tables)
> + * - Pages reserved or allocated early during boot (before the page allocator
> + *   was initialized). This includes (depending on the architecture) the
> + *   initial vmmap, initial page tables, crashkernel, elfcorehdr, and much

VM map,

> + *   much more. Once (if ever) freed, PG_reserved is cleared and they will
> + *   be given to the page allocator.
> + * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
> + *   to read/write these pages might end badly. Don't touch!
> + * - The zero page(s)
> + * - Pages not added to the page allocator when onlining a section because
> + *   they were excluded via the online_page_callback() or because they are
> + *   PG_hwpoison.
> + * - Pages allocated in the context of kexec/kdump (loaded kernel image,
> + *   control pages, vmcoreinfo)
> + * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are
> + *   not marked PG_reserved (as they might be in use by somebody else who 
> does
> + *   not respect the caching strategy).
> + * - Pages part of an offline section (struct pages of offline sections 
> should
> + *   not be trusted as they will be initialized when first onlined).
> + * - MCA pages on ia64
> + * - Pages holding CPU notes for POWER Firmware Assisted Dump
> + * - Device memory (e.g. PMEM, DAX, HMM)
> + * Some PG_reserved pages will be excluded from the hibernation image.
> + * PG_reserved does in general not hinder anybody from dumping or swapping
> + * and is no longer required for remap_pfn_range(). ioremap might require it.
> + * Consequently, PG_reserved for a page mapped into user space can indicate
> + * the zero page, the vDSO, MMIO pages or device memory.
>   *
>   * The PG_private bitflag is set on pagecache pages if they contain 
> filesystem
>   * specific data (which is normally at page->private). It can be used by
> 

cheers.
-- 
~Randy

撤回: [RFC RESEND PATCH] kvm: arm64: export memory error recovery capability to user space

2018-12-14 Thread gengdongjiu

gengdongjiu 将撤回邮件“[RFC RESEND PATCH] kvm: arm64: export memory error recovery 
capability to user space”。

Re: [RFC RESEND PATCH] kvm: arm64: export memory error recovery capability to user space

2018-12-14 Thread gengdongjiu

> 
> On Fri, 14 Dec 2018 at 13:56, James Morse  wrote:
> >
> > Hi Dongjiu Geng,
> >
> > On 14/12/2018 10:15, Dongjiu Geng wrote:
> > > When user space do memory recovery, it will check whether KVM and
> > > guest support the error recovery, only when both of them support,
> > > user space will do the error recovery. This patch exports this
> > > capability of KVM to user space.
> >
> > I can understand user-space only wanting to do the work if host and
> > guest support the feature. But 'error recovery' isn't a KVM feature,
> > its a Linux kernel feature.
> >
> > KVM will send it's user-space a SIGBUS with MCEERR code whenever its
> > trying to map a page at stage2 that the kernel-mm code refuses this because 
> > its poisoned.
> > (e.g. check_user_page_hwpoison(), get_user_pages() returns -EHWPOISON)
> >
> > This is exactly the same as happens to a normal user-space process.
> >
> > I think you really want to know if the host kernel was built with
> > CONFIG_MEMORY_FAILURE.
> 
> Does userspace need to care about that? Presumably if the host kernel wasn't 
> built with that support then it will simply never deliver any
> memory failure events to QEMU, which is fine.
> 
> The point I was trying to make in the email Dongjiu references
> (https://patchwork.codeaurora.org/patch/652261/) is simply that "QEMU gets 
> memory-failure notifications from the host kernel"
> does not imply "the guest is prepared to receive memory failure 
> notifications", and so the code path which handles the SIGBUS must do
> some kind of check for whether the guest CPU is a type which expects them and 
> that the board code set up the ACPI tables that it wants to
> fill in.

Thanks for Peter's explanation. Frankly speaking, I agree with Peter's suggestion.

To James, let me explain in more detail. As Peter said, QEMU needs to check
whether the guest CPU is a type which can handle the error through the guest
ACPI table. Let us look at the x86 QEMU logic:
1. Before the vCPU is created, it sets a default env->mcg_cap value with the
MCE_CAP_DEF flag; MCG_SER_P means it expects the guest CPU model to support RAS
error recovery.[1]
2. When the vCPU is initialized, it checks whether the host kernel supports this
feature[2]. Only when both the host kernel and the default env->mcg_cap value
expect this feature will it set up the vCPU to support RAS error recovery[3].
So I added this IOCTL "KVM_CAP_ARM_MEMORY_ERROR_RECOVERY" to let QEMU check
whether the host/KVM supports RAS error detection and recovery; only when it is
supported will QEMU do the error recovery for the guest memory.

[1]
#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF |
(cpu->enable_lmce ? MCG_LMCE_P : 0);

[2] ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);

[3]
env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK;
ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap);

-For James's 
comments-
> KVM doesn't detect these errors.
> The hardware detects them and notifies the OS via one of a number of 
> mechanisms.
> This gets plumbed into memory_failure(), which sets a flag that the mm code 
> uses
> to prevent the page being used again.

> KVM is only involved when it tries to map a page at stage2 and the mm code
> rejects it with -EHWPOISON. This is the same as the architectures
> do_page_fault() checking for (fault & VM_FAULT_HWPOISON) out of
> handle_mm_fault(). We don't have a KVM cap for this, nor do we need one.
--
James, regarding your comments above, I completely understand, but KVM also
delivers the SIGBUS, which means KVM supports guest memory RAS error recovery,
so maybe we need to tell user space about this capability.

-- For James's comments 
---
> The CPU RAS Extensions are not at all relevant here. It is perfectly possible 
> to
> support memory-failure without them, AMD-Seattle and APM-X-Gene do this. These
> systems would report not-supported here, but the kernel does support this 
> stuff.
> Just because the CPU supports this, doesn't mean the kernel was built with
> CONFIG_MEMORY_FAILURE. The CPU reports may be ignored, or upgraded to SIGKILL.
--
James, regarding your comments above[4], if you think we should not check
"cpus_have_const_cap(ARM64_HAS_RAS_EXTN)", what would you prefer we check instead?
The x86 KVM code uses a hard-coded value to tell user space that the host/KVM
supports RAS error software recovery. If KVM does not check
"cpus_have_const_cap(ARM64_HAS_RAS_EXTN)", we have to use a hard-coded value, as
x86 does.

[4]:
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;

long

Re: [PATCH v17 18/23] platform/x86: Intel SGX driver

2018-12-14 Thread Sean Christopherson

On Fri, Dec 14, 2018 at 05:59:17PM -0600, Dr. Greg wrote:
> On Wed, Dec 12, 2018 at 08:00:36PM +0200, Jarkko Sakkinen wrote:
> 
> Good evening, I hope the week has gone well for everyone.
> 
> > On Mon, Dec 10, 2018 at 04:49:08AM -0600, Dr. Greg wrote:
> > > In the meantime, I wanted to confirm that your jarkko-sgx/master
> > > branch contains the proposed driver that is headed upstream.
> > > Before adding the SFLC patches we thought it best to run the
> > > driver through some testing in order to verify that any problems
> > > we generated where attributable to our work and not the base
> > > driver.
> >
> > The master branch is by definition unstable at the moment i.e. it
> > can sometimes (not often) contain unfinished changes. Use next for
> > testing.  I update next when I consider the master contents "stable
> > enough".
> 
> I noticed in the last day or so that you appeared to sync
> jarkko-sgx/master with jarkko-sgx/next, so I checked out a local
> branch against jarkko-sgx/next and ran it against our unit tests.
> Based on what we are seeing the driver is still experiencing issues
> with initialization of a non-trivial enclave.

master branch is broken, looks like the VMA code Jarkko is reworking is
buggy.  I should be able to help debug this next week.

[  504.149548] [ cut here ]
[  504.149550] kernel BUG at /home/sean/go/src/kernel.org/linux/mm/mmap.c:669!
[  504.150288] invalid opcode:  [#1] SMP
[  504.150614] CPU: 2 PID: 237 Comm: kworker/u20:2 Not tainted 4.20.0-rc2+ #267
[  504.151165] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 
02/06/2015
[  504.151818] Workqueue: sgx-encl-wq sgx_encl_release_worker
[  504.152267] RIP: 0010:__vma_adjust+0x64a/0x820
[  504.152626] Code: ff 48 89 50 18 e9 6f fc ff ff 4c 8b ab 88 00 00 00 45 31 
e4 e9 61 fb ff ff 31 c0 48 83 c4 60 5b 5d 41 5c 41 5d 41 5e 41 5f c3 <0f> 0b 49 
89 de 49 83 c6 20 0f 84 06 fe ff ff 49 8d 7e e0 e8 fe ee
[  504.154109] RSP: :c94ebd60 EFLAGS: 00010206
[  504.154535] RAX: 7fd92ef7e000 RBX: 888467af16c0 RCX: 888467af16e0
[  504.155104] RDX: 888458fd09e0 RSI: 7fd954021000 RDI: 88846bf9e798
[  504.155673] RBP: 888467af1480 R08: 88845bea2000 R09: 
[  504.156242] R10: 8000 R11: fefefefefefefeff R12: 
[  504.156810] R13: 88846bf9e790 R14: 888467af1b70 R15: 888467af1b60
[  504.157378] FS:  () GS:88846f70() 
knlGS:
[  504.158021] CS:  0010 DS:  ES:  CR0: 80050033
[  504.158483] CR2: 7f2c56e99000 CR3: 05009001 CR4: 00360ee0
[  504.159054] DR0:  DR1:  DR2: 
[  504.159623] DR3:  DR6: fffe0ff0 DR7: 0400
[  504.160193] Call Trace:
[  504.160406]  __split_vma+0x16f/0x180
[  504.160706]  ? __switch_to_asm+0x40/0x70
[  504.161024]  __do_munmap+0xfb/0x450
[  504.161308]  sgx_encl_release_worker+0x44/0x70
[  504.161675]  process_one_work+0x200/0x3f0
[  504.162004]  worker_thread+0x2d/0x3d0
[  504.162301]  ? process_one_work+0x3f0/0x3f0
[  504.162645]  kthread+0x113/0x130
[  504.162912]  ? kthread_park+0x90/0x90
[  504.163209]  ret_from_fork+0x35/0x40
[  504.163503] Modules linked in: bridge stp llc
[  504.163866] ---[ end trace 83076139fc25e3e0 ]---

Re: [PATCH v17 18/23] platform/x86: Intel SGX driver

2018-12-14 Thread Dr. Greg

On Wed, Dec 12, 2018 at 08:00:36PM +0200, Jarkko Sakkinen wrote:

Good evening, I hope the week has gone well for everyone.

> On Mon, Dec 10, 2018 at 04:49:08AM -0600, Dr. Greg wrote:
> > In the meantime, I wanted to confirm that your jarkko-sgx/master
> > branch contains the proposed driver that is headed upstream.
> > Before adding the SFLC patches we thought it best to run the
> > driver through some testing in order to verify that any problems
> > we generated where attributable to our work and not the base
> > driver.
>
> The master branch is by definition unstable at the moment i.e. it
> can sometimes (not often) contain unfinished changes. Use next for
> testing.  I update next when I consider the master contents "stable
> enough".

I noticed in the last day or so that you appeared to sync
jarkko-sgx/master with jarkko-sgx/next, so I checked out a local
branch against jarkko-sgx/next and ran it against our unit tests.
Based on what we are seeing the driver is still experiencing issues
with initialization of a non-trivial enclave.

On the first test boot of the new kernel, the EINIT ioctl consistently
returned EBUSY over multiple invocations of the unit test.  This did
not appear to generate any negative issues with the kernel at large.

We rebooted the box to run the test against a fresh kernel load.  This
time around we experienced issues similar to what we had previously
described.  The EINIT ioctl generates a segmentation fault which seems
to largely incapacitate the kernel.

Here are the logs and first backtrace from the test:

---
Dec 14 13:25:06 nuc2 kernel: PGD 4f001067 P4D 4f001067 PUD 0 
Dec 14 13:25:06 nuc2 kernel: BUG: unable to handle kernel paging request at 
97bf3ae916fe
Dec 14 13:25:06 nuc2 kernel: Oops: 0002 [#1] SMP PTI
Dec 14 13:25:06 nuc2 kernel: CPU: 1 PID: 34 Comm: kworker/1:1 Not tainted 
4.20.0-rc2-sgx-nuc2+ #12
Dec 14 13:25:06 nuc2 kernel: Hardware name: Intel Corporation NUC7CJYH/NUC7JYB, 
BIOS JYGLKCPX.86A.0046.2018.1103.1316 11/03/2018
Dec 14 13:25:06 nuc2 kernel: Workqueue: events cache_reap
Dec 14 13:25:06 nuc2 kernel: RIP: 0010:free_block+0xe3/0x182
Dec 14 13:25:06 nuc2 kernel: Code: 20 45 29 d4 41 d3 ec 0f b6 4f 1d 45 01 e2 41 
d3 ea 41 8b 49 30 ff c9 49 83 79 20 00 41 89 49 30 75 04 4d 89 59 20 4d 8b 59 
20 <45> 88 14 0b 49 8d 49 08 41 83 79 30 00 75 1a 4c 8b 50 28 49 89 4a
Dec 14 13:25:06 nuc2 kernel: RSP: 0018:b90800123db0 EFLAGS: 00210046
Dec 14 13:25:06 nuc2 kernel: RAX: 97be3b419080 RBX: 000f RCX: 
ff7e
Dec 14 13:25:06 nuc2 kernel: RDX: 0018 RSI: d907ffc82b70 RDI: 
97be3b44c200
Dec 14 13:25:06 nuc2 kernel: RBP: b90800123dd8 R08: b90800123e10 R09: 
f9b345eba440
Dec 14 13:25:06 nuc2 kernel: R10: 0051f663 R11: 97be3ae91780 R12: 
11ede5c3
Dec 14 13:25:06 nuc2 kernel: R13: 8000 R14: 97be3b419088 R15: 
97be3b4190a8
Dec 14 13:25:06 nuc2 kernel: FS:  () 
GS:97be3be8() knlGS:
Dec 14 13:25:06 nuc2 kernel: CS:  0010 DS:  ES:  CR0: 80050033
Dec 14 13:25:06 nuc2 kernel: CR2: 97bf3ae916fe CR3: 4ec0a000 CR4: 
00340ee0
Dec 14 13:25:06 nuc2 kernel: Call Trace:
Dec 14 13:25:06 nuc2 kernel:  drain_array_locked+0x50/0x75
Dec 14 13:25:06 nuc2 kernel:  drain_array.constprop.67+0x57/0x72
Dec 14 13:25:06 nuc2 kernel:  cache_reap+0x58/0x101
Dec 14 13:25:06 nuc2 kernel:  process_one_work+0x183/0x271
Dec 14 13:25:06 nuc2 kernel:  worker_thread+0x1e5/0x2b4
Dec 14 13:25:06 nuc2 kernel:  ? cancel_delayed_work_sync+0x10/0x10
Dec 14 13:25:06 nuc2 kernel:  kthread+0x116/0x11e
Dec 14 13:25:06 nuc2 kernel:  ? kthread_park+0x7e/0x7e
Dec 14 13:25:06 nuc2 kernel:  ret_from_fork+0x1f/0x40
Dec 14 13:25:06 nuc2 kernel: Modules linked in:
Dec 14 13:25:06 nuc2 kernel: CR2: 97bf3ae916fe
Dec 14 13:25:06 nuc2 kernel: ---[ end trace 7f5dc24edc7285b3 ]---
Dec 14 13:25:06 nuc2 kernel: RIP: 0010:free_block+0xe3/0x182
Dec 14 13:25:06 nuc2 kernel: Code: 20 45 29 d4 41 d3 ec 0f b6 4f 1d 45 01 e2 41 
d3 ea 41 8b 49 30 ff c9 49 83 79 20 00 41 89 49 30 75 04 4d 89 59 20 4d 8b 59 
20 <45> 88 14 0b 49 8d 49 08 41 83 79 30 00 75 1a 4c 8b 50 28 49 89 4a
Dec 14 13:25:06 nuc2 kernel: RSP: 0018:b90800123db0 EFLAGS: 00210046
Dec 14 13:25:06 nuc2 kernel: RAX: 97be3b419080 RBX: 000f RCX: 
ff7e
Dec 14 13:25:06 nuc2 kernel: RDX: 0018 RSI: d907ffc82b70 RDI: 
97be3b44c200
Dec 14 13:25:06 nuc2 kernel: RBP: b90800123dd8 R08: b90800123e10 R09: 
f9b345eba440
Dec 14 13:25:06 nuc2 kernel: R10: 0051f663 R11: 97be3ae91780 R12: 
11ede5c3
Dec 14 13:25:06 nuc2 kernel: R13: 8000 R14: 97be3b419088 R15: 
97be3b4190a8
Dec 14 13:25:06 nuc2 kernel: FS:  () 
GS:97be3be8() knlGS:
Dec 14 13:25:06 nuc2 kernel: CS:  0010 DS:  ES:

[PATCH v3] binder: fix use-after-free due to ksys_close() during fdget()

2018-12-14 Thread Todd Kjos

44d8047f1d8 ("binder: use standard functions to allocate fds")
exposed a pre-existing issue in the binder driver.

fdget() is used in ksys_ioctl() as a performance optimization.
One of the rules associated with fdget() is that ksys_close() must
not be called between the fdget() and the fdput(). There is a case
where this requirement is not met in the binder driver which results
in the reference count dropping to 0 when the device is still in
use. This can result in use-after-free or other issues.

If userspace has passed a file-descriptor for the binder driver using
a BINDER_TYPE_FDA object, then ksys_close() is called on it when
handling a binder_ioctl(BC_FREE_BUFFER) command. This violates
the assumptions for using fdget().

The problem is fixed by deferring the close using task_work_add(). A
new variant of __close_fd() was created that returns a struct file
with a reference. The fput() is deferred instead of using ksys_close().

Fixes: 44d8047f1d87a ("binder: use standard functions to allocate fds")
Suggested-by: Al Viro 
Signed-off-by: Todd Kjos 
---
v2:
- simplified code
v3:
- implemented Al Viro's suggestion to pass struct file instead of fd
- added __close_fd_get_file() to close the fd, but reference the file

 drivers/android/binder.c | 63 ++--
 fs/file.c| 29 ++
 include/linux/fdtable.h  |  1 +
 3 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c525b164d39d2f..c4ee11d883dd93 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -72,6 +72,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -2184,6 +2185,64 @@ static bool binder_validate_fixup(struct binder_buffer 
*b,
return (fixup_offset >= last_min_offset);
 }
 
+/**
+ * struct binder_task_work_cb - for deferred close
+ *
+ * @twork:callback_head for task work
+ * @file:  struct file to close
+ *
+ * Structure to pass task work to be handled after
+ * returning from binder_ioctl() via task_work_add().
+ */
+struct binder_task_work_cb {
+   struct callback_head twork;
+   struct file *file;
+};
+
+/**
+ * binder_do_fd_close() - close list of file descriptors
+ * @twork: callback head for task work
+ *
+ * It is not safe to call ksys_close() during the binder_ioctl()
+ * function if there is a chance that binder's own file descriptor
+ * might be closed. This is to meet the requirements for using
+ * fdget() (see comments for __fget_light()). Therefore use
+ * task_work_add() to schedule the close operation once we have
+ * returned from binder_ioctl(). This function is a callback
+ * for that mechanism and does the deferred fput() on the
+ * file whose descriptor was closed.
+ */
+static void binder_do_fd_close(struct callback_head *twork)
+{
+   struct binder_task_work_cb *twcb = container_of(twork,
+   struct binder_task_work_cb, twork);
+
+   fput(twcb->file);
+   kfree(twcb);
+}
+
+/**
+ * binder_deferred_fd_close() - schedule a close for the given file-descriptor
+ * @fd:file-descriptor to close
+ *
+ * See comments in binder_do_fd_close(). This function is used to schedule
+ * a file-descriptor to be closed after returning from binder_ioctl().
+ */
+static void binder_deferred_fd_close(int fd)
+{
+   struct binder_task_work_cb *twcb;
+
+   twcb = kzalloc(sizeof(*twcb), GFP_KERNEL);
+   if (!twcb)
+   return;
+   init_task_work(&twcb->twork, binder_do_fd_close);
+   __close_fd_get_file(fd, &twcb->file);
+   if (twcb->file)
+   task_work_add(current, &twcb->twork, true);
+   else
+   kfree(twcb);
+}
+
 static void binder_transaction_buffer_release(struct binder_proc *proc,
  struct binder_buffer *buffer,
  binder_size_t *failed_at)
@@ -2323,7 +2382,7 @@ static void binder_transaction_buffer_release(struct 
binder_proc *proc,
}
fd_array = (u32 *)(parent_buffer + 
(uintptr_t)fda->parent_offset);
for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
-   ksys_close(fd_array[fd_index]);
+   binder_deferred_fd_close(fd_array[fd_index]);
} break;
default:
pr_err("transaction release %d bad object type %x\n",
@@ -3942,7 +4001,7 @@ static int binder_apply_fd_fixups(struct 
binder_transaction *t)
} else if (ret) {
u32 *fdp = (u32 *)(t->buffer->data + fixup->offset);
 
-   ksys_close(*fdp);
+   binder_deferred_fd_close(*fdp);
}
list_del(&fixup->fixup_entry);
kfree(fixup);
diff --git a/fs/file.c b/fs/file.c
index 7ffd6e9d103d64..8d059d8973e9fc 100644
--- a/fs/file.c
+++

Re: [RFC PATCH 2/2] PCI/portdrv Hisilicon PCIe transport layer Port PMU driver.

2018-12-14 Thread Bjorn Helgaas

[+cc Rafael, Len, linux-acpi, linux-kernel]

Thanks a lot for working on this!  I've been hoping somebody would
push on the PMU issue because there are some real wrinkles to work
out.

On Fri, Dec 14, 2018 at 09:10:55PM +0800, Jonathan Cameron wrote:
> The Hip08 SoCs contain relatively detailed performance units for the
> PCIe Transport Layer at each port.
> 
> The support here is a subset of what will come, but is intended to
> provide some initial basic functionality.
> 
> Note that there is a _lot_ more functionality in this hardware unit
> so this is the first RFC of several.
> 
> RFC questions:
> 
> 1. There is no standard for PCIe PMUs.  However, there are things that
>are elements of the PCIe protocol so any similar PMU is likely to
>support them.  Do we want to have a go at some consistent naming?

Is this a perf question, i.e., are you asking about the event names
from "perf list"?  If so, I have no idea :)  But you're right that the
events on the PCIe side are mostly defined by the PCIe spec and I
agree it would make a lot of sense to use common names for those
things.

> 2. We are using an ACPI DSDT description to find what is basically a
>platform device that is associated with a PCIe device. Is this an
>acceptable thing to do?

If the PCIe device itself, e.g., a Root Port, consumes address space,
it should have a BAR that describes it.

If the Root Complex, which is not a PCIe device and does not have an
architected configuration or programming model, consumes address
space, it would make sense to describe it via ACPI in the PNP0A03
device like other host bridge registers.

From your cover letter, I think you have the latter situation, and if
I understand correctly, you have something like this in ACPI:

  PNP0A03   PCI host bridge
HISIPMU registers for Root Port 00:1c.0
HISIPMU registers for Root Port 00:1c.2

The PNP0A03 _CRS describes the address space it consumes, which
comprises (1) the register space that operates the bridge itself,
e.g., sets apertures, performs PCI config accesses, etc., and
(2) space that is converted to PCI transactions on the downstream
side.

So this feels a little strange to me because ACPI says the HISI
devices are below the host bridge, but instead of consuming PCI
address space, they consume part of the PNP0A03 register space.
But maybe that makes sense in ACPI, I dunno.

As far as I can tell, you don't actually need *anything* supplied by
portdrv except the pcie_device.port, which you only use to find the
ACPI companion device and to hang the devm_ things on.

As written, hisi_pcie_pmu will bind to any Port, including Switch
Ports as well as Root Ports.  It has a poor-man's match() built into
hisi_pcie_pmu_probe(), which is sort of sub-optimal.  It would be
nicer to have a real match() function run by the bus driver.  I know
portdrv doesn't give you that, and since portdrv claims every Port
itself, there's no good way for other drivers like this to get
connected to Ports.

Do you have a hotplug strategy yet?  PMUs in Switches below the Root
Ports sound like a useful thing.  Since you bind to every Port, you
will bind to hot-added Switch Ports, but unless you use ACPI hotplug,
you won't have a chance to add ACPI companion devices for them.

> Not yet fully established.
> 
> 1. I haven't done enough testing with high performance devices to
>establish the 'minimum' frequency of the high resolution timer.
>It may be that we just end up dropping some of the counters
>because the load is too high to sample them often enough.
>Once the basic approach is established I'll run the numbers
>on this. These are up to Gen4x16 ports so things go pretty quick.
> 
> Signed-off-by: Jonathan Cameron 
> ---
>  drivers/pci/pcie/Kconfig |   9 +
>  drivers/pci/pcie/Makefile|   2 +
>  drivers/pci/pcie/hisi_pcie_pmu.c | 528 +++
>  include/linux/cpuhotplug.h   |   1 +
>  4 files changed, 540 insertions(+)

This looks like a nice piece of work, but I'm not sure where to put
it.  It really doesn't feel like part of the PCI core in the sense
that the PCI specs don't help you write it, and it doesn't provide any
PCI services used by other parts of the kernel.  It feels more like
just another driver for a device that happens to be on a PCI bus.

Granted, it currently requires portdrv, but I think the only reason
for that is to resolve the "multiple drivers need the same device"
problem.  I don't think portdrv is a good solution for that.

The other portdrv services (AER, hotplug, DPC, etc) are all defined in
the spec, and they're related in strange ways like sharing interrupts.
I would *like* to see those services moved into the PCI core directly
so they're not *drivers*, they're just optional features like MSI,
IOV, etc.

Then the Ports would by default not have a driver bound to them and
device-specific drivers like this could directly bind to the Port PCI
device.

Bjorn

[PATCH v6] gpu: ipu-csi: Swap fields according to input/output field types

2018-12-14 Thread Steve Longerbeam

The function ipu_csi_init_interface() was inverting the F-bit for
NTSC case, in the CCIR_CODE_1/2 registers. The result being that
for NTSC bottom-top field order, the CSI would swap fields and
capture in top-bottom order.

Instead, base field swap on the field order of the input to the CSI,
and the field order of the requested output. If the input/output
fields are sequential but different, swap fields, otherwise do
not swap. This requires passing both the input and output mbus
frame formats to ipu_csi_init_interface().

Move this code to a new private function ipu_csi_set_bt_interlaced_codes()
that programs the CCIR_CODE_1/2 registers for interlaced BT.656 (and
possibly interlaced BT.1120 in the future).

When detecting input video standard from the input frame width/height,
make sure to double height if input field type is alternate, since
in that case input height only includes lines for one field.

Signed-off-by: Steve Longerbeam 
Reviewed-by: Philipp Zabel 
---
Changes since v5:
- Convert to const the infmt, outfmt, and mbus_cfg pointer args to
  ipu_csi_init_interface(), suggested by Philipp Zabel.
- Bring back if_fmt local var and don't copy outfmt to local stack in
  csi_setup(), suggested by Philipp.

Changes since v4:
- Cleaned up some convoluted code in ipu_csi_init_interface(), suggested
  by Philipp.
- Fixed a regression in csi_setup(), caught by Philipp.
---
 drivers/gpu/ipu-v3/ipu-csi.c  | 126 +++---
 drivers/staging/media/imx/imx-media-csi.c |   7 +-
 include/video/imx-ipu-v3.h|   5 +-
 3 files changed, 89 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index aa0e30a2ba18..d1e575571a8d 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -325,12 +325,21 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config 
*cfg, u32 mbus_code,
return 0;
 }
 
+/* translate alternate field mode based on given standard */
+static inline enum v4l2_field
+ipu_csi_translate_field(enum v4l2_field field, v4l2_std_id std)
+{
+   return (field != V4L2_FIELD_ALTERNATE) ? field :
+   ((std & V4L2_STD_525_60) ?
+V4L2_FIELD_SEQ_BT : V4L2_FIELD_SEQ_TB);
+}
+
 /*
  * Fill a CSI bus config struct from mbus_config and mbus_framefmt.
  */
 static int fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg,
-struct v4l2_mbus_config *mbus_cfg,
-struct v4l2_mbus_framefmt *mbus_fmt)
+   const struct v4l2_mbus_config *mbus_cfg,
+   const struct v4l2_mbus_framefmt *mbus_fmt)
 {
int ret;
 
@@ -374,22 +383,76 @@ static int fill_csi_bus_cfg(struct ipu_csi_bus_config 
*csicfg,
return 0;
 }
 
+static int
+ipu_csi_set_bt_interlaced_codes(struct ipu_csi *csi,
+   const struct v4l2_mbus_framefmt *infmt,
+   const struct v4l2_mbus_framefmt *outfmt,
+   v4l2_std_id std)
+{
+   enum v4l2_field infield, outfield;
+   bool swap_fields;
+
+   /* get translated field type of input and output */
+   infield = ipu_csi_translate_field(infmt->field, std);
+   outfield = ipu_csi_translate_field(outfmt->field, std);
+
+   /*
+* Write the H-V-F codes the CSI will match against the
+* incoming data for start/end of active and blanking
+* field intervals. If input and output field types are
+* sequential but not the same (one is SEQ_BT and the other
+* is SEQ_TB), swap the F-bit so that the CSI will capture
+* field 1 lines before field 0 lines.
+*/
+   swap_fields = (V4L2_FIELD_IS_SEQUENTIAL(infield) &&
+  V4L2_FIELD_IS_SEQUENTIAL(outfield) &&
+  infield != outfield);
+
+   if (!swap_fields) {
+   /*
+* Field0BlankEnd  = 110, Field0BlankStart  = 010
+* Field0ActiveEnd = 100, Field0ActiveStart = 000
+* Field1BlankEnd  = 111, Field1BlankStart  = 011
+* Field1ActiveEnd = 101, Field1ActiveStart = 001
+*/
+   ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN,
+ CSI_CCIR_CODE_1);
+   ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2);
+   } else {
+   dev_dbg(csi->ipu->dev, "capture field swap\n");
+
+   /* same as above but with F-bit inverted */
+   ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN,
+ CSI_CCIR_CODE_1);
+   ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2);
+   }
+
+   ipu_csi_write(csi, 0xFF, CSI_CCIR_CODE_3);
+
+   return 0;
+}
+
+
 int ipu_csi_init_interface(struct ipu_csi *csi,
-  struct v4l2_mbus_config *mbus_cfg,
-  struct v4l2_mbus_framefmt *mbus_fmt)
+

Re: [PATCH v5 02/12] gpu: ipu-csi: Swap fields according to input/output field types

2018-12-14 Thread Steve Longerbeam





On 12/13/18 4:59 AM, Philipp Zabel wrote:

Hi Steve,

On Tue, 2018-10-16 at 17:00 -0700, Steve Longerbeam wrote:

The function ipu_csi_init_interface() was inverting the F-bit for
NTSC case, in the CCIR_CODE_1/2 registers. The result being that
for NTSC bottom-top field order, the CSI would swap fields and
capture in top-bottom order.

Instead, base field swap on the field order of the input to the CSI,
and the field order of the requested output. If the input/output
fields are sequential but different, swap fields, otherwise do
not swap. This requires passing both the input and output mbus
frame formats to ipu_csi_init_interface().

Move this code to a new private function ipu_csi_set_bt_interlaced_codes()
that programs the CCIR_CODE_1/2 registers for interlaced BT.656 (and
possibly interlaced BT.1120 in the future).

When detecting input video standard from the input frame width/height,
make sure to double height if input field type is alternate, since
in that case input height only includes lines for one field.

Signed-off-by: Steve Longerbeam 
---
Changes since v4:
- Cleaned up some convoluted code in ipu_csi_init_interface(), suggested
   by Philipp Zabel.
- Fixed a regression in csi_setup(), caught by Philipp.
---
  drivers/gpu/ipu-v3/ipu-csi.c  | 119 +++---
  drivers/staging/media/imx/imx-media-csi.c |  17 +---
  include/video/imx-ipu-v3.h|   3 +-
  3 files changed, 88 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index aa0e30a2ba18..4a15e513fa05 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -325,6 +325,15 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config 
*cfg, u32 mbus_code,
return 0;
  }
  
+/* translate alternate field mode based on given standard */

+static inline enum v4l2_field
+ipu_csi_translate_field(enum v4l2_field field, v4l2_std_id std)
+{
+   return (field != V4L2_FIELD_ALTERNATE) ? field :
+   ((std & V4L2_STD_525_60) ?
+V4L2_FIELD_SEQ_BT : V4L2_FIELD_SEQ_TB);
+}
+
  /*
   * Fill a CSI bus config struct from mbus_config and mbus_framefmt.
   */
@@ -374,22 +383,75 @@ static int fill_csi_bus_cfg(struct ipu_csi_bus_config 
*csicfg,
return 0;
  }
  
+static int ipu_csi_set_bt_interlaced_codes(struct ipu_csi *csi,

+  struct v4l2_mbus_framefmt *infmt,
+  struct v4l2_mbus_framefmt *outfmt,

infmt and outfmt parameters could be const.


Agreed, I will convert these pointer args to const. And since we are 
changing the API to ipu_csi_init_interface() anyway, I went ahead and 
converted the mbus_cfg, infmt, and outfmt pointer args to const there as 
well.




+  v4l2_std_id std)
+{
+   enum v4l2_field infield, outfield;
+   bool swap_fields;
+
+   /* get translated field type of input and output */
+   infield = ipu_csi_translate_field(infmt->field, std);
+   outfield = ipu_csi_translate_field(outfmt->field, std);
+
+   /*
+* Write the H-V-F codes the CSI will match against the
+* incoming data for start/end of active and blanking
+* field intervals. If input and output field types are
+* sequential but not the same (one is SEQ_BT and the other
+* is SEQ_TB), swap the F-bit so that the CSI will capture
+* field 1 lines before field 0 lines.
+*/
+   swap_fields = (V4L2_FIELD_IS_SEQUENTIAL(infield) &&
+  V4L2_FIELD_IS_SEQUENTIAL(outfield) &&
+  infield != outfield);
+
+   if (!swap_fields) {
+   /*
+* Field0BlankEnd  = 110, Field0BlankStart  = 010
+* Field0ActiveEnd = 100, Field0ActiveStart = 000
+* Field1BlankEnd  = 111, Field1BlankStart  = 011
+* Field1ActiveEnd = 101, Field1ActiveStart = 001
+*/
+   ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN,
+ CSI_CCIR_CODE_1);
+   ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2);
+   } else {
+   dev_dbg(csi->ipu->dev, "capture field swap\n");
+
+   /* same as above but with F-bit inverted */
+   ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN,
+ CSI_CCIR_CODE_1);
+   ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2);
+   }
+
+   ipu_csi_write(csi, 0xFF, CSI_CCIR_CODE_3);
+
+   return 0;
+}
+
+
  int ipu_csi_init_interface(struct ipu_csi *csi,
   struct v4l2_mbus_config *mbus_cfg,
-  struct v4l2_mbus_framefmt *mbus_fmt)
+  struct v4l2_mbus_framefmt *infmt,
+  struct v4l2_mbus_framefmt *outfmt)
  {
struct ipu_csi_bus_config cfg;
unsigned long flags;
u32 width,

Re: [PATCH] ip6mr: Fix potential Spectre v1 vulnerability

2018-12-14 Thread David Miller

From: "Gustavo A. R. Silva" 
Date: Tue, 11 Dec 2018 14:10:08 -0600

> vr.mifi is indirectly controlled by user-space, hence leading to
> a potential exploitation of the Spectre variant 1 vulnerability.
> 
> This issue was detected with the help of Smatch:
> 
> net/ipv6/ip6mr.c:1845 ip6mr_ioctl() warn: potential spectre issue 
> 'mrt->vif_table' [r] (local cap)
> net/ipv6/ip6mr.c:1919 ip6mr_compat_ioctl() warn: potential spectre issue 
> 'mrt->vif_table' [r] (local cap)
> 
> Fix this by sanitizing vr.mifi before using it to index mrt->vif_table.
> 
> Notice that given that speculation windows are large, the policy is
> to kill the speculation on the first load and not worry if it can be
> completed with a dependent load/store [1].
> 
> [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied and queued up for -stable.

Re: [PATCH v9 6/6] PCI: Stub out pci_request_acs() when CONFIG_PCI is not set

2018-12-14 Thread Robin Murphy


On 2018-12-14 10:26 pm, Bjorn Helgaas wrote:

[+cc Lorenzo, Robin, Logan]

On Fri, Dec 14, 2018 at 04:33:19PM +, Sinan Kaya wrote:

ACPI IORT table code relies on pci_request_acs() to be present. Define
a stub function when CONFIG_PCI is not set.


This doesn't seem like the simplest approach to me, but I probably
don't understand what's going on in IORT.

It looks like *all* of iort_enable_acs() (the caller of
pci_request_acs()) is PCI-specific; at least, the whole thing is
wrapped in a test for ACPI_IORT_NODE_PCI_ROOT_COMPLEX.  So the whole
function could be wrapped in #ifdef CONFIG_PCI.

Here's the caller of iort_enable_acs():

   iort_init_platform_devices
 acs_enabled = false
 for (i = 0; i < iort->node_count; i++) {
   if (!acs_enabled)
 acs_enabled = iort_enable_acs(iort_node);

It seems like the acs_enabled state could be encapsulated inside
iort_enable_acs().


It could, but with the tiny disadvantage of having to allocate static 
storage for it to maintain the "don't bother checking if we want ACS if 
we've already requested it once" logic for multiple root complexes.



Today pci_request_acs() is a system-wide thing, but I don't know why
that's the case.  Isn't it conceivable that different PCI hierarchies
could have different ACS policies, e.g., because of P2P DMA or
something?


I can certainly imagine systems using entirely separate PCI domains for, 
say, expansion slots vs. on-board/on-chip devices, where the former may 
be expected to be assigned to VMs or userspace drivers and the latter 
only ever controlled by the host kernel. Not forcing ACS overhead upon 
the "non-virtualisable" (or "trusted" vs. "untrusted") domain could 
perhaps be beneficial in some cases.



Bottom line, pci_request_acs() is being called from what looks like
PCI-specific code in IORT, and it would make more sense to me to prune
out that code in IORT than to make a stub pci_request_acs().


Agreed - without CONFIG_PCI the whole of iort_enable_acs() may as well 
be stubbed out, because it won't achieve anything anyway. The 
implication is that if we have a root complex whose inbound DMA is 
controlled by an SMMU, we should enable ACS to make VFIO useful (or in 
case we want to enforce strict DMA isolation in general), but if Linux 
is never going to touch any PCI devices there's no point even looking at 
the RC node(s). TBH I'm pretty dubious about having IORT at all without 
PCI, but I suppose there could potentially be embedded SoCs which have 
an ITS and platform MSIs, or use an SMMU for their video/display 
subsystem, and for some reason demand ACPI-based firmware.


Robin.


Signed-off-by: Sinan Kaya 
---
  include/linux/pci.h | 4 
  1 file changed, 4 insertions(+)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 51a5a5217667..f0f2f55ea93c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2101,7 +2101,11 @@ static inline struct pci_dev *pcie_find_root_port(struct 
pci_dev *dev)
return NULL;
  }
  
+#ifdef CONFIG_PCI

  void pci_request_acs(void);
+#else
+static inline void pci_request_acs(void) {}
+#endif
  bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
  bool pci_acs_path_enabled(struct pci_dev *start,
  struct pci_dev *end, u16 acs_flags);
--
2.19.0

Interest!!!

2018-12-14 Thread Lehmann Schulz

Hello, I have sent the transaction details.

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

Re: [PATCH 5/7] [stable-4.19] i2c: aspeed: fix build warning

2018-12-14 Thread Brendan Higgins

On Fri, Dec 14, 2018 at 2:12 PM Arnd Bergmann  wrote:
>
> Upstream commit 3e9efc3299dd ("i2c: aspeed: Handle master/slave combined irq 
> events
> properly") reworked the interrupt handling and fixed a warning in the process:
>
> drivers/i2c/busses/i2c-aspeed.c: In function 'aspeed_i2c_bus_irq':
> drivers/i2c/busses/i2c-aspeed.c:567:1: error: label 'out' defined but not 
> used [-Werror=unused-label]
>
> The warning is still present in v4.19.8 and can be fixed either by applying
> that original patch, or by adding a simple #ifdef.
>
> Here, I choose the second simpler option as the original patch seems too
> invasive for a stable backport.
>
> Signed-off-by: Arnd Bergmann 

Thanks!

Reviewed-by: Brendan Higgins

[rfd] saving old mice -- button glitching/debouncing

2018-12-14 Thread Pavel Machek


I believe I have hardware problem, but I'm kind of hoping software
could help me...?

Mouse wheel on my machine started glitching, generating
double-clicks when I click it once. Which unfortunately is quite
annoying: texts are pasted twice, two tabs are closed instead of one,


Event: time 1544733054.903129, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
Event: time 1544733054.903129, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 1
Event: time 1544733054.903129, -- EV_SYN 
Event: time 1544733054.967251, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
Event: time 1544733054.967251, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 0
Event: time 1544733054.967251, -- EV_SYN 
Event: time 1544733054.975144, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
Event: time 1544733054.975144, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 1
Event: time 1544733054.975144, -- EV_SYN 
Event: time 1544733065.127190, type 4 (EV_MSC), code 4 (MSC_SCAN), value 90003
Event: time 1544733065.127190, type 1 (EV_KEY), code 274 (BTN_MIDDLE), value 0
Event: time 1544733065.127190, -- EV_SYN 

Now, I could just buy a new mouse, but it seems that most optical mice
die like this... so maybe it would be nice to have an option to
debounce the buttons, so that the useful life of mice is extended a
bit?

(So... I have two mice with that fault -- cheap to replace, but button
in thinkpad X220 started doing that, too. That one will not be so
cheap to fix :-( ).

It is possible that some X versions already do something like this.

Patch is obviously not ready; but:

a) would it be useful to people

b) would it be acceptable if done properly? (cmd line option to
enable, avoiding duplicate/wrong events?)

Thanks,
Pavel
Signed-off-by: Pavel Machek 

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 3304aaa..ce0d081 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -40,6 +40,11 @@ static DEFINE_IDA(input_ida);
 static LIST_HEAD(input_dev_list);
 static LIST_HEAD(input_handler_list);
 
+static void input_repeat_key(struct timer_list *t);
+static void input_debounce_key(struct timer_list *t);
+
+static int debounce = 20;
+
 /*
  * input_mutex protects access to both input_dev_list and input_handler_list.
  * This also causes input_[un]register_device and input_[un]register_handler
@@ -77,6 +82,7 @@ static void input_start_autorepeat(struct input_dev *dev, int 
code)
if (test_bit(EV_REP, dev->evbit) &&
dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] &&
dev->timer.function) {
+   dev->timer.function = input_repeat_key;
dev->repeat_key = code;
mod_timer(>timer,
  jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]));
@@ -88,18 +94,42 @@ static void input_stop_autorepeat(struct input_dev *dev)
del_timer(>timer);
 }
 
+static void input_start_debounce(struct input_dev *dev, int code)
+{
+   dev->timer.function = input_debounce_key;
+   dev->debounce_key = code;
+   mod_timer(>timer,
+ jiffies + msecs_to_jiffies(debounce));
+}
+
+static void input_stop_debounce(struct input_dev *dev)
+{
+   del_timer(>timer);
+   dev->debounce_key = -1;
+}
+
 /*
  * Pass event first through all filters and then, if event has not been
  * filtered out, through all open handles. This function is called with
  * dev->event_lock held and interrupts disabled.
  */
-static unsigned int input_to_handler(struct input_handle *handle,
+static unsigned int input_to_handler(struct input_dev *dev, struct 
input_handle *handle,
struct input_value *vals, unsigned int count)
 {
struct input_handler *handler = handle->handler;
struct input_value *end = vals;
struct input_value *v;
 
+   if (!test_bit(EV_REP, dev->evbit) && test_bit(EV_KEY, dev->evbit) && 
debounce)
+   for (v = vals; v != vals + count; v++) {
+   if (v->type == EV_KEY && v->value == 0 && 
dev->debounce_key == -1) {
+   input_start_debounce(dev, v->code);
+   v->code = -2;
+   }
+   if (v->type == EV_KEY && v->value == 1 && 
dev->debounce_key == v->code)
+   input_stop_debounce(dev);
+   }
+
if (handler->filter) {
for (v = vals; v != vals + count; v++) {
if (handler->filter(handle, v->type, v->code, v->value))
@@ -117,8 +147,9 @@ static unsigned int input_to_handler(struct input_handle 
*handle,
if (handler->events)
handler->events(handle, vals, count);
else if (handler->event)
-   for (v = vals; v != vals + count; v++)
+   for (v = vals; v != vals + count; v++) {

Re: [PATCH v2 0/8] net: y2038-safe socket timestamps

2018-12-14 Thread David Miller

From: Deepa Dinamani 
Date: Tue, 11 Dec 2018 12:25:12 -0800

> The series introduces new socket timestamps that are
> y2038 safe.

Please address Willem's feedback, thank you.

Re: [PATCH v9 0/7] KVM: x86: Allow Qemu/KVM to use PVH entry point

2018-12-14 Thread Boris Ostrovsky

On 12/10/18 2:05 PM, Maran Wilson wrote:
> For certain applications it is desirable to rapidly boot a KVM virtual
> machine. In cases where legacy hardware and software support within the
> guest is not needed, Qemu should be able to boot directly into the
> uncompressed Linux kernel binary without the need to run firmware.
>
> There already exists an ABI to allow this for Xen PVH guests and the ABI
> is supported by Linux and FreeBSD:
>
>https://xenbits.xen.org/docs/unstable/misc/pvh.html
>
> This patch series would enable Qemu to use that same entry point for
> booting KVM guests.
>


Applied to for-linus-4.21


-boris

Re: [LKP] [mm] ac5b2c1891: vm-scalability.throughput -61.3% regression

2018-12-14 Thread Mel Gorman

On Fri, Dec 14, 2018 at 01:04:11PM -0800, David Rientjes wrote:
> On Wed, 12 Dec 2018, Vlastimil Babka wrote:
> 
> > > Regarding the role of direct reclaim in the allocator, I think we need 
> > > work on the feedback from compaction to determine whether it's 
> > > worthwhile.  
> > > That's difficult because of the point I continue to bring up: 
> > > isolate_freepages() is not necessarily always able to access this freed 
> > > memory.
> > 
> > That's one of the *many* reasons why having free base pages doesn't
> > guarantee compaction success. We can and will improve on that. But I
> > don't think it would be e.g. practical to check the pfns of free pages
> > wrt compaction scanner positions and decide based on that.
> 
> Yeah, agreed.  Rather than proposing that memory is only reclaimed if it's 
> known that it can be accessible to isolate_freepages(), I'm wondering 
> about the implementation of the freeing scanner entirely.
> 
> In other words, I think there is a lot of potential stranding that occurs 
> for both scanners that could otherwise result in completely free 
> pageblocks.  If there is a single movable page present near the end of the 
> zone in an otherwise fully free pageblock, surely we can do better than 
> the current implementation that would never consider this very easy to 
> compact memory.
> 

While it's somewhat premature, I posted a series before I had a full set
of results because it uses free lists to reduce searches and reduces
interference between multiple scanners. Preliminary results indicated it
boosted allocation success rates by 20%ish, reduced migration scanning
by 99% and free scanning by 27%.

> The same problem occurs for the migration scanner where we can iterate 
> over a ton of free memory that is never considered a suitable migration 
> target.  The implementation that attempts to migrate all memory toward the 
> end of the zone penalizes the freeing scanner when it is reset: we just 
> iterate over a ton of used pages.
> 

Yes, partially addressed in series. It can be improved significantly but it
hit a boundary condition near the points where compaction scanners meet. I
dropped the patch in question as it needs more thought on how to deal
with the boundary condition without remigrating the blocks close to it.
Besides, at 14 patches, it would probably be best to get that reviewed
and finalised before building upon it further so review would be welcome.

> Has anybody tried a migration scanner that isn't linearly based, rather 
> finding the highest-order free page of the same migratetype, iterating the 
> pages of its pageblock, and using this to determine whether the actual 
> migration will be worthwhile or not?  I could imagine pageblock_skip being 
> repurposed for this as the heuristic.
> 

Yes, but it has downsides. Redoing the same work on pageblocks, tracking
state and tracking the exit conditions are tricky. I think it's best to
squeeze the most out of the linear scanning first and the series is the
first step in that.

> It would be interesting to know if anybody has tried using the per-zone 
> free_area's to determine migration targets and set a bit if it should be 
> considered a migration source or a migration target.  If all pages for a 
> pageblock are not on free_areas, they are fully used.
> 

Series has patches which implement something similar to this idea.

-- 
Mel Gorman
SUSE Labs

Re: [PATCH v1 2/2] usb:cdns3 Add Cadence USB3 DRD Driver

2018-12-14 Thread kbuild test robot

Hi Pawel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on usb/usb-testing]
[also build test WARNING on v4.20-rc6 next-20181214]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Pawel-Laszczak/dt-bindings-add-binding-for-USBSS-DRD-controller/20181211-025348
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git 
usb-testing
config: m68k-allmodconfig (attached as .config)
compiler: m68k-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=m68k 

All warnings (new ones prefixed by >>):

   drivers/usb//cdns3/host.c: In function 'cdns3_host_init':
>> drivers/usb//cdns3/host.c:65:5: warning: "CONFIG_PM" is not defined, 
>> evaluates to 0 [-Wundef]
#if CONFIG_PM
^

vim +/CONFIG_PM +65 drivers/usb//cdns3/host.c

53  
54  int cdns3_host_init(struct cdns3 *cdns)
55  {
56  struct cdns3_role_driver *rdrv;
57  
58  rdrv = devm_kzalloc(cdns->dev, sizeof(*rdrv), GFP_KERNEL);
59  if (!rdrv)
60  return -ENOMEM;
61  
62  rdrv->start = __cdns3_host_init;
63  rdrv->stop  = cdns3_host_exit;
64  rdrv->state = CDNS3_ROLE_STATE_INACTIVE;
  > 65  #if CONFIG_PM

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH 13/14] mm, compaction: Capture a page under direct compaction

2018-12-14 Thread Mel Gorman

Compaction is inherently race-prone as a suitable page freed during compaction
can be allocated by any parallel task. This patch uses a capture_control
structure to isolate a page immediately when it is freed by a direct compactor
in the slow path of the page allocator.

                               4.20.0-rc6             4.20.0-rc6
                            findfree-v1r8           capture-v1r8
Amean fault-both-3      2911.07 (   0.00%)     2898.64 (   0.43%)
Amean fault-both-5      4692.96 (   0.00%)     4296.58 (   8.45%)
Amean fault-both-7      6449.17 (   0.00%)     6203.55 (   3.81%)
Amean fault-both-12     9778.40 (   0.00%)     9309.13 (   4.80%)
Amean fault-both-18    11756.92 (   0.00%)     6245.27 *  46.88%*
Amean fault-both-24    13675.93 (   0.00%)    15083.42 ( -10.29%)
Amean fault-both-30    17195.41 (   0.00%)    11498.60 *  33.13%*
Amean fault-both-32    18150.08 (   0.00%)     9684.82 *  46.64%*

As expected, the biggest reduction in latency is when there are multiple
compaction instances that would previously compete for the same blocks.
THP allocation rates are also slightly higher.

                           4.20.0-rc6             4.20.0-rc6
                        findfree-v1r8           capture-v1r8
Percentage huge-1      0.00 (   0.00%)        0.00 (   0.00%)
Percentage huge-3     97.63 (   0.00%)       98.12 (   0.49%)
Percentage huge-5     96.11 (   0.00%)       98.83 (   2.84%)
Percentage huge-7     95.44 (   0.00%)       97.99 (   2.68%)
Percentage huge-12    95.36 (   0.00%)       99.00 (   3.82%)
Percentage huge-18    95.32 (   0.00%)       98.92 (   3.78%)
Percentage huge-24    95.13 (   0.00%)       99.08 (   4.15%)
Percentage huge-30    95.53 (   0.00%)       99.22 (   3.86%)
Percentage huge-32    94.94 (   0.00%)       98.97 (   4.25%)

And scan rates are reduced

Compaction migrate scanned    27634284    19002941
Compaction free scanned       55279519    46395714

Signed-off-by: Mel Gorman 
---
 include/linux/compaction.h |  3 ++-
 include/linux/sched.h  |  4 
 kernel/sched/core.c|  3 +++
 mm/compaction.c| 31 +++--
 mm/internal.h  |  9 +++
 mm/page_alloc.c| 58 ++
 6 files changed, 96 insertions(+), 12 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..b0d530cf46d1 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -95,7 +95,8 @@ extern int sysctl_compact_unevictable_allowed;
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, unsigned int alloc_flags,
-   const struct alloc_context *ac, enum compact_priority prio);
+   const struct alloc_context *ac, enum compact_priority prio,
+   struct page **page);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8681905589f0..f1758ef4d1e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@ struct pid_namespace;
 struct pipe_inode_info;
 struct rcu_node;
 struct reclaim_state;
+struct capture_control;
 struct robust_list_head;
 struct sched_attr;
 struct sched_param;
@@ -964,6 +965,9 @@ struct task_struct {
 
struct io_context   *io_context;
 
+#ifdef CONFIG_COMPACTION
+   struct capture_control  *capture_control;
+#endif
/* Ptrace state: */
unsigned long   ptrace_message;
kernel_siginfo_t*last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5f41fd2e0b6b..cd6d816aa40b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2177,6 +2177,9 @@ static void __sched_fork(unsigned long clone_flags, 
struct task_struct *p)
INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
+#ifdef CONFIG_COMPACTION
+   p->capture_control = NULL;
+#endif
init_numa_balancing(clone_flags, p);
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index ba3035dcc548..39d33b6d1172 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1949,7 +1949,8 @@ bool compaction_zonelist_suitable(struct alloc_context 
*ac, int order,
return false;
 }
 
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
enum compact_result ret;
unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2086,6 +2087,11 @@ static enum compact_result compact_zone(struct 
compact_control *cc)
}
}
 
+   /* Stop if

[PATCH 14/14] mm, compaction: Do not direct compact remote memory

2018-12-14 Thread Mel Gorman

Remote compaction is expensive and possibly counter-productive. Locality
is expected to often have better performance characteristics than remote
high-order pages. For small allocations, it's expected that locality is
generally required or fallbacks are possible. For larger allocations such
as THP, they are forbidden at the time of writing but if __GFP_THISNODE
is ever removed, then it would still be preferable to fallback to small
local base pages over remote THP in the general case. kcompactd is still
woken via kswapd so compaction happens eventually.

Signed-off-by: Mel Gorman 
---
 mm/compaction.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/mm/compaction.c b/mm/compaction.c
index 39d33b6d1172..05fecd7227e4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2208,6 +2208,16 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, 
unsigned int order,
continue;
}
 
+   /*
+* Do not compact remote memory. It's expensive and high-order
+* small allocations are expected to prefer or require local
+* memory. Similarly, larger requests such as THP can fallback
+* to base pages in preference to remote huge pages if
+* __GFP_THISNODE is not specified
+*/
+   if (zone_to_nid(zone) != 
zone_to_nid(ac->preferred_zoneref->zone))
+   continue;
+
status = compact_zone_order(zone, order, gfp_mask, prio,
alloc_flags, ac_classzone_idx(ac), capture);
rc = max(status, rc);
-- 
2.16.4


-- 
Mel Gorman
SUSE Labs

[PATCH 12/14] mm, compaction: Use free lists to quickly locate a migration target

2018-12-14 Thread Mel Gorman

Similar to the migration scanner, this uses the free lists to quickly
locate a migration target. The search is different in that lower orders
will be searched for a suitable high PFN if necessary but the search
is still bound. This is justified on the grounds that the free scanner
typically scans linearly much more than the migration scanner.

If a free page is found, it is isolated and the full pageblock is scanned
for any remaining free pages. This is done so that it's possible to mark
the pageblock for skipping in the near future.

1-socket thpfioscale
4.20.0-rc6 4.20.0-rc6
  isolmig-v1r4  findfree-v1r8
Amean fault-both-3  2980.25 (   0.00%) 2911.07 (   2.32%)
Amean fault-both-5  4393.04 (   0.00%) 4692.96 (  -6.83%)
Amean fault-both-7  5797.16 (   0.00%) 6449.17 ( -11.25%)
Amean fault-both-12 9849.61 (   0.00%) 9778.40 (   0.72%)
Amean fault-both-18 13816.96 (   0.00%) 11756.92 (  14.91%)
Amean fault-both-24 16255.20 (   0.00%) 13675.93 *  15.87%*
Amean fault-both-30 15741.25 (   0.00%) 17195.41 (  -9.24%)
Amean fault-both-32 16624.73 (   0.00%) 18150.08 (  -9.18%)

The impact on latency is variable but the search is optimistic and
sensitive to the exact system state. Success rates are similar but
the major impact is to the rate of scanning

4.20.0-rc6  4.20.0-rc6
  isolmig-v1r4findfree-v1r8
Compaction migrate scanned    25587453    27634284
Compaction free scanned       87735894    55279519

The free scan rates are reduced by 37%.

Signed-off-by: Mel Gorman 
---
 mm/compaction.c | 201 ++--
 1 file changed, 197 insertions(+), 4 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index b0309bf409b3..ba3035dcc548 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1117,7 +1117,7 @@ static inline bool compact_scanners_met(struct 
compact_control *cc)
 
 /* Reorder the free list to reduce repeated future searches */
 static void
-move_freelist_tail(struct list_head *freelist, struct page *freepage)
+move_freelist_head(struct list_head *freelist, struct page *freepage)
 {
LIST_HEAD(sublist);
 
@@ -1128,6 +1128,193 @@ move_freelist_tail(struct list_head *freelist, struct 
page *freepage)
}
 }
 
+static void
+move_freelist_tail(struct list_head *freelist, struct page *freepage)
+{
+   LIST_HEAD(sublist);
+
+   if (!list_is_last(freelist, &freepage->lru)) {
+   list_cut_before(&sublist, freelist, &freepage->lru);
+   if (!list_empty(&sublist))
+   list_splice_tail(&sublist, freelist);
+   }
+}
+
+static void
+fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned 
long nr_isolated)
+{
+   unsigned long start_pfn, end_pfn;
+   struct page *page = pfn_to_page(pfn);
+
+   /* Do not search around if there are enough pages already */
+   if (cc->nr_freepages >= cc->nr_migratepages)
+   return;
+
+   /* Minimise scanning during async compaction */
+   if (cc->direct_compaction && cc->mode == MIGRATE_ASYNC)
+   return;
+
+   /* Pageblock boundaries */
+   start_pfn = pageblock_start_pfn(pfn);
+   end_pfn = min(start_pfn + pageblock_nr_pages, zone_end_pfn(cc->zone));
+
+   /* Scan before */
+   if (start_pfn != pfn) {
+   isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 
false);
+   if (cc->nr_freepages >= cc->nr_migratepages)
+   return;
+   }
+
+   /* Scan after */
+   start_pfn = pfn + nr_isolated;
+   if (start_pfn != end_pfn)
+   isolate_freepages_block(cc, &start_pfn, end_pfn, 
&cc->freepages, false);
+
+   /* Skip this pageblock in the future as it's full or nearly full */
+   if (cc->nr_freepages < cc->nr_migratepages)
+   set_pageblock_skip(page);
+}
+
+static unsigned long
+fast_isolate_freepages(struct compact_control *cc)
+{
+   unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+   unsigned int order_scanned = 0, nr_scanned = 0;
+   unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
+   unsigned long nr_isolated = 0;
+   unsigned long distance;
+   struct page *page = NULL;
+   bool scan_start = false;
+   int order;
+
+   /*
+* If starting the scan, use a deeper search and use the highest
+* PFN found if a suitable one is not found.
+*/
+   if (cc->free_pfn == pageblock_start_pfn(zone_end_pfn(cc->zone) - 1)) {
+   limit = pageblock_nr_pages >> 1;
+   scan_start = true;
+   }
+
+   /*
+* Preferred point is in the top quarter of the scan space but take
+* a pfn from the top half if the search is problematic.
+*/
+   distance = (cc->free_pfn - cc->migrate_pfn);
+   low_pfn =

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1344 matches

Mail list logo