Re: [PATCH 1/2] arm64: tegra: add ACONNECT, ADMA and AGIC nodes

2019-06-05 Thread Sameer Pujar



On 6/4/2019 2:37 PM, Jon Hunter wrote:

On 06/05/2019 12:58, Sameer Pujar wrote:

Add DT nodes for following devices on Tegra186 and Tegra194
  * ACONNECT
  * ADMA
  * AGIC

Signed-off-by: Sameer Pujar 
---
  arch/arm64/boot/dts/nvidia/tegra186.dtsi | 67 
  arch/arm64/boot/dts/nvidia/tegra194.dtsi | 67 
  2 files changed, 134 insertions(+)

diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 6e2b6ce..2c432c9 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -1153,4 +1153,71 @@
(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
interrupt-parent = <>;
};
+
+   aconnect@2a41000 {
+   compatible = "nvidia,tegra210-aconnect";
+   clocks = < TEGRA186_CLK_APE>,
+< TEGRA186_CLK_APB2APE>;
+   clock-names = "ape", "apb2ape";
+   power-domains = < TEGRA186_POWER_DOMAIN_AUD>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;

My recollection is that non-empty range is preferred from what Rob told
me in the past. See the Tegra210 binding.
I see that an empty ranges property is used in various places. Per the DT spec,
it means there is no translation in the address space between parent and child.

Also I looked at Tegra210 binding,
ranges = <0x702c 0x0 0x702c 0x0004>;
Should it be encoded as a triplet(child addr, parent addr, length)?



+   status = "disabled";
+
+   dma@293 {

Although I did not do this for Tegra210, I think that the preferred
convention is 'dma-controller@x'.

done.

+   compatible = "nvidia,tegra186-adma";
+   reg = <0x0 0x0293 0x0 0x5>;
+   interrupt-parent = <>;
+   interrupts =  ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ,
+ ;
+   #dma-cells = <1>;
+   clocks = < TEGRA186_CLK_AHUB>;
+   clock-names = "d_audio";
+   status = "disabled";
+   };
+
+   agic: agic@2a41000 {
+   compatible = "nvidia,tegra210-agic";
+   #interrupt-cells = <4>;

Why 4? This does not match the binding document for the arm-gic.

will fix.

+   interrupt-controller;
+   reg = <0x0 0x02a41000 0x0 0x1000>,
+ <0x0 0x02a42000 0x0 0x1000>;

I believe that the 2nd address range should have size 0x2000 for the CPU
interfaces.

done

+   interrupts = ;
+   clocks = < TEGRA186_CLK_APE>;
+   clock-names = "clk";
+   status = "disabled";
+   };
+   };
  };
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi 
b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index c77ca21..dcab504 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1054,4 +1054,71 @@
(GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
interrupt-parent = <>;
};
+
+   aconnect@2a41000 {
+   compatible = "nvidia,tegra210-aconnect";
+   clocks = < TEGRA194_CLK_APE>,
+< TEGRA194_CLK_APB2APE>;
+   clock-names = "ape", "apb2ape";
+   power-domains = < TEGRA194_POWER_DOMAIN_AUD>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;

Same as above.


+   status = "disabled";
+
+   dma@293 {

Same as 

[PATCH V2] i2c: busses: tegra: Add suspend-resume support

2019-06-05 Thread Bitan Biswas
Post suspend I2C registers have power on reset values. Before any
transfer initialize I2C registers to prevent I2C transfer timeout
and implement suspend and resume callbacks needed. Fix below errors
post suspend:

1) Tegra I2C transfer timeout during jetson tx2 resume:

[   27.520613] pca953x 1-0074: calling pca953x_resume+0x0/0x1b0 @ 2939, parent: 
i2c-1
[   27.633623] tegra-i2c 316.i2c: i2c transfer timed out
[   27.639162] pca953x 1-0074: Unable to sync registers 0x3-0x5. -110
[   27.645336] pca953x 1-0074: Failed to sync GPIO dir registers: -110
[   27.651596] PM: dpm_run_callback(): pca953x_resume+0x0/0x1b0 returns -110
[   27.658375] pca953x 1-0074: pca953x_resume+0x0/0x1b0 returned -110 after 
127152 usecs
[   27.666194] PM: Device 1-0074 failed to resume: error -110

2) Tegra I2C transfer timeout error on jetson Xavier post resume.

Remove i2c bus lock-unlock calls in resume callback as i2c_mark_adapter_*
(suspended-resumed) help ensure i2c core calls from client are not
executed before i2c-tegra resume.

Signed-off-by: Bitan Biswas 
---
 drivers/i2c/busses/i2c-tegra.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index ebaa78d..76b7926 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -1687,7 +1687,30 @@ static int tegra_i2c_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int tegra_i2c_suspend(struct device *dev)
+{
+   struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
+
+   i2c_mark_adapter_suspended(_dev->adapter);
+
+   return 0;
+}
+
+static int tegra_i2c_resume(struct device *dev)
+{
+   struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
+   int ret;
+
+   ret = tegra_i2c_init(i2c_dev, false);
+   if (ret)
+   return ret;
+
+   i2c_mark_adapter_resumed(_dev->adapter);
+   return 0;
+}
+
 static const struct dev_pm_ops tegra_i2c_pm = {
+   SET_SYSTEM_SLEEP_PM_OPS(tegra_i2c_suspend, tegra_i2c_resume)
SET_RUNTIME_PM_OPS(tegra_i2c_runtime_suspend, tegra_i2c_runtime_resume,
   NULL)
 };
-- 
2.7.4



Re: [PATCH RFC 00/10] RDMA/FS DAX truncate proposal

2019-06-05 Thread John Hubbard
On 6/5/19 6:45 PM, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> ... V1,000,000   ;-)
> 
> Pre-requisites:
>   John Hubbard's put_user_pages() patch series.[1]
>   Jan Kara's ext4_break_layouts() fixes[2]
> 
> Based on the feedback from LSFmm and the LWN article which resulted, I've
> decided to take a slightly different tack on this problem.
> 
> The real issue is that there is no use case for a user to have RDMA pinned
> memory which is then truncated.  So really any solution we present which:
> 
> A) Prevents file system corruption or data leaks
> ...and...
> B) Informs the user that they did something wrong
> 
> Should be an acceptable solution.
> 
> Because this is slightly new behavior.  And because this is going to be
> specific to DAX (because of the lack of a page cache) we have made the user
> "opt in" to this behavior.
> 
> The following patches implement the following solution.
> 
> 1) The user has to opt in to allowing GUP pins on a file with a layout lease
>(now made visible).
> 2) GUP will fail (EPERM) if a layout lease is not taken
> 3) Any truncate or hole punch operation on a GUP'ed DAX page will fail.
> 4) The user has the option of holding the layout lease to receive a SIGIO for
>notification to the original thread that another thread has tried to delete
>their data.  Furthermore this indicates that if the user needs to GUP the
>file again they will need to retake the Layout lease before doing so.
> 
> 
> NOTE: If the user releases the layout lease or if it has been broken by 
> another
> operation further GUP operations on the file will fail without re-taking the
> lease.  This means that if a user would like to register pieces of a file and
> continue to register other pieces later they would be advised to keep the
> layout lease, get a SIGIO notification, and retake the lease.
> 
> NOTE2: Truncation of pages which are not actively pinned will succeed.  
> Similar
> to accessing an mmap to this area GUP pins of that memory may fail.
> 

Hi Ira,

Wow, great to see this. This looks like basically the right behavior, IMHO.

1. We'll need man page additions, to explain it. In fact, even after a quick 
first
pass through, I'm vague on two points:

a) I'm not sure how this actually provides "opt-in to new behavior", because I 
don't see any CONFIG_* or boot time choices, and it looks like the new behavior 
just is there. That is, if user space doesn't set F_LAYOUT on a range, 
GUP FOLL_LONGTERM will now fail, which is new behavior. (Did I get that right?)

b) Truncate and hole punch behavior, with and without user space having a SIGIO
handler. (I'm sure this is obvious after another look through, but it might go
nicely in a man page.)

2. It *seems* like ext4, xfs are taken care of here, not just for the DAX case,
but for general RDMA on them? Or is there more that must be done?

3. Christoph Hellwig's unified gup patchset wreaks havoc in gup.c, and will
conflict violently, as I'm sure you noticed. :)


thanks,
-- 
John Hubbard
NVIDIA

> 
> A general overview follows for background.
> 
> It should be noted that one solution for this problem is to use RDMA's On
> Demand Paging (ODP).  There are 2 big reasons this may not work.
> 
>   1) The hardware being used for RDMA may not support ODP
>   2) ODP may be detrimental to the overall network (cluster or cloud)
>  performance
> 
> Therefore, in order to support RDMA to File system pages without On Demand
> Paging (ODP) a number of things need to be done.
> 
> 1) GUP "longterm" users need to inform the other subsystems that they have
>taken a pin on a page which may remain pinned for a very "long time".[3]
> 
> 2) Any page which is "controlled" by a file system needs to have special
>handling.  The details of the handling depends on if the page is page cache
>fronted or not.
> 
>2a) A page cache fronted page which has been pinned by GUP long term can 
> use a
>bounce buffer to allow the file system to write back snap shots of the 
> page.
>This is handled by the FS recognizing the GUP long term pin and making a 
> copy
>of the page to be written back.
>   NOTE: this patch set does not address this path.
> 
>2b) A FS "controlled" page which is not page cache fronted is either easier
>to deal with or harder depending on the operation the filesystem is trying
>to do.
> 
>   2ba) [Hard case] If the FS operation _is_ a truncate or hole punch the
>   FS can no longer use the pages in question until the pin has been
>   removed.  This patch set presents a solution to this by introducing
>   some reasonable restrictions on user space applications.
> 
>   2bb) [Easy case] If the FS operation is _not_ a truncate or hole punch
>   then there is nothing which need be done.  Data is Read or Written
>   directly to the page.  This is an easy case which would currently work
>   if not for GUP long term pins being disabled.  Therefore 

Re: [PATCH v4 2/2] arm64: dts: qcom: Add Lenovo Miix 630

2019-06-05 Thread Lee Jones
On Tue, 23 Apr 2019, Jeffrey Hugo wrote:

> This adds the initial DT for the Lenovo Miix 630 laptop.  Supported
> functionality includes USB (host), microSD-card, keyboard, and trackpad.
> 
> Signed-off-by: Jeffrey Hugo 
> ---
>  arch/arm64/boot/dts/qcom/Makefile |   1 +
>  .../boot/dts/qcom/msm8998-clamshell.dtsi  | 278 ++
>  .../boot/dts/qcom/msm8998-lenovo-miix-630.dts |  30 ++

What's happening with this patch?

It's been on the list a while now.  I'm waiting for it to be accepted,
since there are patches I wish to submit which are based on it.

Who is responsible for merging these?

>  3 files changed, 309 insertions(+)
>  create mode 100644 arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
>  create mode 100644 arch/arm64/boot/dts/qcom/msm8998-lenovo-miix-630.dts
> 
> diff --git a/arch/arm64/boot/dts/qcom/Makefile 
> b/arch/arm64/boot/dts/qcom/Makefile
> index 21d548f02d39..c3e4307bcbd4 100644
> --- a/arch/arm64/boot/dts/qcom/Makefile
> +++ b/arch/arm64/boot/dts/qcom/Makefile
> @@ -6,6 +6,7 @@ dtb-$(CONFIG_ARCH_QCOM)   += msm8916-mtp.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += msm8992-bullhead-rev-101.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += msm8994-angler-rev-101.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += msm8996-mtp.dtb
> +dtb-$(CONFIG_ARCH_QCOM)  += msm8998-lenovo-miix-630.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += msm8998-mtp.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += sdm845-mtp.dtb
>  dtb-$(CONFIG_ARCH_QCOM)  += qcs404-evb-1000.dtb
> diff --git a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi 
> b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
> new file mode 100644
> index ..1a341d4b1597
> --- /dev/null
> +++ b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi
> @@ -0,0 +1,278 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2019, Jeffrey Hugo. All rights reserved. */
> +
> +/*
> + * Common include for MSM8998 clamshell devices, ie the Lenovo Miix 630,
> + * Asus NovaGo TP370QL, and HP Envy x2.  All three devices are basically the
> + * same, with differences in peripherals.
> + */
> +
> +#include "msm8998.dtsi"
> +#include "pm8998.dtsi"
> +#include "pm8005.dtsi"
> +
> +/ {
> + chosen {
> + };
> +
> + thermal-zones {
> + battery-thermal {
> + polling-delay-passive = <250>;
> + polling-delay = <1000>;
> +
> + thermal-sensors = < 0>;
> +
> + trips {
> + battery_crit: trip0 {
> + temperature = <6>;
> + hysteresis = <2000>;
> + type = "critical";
> + };
> + };
> + };
> +
> + skin-thermal {
> + polling-delay-passive = <250>;
> + polling-delay = <1000>;
> +
> + thermal-sensors = < 5>;
> +
> + trips {
> + skin_alert: trip0 {
> + temperature = <44000>;
> + hysteresis = <2000>;
> + type = "passive";
> + };
> +
> + skip_crit: trip1 {
> + temperature = <7>;
> + hysteresis = <2000>;
> + type = "critical";
> + };
> + };
> + };
> + };
> +
> + vph_pwr: vph-pwr-regulator {
> + compatible = "regulator-fixed";
> + regulator-name = "vph_pwr";
> + regulator-always-on;
> + regulator-boot-on;
> + };
> +};
> +
> + {
> + status = "okay";
> +
> + vdda-pll-supply = <_l12a_1p8>;
> + vdda-phy-dpdm-supply = <_l24a_3p075>;
> +};
> +
> +_requests {
> + pm8998-regulators {
> + compatible = "qcom,rpm-pm8998-regulators";
> +
> + vdd_s1-supply = <_pwr>;
> + vdd_s2-supply = <_pwr>;
> + vdd_s3-supply = <_pwr>;
> + vdd_s4-supply = <_pwr>;
> + vdd_s5-supply = <_pwr>;
> + vdd_s6-supply = <_pwr>;
> + vdd_s7-supply = <_pwr>;
> + vdd_s8-supply = <_pwr>;
> + vdd_s9-supply = <_pwr>;
> + vdd_s10-supply = <_pwr>;
> + vdd_s11-supply = <_pwr>;
> + vdd_s12-supply = <_pwr>;
> + vdd_s13-supply = <_pwr>;
> + vdd_l1_l27-supply = <_s7a_1p025>;
> + vdd_l2_l8_l17-supply = <_s3a_1p35>;
> + vdd_l3_l11-supply = <_s7a_1p025>;
> + vdd_l4_l5-supply = <_s7a_1p025>;
> + vdd_l6-supply = <_s5a_2p04>;
> + vdd_l7_l12_l14_l15-supply = <_s5a_2p04>;
> + vdd_l9-supply = <_pwr>;
> + vdd_l10_l23_l25-supply = <_pwr>;
> + 

Re: [PATCH] mtd: spi-nor: Add prep/unprep for spi_nor_resume

2019-06-05 Thread Tudor.Ambarus
Hi, Han,

On 04/25/2019 12:08 AM, Han Xu wrote:
> External E-Mail
> 
> 
> In the newly implemented spi_nor_resume function, the spi_nor_init()
> should be bracketed by prep/unprep functions.
> 

Would you please explain why this is needed? Have you tried a suspend/resume
cycle while a write was in progress and it failed?

Thanks,
ta


Re: [PATCH 1/3] KVM: LAPIC: Make lapic timer unpinned when timer is injected by posted-interrupt

2019-06-05 Thread Wanpeng Li
On Wed, 5 Jun 2019 at 21:04, Paolo Bonzini  wrote:
>
> On 05/06/19 12:09, Wanpeng Li wrote:
> > +static inline bool posted_interrupt_inject_timer(struct kvm_vcpu *vcpu)
> > +{
> > + return (kvm_x86_ops->pi_inject_timer_enabled(vcpu) &&
> > + kvm_mwait_in_guest(vcpu->kvm));
> > +}
> > +
>
> Here you need to check kvm_halt_in_guest, not kvm_mwait_in_guest,
> because you need to go through kvm_apic_expired if the guest needs to be
> woken up from kvm_vcpu_block.
>
> There is a case when you get to kvm_vcpu_block with kvm_halt_in_guest,
> which is when the guest disables asynchronous page faults.  Currently,
> timer interrupts are delivered while apf.halted = true, with this change

You are right. I check it in v2 2/3.

> they wouldn't.  I would just disable KVM_REQ_APF_HALT in
> kvm_can_do_async_pf if kvm_halt_in_guest is true, let me send a patch
> for that later.
>
> When you do this, I think you don't need the
> kvm_x86_ops->pi_inject_timer_enabled check at all, because if we know

I still keep the mwait and APICv checks in v2: without mwait exposed to the
guest, the emulated timer can't be offloaded (offload is only possible because
the preemption timer is disabled in that case). In addition, without
posted interrupts, we can't avoid the timer-fire vmexit.

> that the vCPU cannot be asleep in kvm_vcpu_block, then we can inject the
> timer interrupt immediately with __apic_accept_irq (if APICv is
> disabled, it will set IRR and do kvm_make_request + kvm_vcpu_kick).
>
> You can keep the module parameter, mostly for debugging reasons, but
> please move it from kvm-intel to kvm, and add something like
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 123ea07a3f3b..1cc7973c382e 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -14,6 +14,11 @@
>  static cpumask_var_t housekeeping_mask;
>  static unsigned int housekeeping_flags;
>
> +bool housekeeping_enabled(enum hk_flags flags)
> +{
> +   return !!(housekeeping_flags & flags);
> +}
> +
>  int housekeeping_any_cpu(enum hk_flags flags)
>  {
> if (static_branch_unlikely(_overridden))
>
> so that the default for the module parameter can be
> housekeeping_enabled(HK_FLAG_TIMER).

Agreed. Thanks for the quick review. :)

Regards,
Wanpeng Li


Re: [PATCH V1] i2c: busses: tegra: Add suspend-resume support

2019-06-05 Thread Bitan Biswas




On 5/31/19 5:43 AM, Dmitry Osipenko wrote:

31.05.2019 11:50, Bitan Biswas пишет:



On 5/30/19 4:27 AM, Dmitry Osipenko wrote:

30.05.2019 8:55, Bitan Biswas пишет:

Post suspend I2C registers have power on reset values. Before any
transfer initialize I2C registers to prevent I2C transfer timeout
and implement suspend and resume callbacks needed. Fix below errors
post suspend:

1) Tegra I2C transfer timeout during jetson tx2 resume:

[   27.520613] pca953x 1-0074: calling pca953x_resume+0x0/0x1b0 @
2939, parent: i2c-1
[   27.633623] tegra-i2c 316.i2c: i2c transfer timed out
[   27.639162] pca953x 1-0074: Unable to sync registers 0x3-0x5. -110
[   27.645336] pca953x 1-0074: Failed to sync GPIO dir registers: -110
[   27.651596] PM: dpm_run_callback(): pca953x_resume+0x0/0x1b0
returns -110
[   27.658375] pca953x 1-0074: pca953x_resume+0x0/0x1b0 returned -110
after 127152 usecs
[   27.666194] PM: Device 1-0074 failed to resume: error -110

2) Tegra I2C transfer timeout error on jetson Xavier post resume.

Signed-off-by: Bitan Biswas 
---
   drivers/i2c/busses/i2c-tegra.c | 24 
   1 file changed, 24 insertions(+)

diff --git a/drivers/i2c/busses/i2c-tegra.c
b/drivers/i2c/busses/i2c-tegra.c
index ebaa78d..f6a377f 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -1687,9 +1687,33 @@ static int tegra_i2c_remove(struct
platform_device *pdev)
   }
     #ifdef CONFIG_PM_SLEEP
+static int tegra_i2c_suspend(struct device *dev)
+{
+    struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
+
+    i2c_mark_adapter_suspended(_dev->adapter);
+
+    return 0;
+}
+
+static int tegra_i2c_resume(struct device *dev)
+{
+    struct tegra_i2c_dev *i2c_dev = dev_get_drvdata(dev);
+    int ret;
+
+    i2c_lock_bus(_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
+    ret = tegra_i2c_init(i2c_dev, false);
+    i2c_unlock_bus(_dev->adapter, I2C_LOCK_ROOT_ADAPTER);


Why is the locking needed here?


async resume could result in stress test issues if some client accesses
the i2c instance. This ensures the i2c instance is locked till the
initialization is complete.


1) This doesn't make much sense.. if a client could access I2C during
tegra_i2c_init execution, then what stops it from performing the access
before the lock is taken?
Client resumes will start after I2C instance resume because of driver 
dependency. Since lock is the first call in i2c-tegra I believe I2C 
calls of client will not start.




2) The whole point of the i2c_mark_adapter_* API is to catch those
faulty clients that have a broken suspend-resume sequence. Client will
get a -ESHUTDOWN on trying to issue I2C transfer while controller is
marked as suspended.
i2c lock bus calls were used in the resume callback implementation that 
was reverted few months back. Hence, these were added in this patch 
which should be more like a revert-of-revert .


But I feel probably your point also makes sense. Old resume callback did 
not have i2c_mark_adapter_* calls. Based on the i2c_adapter_mark_* API 
documentation it should be taking care that core i2c calls from client 
are not started. I plan to update the patch and remove the lock-unlock 
guards in resume callback.





3) Please don't use async suspend-resume where it doesn't make sense.
This is a system wide setting. /sys/power/pm_async by default is 1 and 
there is no driver specific change in this patch to choose async 
suspend-resume.




Corollary: you should drop the locking because it doesn't do anything
useful.

I did some basic suspend resume tests and do not see any problems 
removing the i2c_lock_bus call you pointed out.


-Thanks,
 Bitan



linux-next: manual merge of the pidfd tree with Linus' tree

2019-06-05 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the pidfd tree got a conflict in:

  tools/testing/selftests/pidfd/pidfd_test.c

between commit:

  1fcd0eb356ad ("tests: fix pidfd-test compilation")

from Linus' tree and commit:

  233ad92edbea ("pidfd: add polling selftests")

from the pidfd tree.

I fixed it up (I just used the latter) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.



-- 
Cheers,
Stephen Rothwell


pgpyPqn8sHRZb.pgp
Description: OpenPGP digital signature


Re: [PATCH] kbuild: s390, do not remove autogenerated headers on clean

2019-06-05 Thread Jiri Slaby
On 05. 06. 19, 13:29, Masahiro Yamada wrote:
> This issue had already been fixed by the side-effect of
> another patch, which is in linux-next.

Great, thanks.

> See this:
> 
> 
> commit 10077c9f2dae1afabab2808a0326ecf3e8e5a82c
> Author: Masahiro Yamada 
> Date:   Fri May 17 16:54:27 2019 +0900
> 
> s390: drop meaningless 'targets' from tools Makefile
> 
> 'targets' should be specified to include .*.cmd files to evaluate
> if_changed or friends.
> 
> Here, facility-defs.h and dis-defs.h are generated by filechk.
> 
> Because filechk does not generate .*.cmd file, the 'targets' addition
> is meaningless. The filechk correctly updates the target when its
> content is changed.
> 
> Signed-off-by: Masahiro Yamada 
> Signed-off-by: Heiko Carstens 


-- 
js
suse labs


WARNING: refcount bug in css_task_iter_next

2019-06-05 Thread syzbot

Hello,

syzbot found the following crash on:

HEAD commit:b2924447 Add linux-next specific files for 20190605
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=11c492d2a0
kernel config:  https://syzkaller.appspot.com/x/.config?x=4248d6bc70076f7d
dashboard link: https://syzkaller.appspot.com/bug?extid=644dc16442b3a35f3629
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+644dc16442b3a35f3...@syzkaller.appspotmail.com

[ cut here ]
refcount_t: increment on 0; use-after-free.
WARNING: CPU: 0 PID: 4184 at lib/refcount.c:156 refcount_inc_checked  
lib/refcount.c:156 [inline]
WARNING: CPU: 0 PID: 4184 at lib/refcount.c:156  
refcount_inc_checked+0x61/0x70 lib/refcount.c:154

Kernel panic - not syncing: panic_on_warn set ...
CPU: 0 PID: 4184 Comm: syz-executor.3 Not tainted 5.2.0-rc3-next-20190605 #9
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 panic+0x2cb/0x744 kernel/panic.c:219
 __warn.cold+0x20/0x4d kernel/panic.c:576
 report_bug+0x263/0x2b0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:179 [inline]
 fixup_bug arch/x86/kernel/traps.c:174 [inline]
 do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272
 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:986
RIP: 0010:refcount_inc_checked lib/refcount.c:156 [inline]
RIP: 0010:refcount_inc_checked+0x61/0x70 lib/refcount.c:154
Code: 1d db 0e 68 06 31 ff 89 de e8 1b c4 3b fe 84 db 75 dd e8 d2 c2 3b fe  
48 c7 c7 e0 b6 c4 87 c6 05 bb 0e 68 06 01 e8 dd db 0d fe <0f> 0b eb c1 90  
90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41

RSP: 0018:8882000ef290 EFLAGS: 00010082
RAX:  RBX:  RCX: 
RDX: 0004 RSI: 815b04b6 RDI: ed104001de44
RBP: 8882000ef2a0 R08: 8882035744c0 R09: ed1015d040f1
R10: ed1015d040f0 R11: 8880ae820787 R12: 88804436a660
R13: 8882000ef368 R14: 88804436a640 R15: 11104001de5d
 css_task_iter_next+0xf9/0x190 kernel/cgroup/cgroup.c:4568
 mem_cgroup_scan_tasks+0xbb/0x180 mm/memcontrol.c:1168
 select_bad_process mm/oom_kill.c:374 [inline]
 out_of_memory mm/oom_kill.c:1088 [inline]
 out_of_memory+0x6b2/0x1280 mm/oom_kill.c:1035
 mem_cgroup_out_of_memory+0x1ca/0x230 mm/memcontrol.c:1573
 mem_cgroup_oom mm/memcontrol.c:1905 [inline]
 try_charge+0xfbe/0x1480 mm/memcontrol.c:2468
 mem_cgroup_try_charge+0x24d/0x5e0 mm/memcontrol.c:6073
 __add_to_page_cache_locked+0x425/0xe70 mm/filemap.c:839
 add_to_page_cache_lru+0x1cb/0x760 mm/filemap.c:916
 pagecache_get_page+0x357/0x850 mm/filemap.c:1655
 grab_cache_page_write_begin+0x75/0xb0 mm/filemap.c:3157
 simple_write_begin+0x36/0x2c0 fs/libfs.c:438
 generic_perform_write+0x22a/0x520 mm/filemap.c:3207
 __generic_file_write_iter+0x25e/0x630 mm/filemap.c:3336
 generic_file_write_iter+0x360/0x610 mm/filemap.c:3368
 call_write_iter include/linux/fs.h:1870 [inline]
 new_sync_write+0x4d3/0x770 fs/read_write.c:483
 __vfs_write+0xe1/0x110 fs/read_write.c:496
 vfs_write+0x268/0x5d0 fs/read_write.c:558
 ksys_write+0x14f/0x290 fs/read_write.c:611
 __do_sys_write fs/read_write.c:623 [inline]
 __se_sys_write fs/read_write.c:620 [inline]
 __x64_sys_write+0x73/0xb0 fs/read_write.c:620
 do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x459279
Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00

RSP: 002b:7f9a334d9c78 EFLAGS: 0246 ORIG_RAX: 0001
RAX: ffda RBX: 0003 RCX: 00459279
RDX: 03d3427e RSI: 2180 RDI: 0004
RBP: 0075bf20 R08:  R09: 
R10:  R11: 0246 R12: 7f9a334da6d4
R13: 004c8ee8 R14: 004dfbb0 R15: 
Shutting down cpus with NMI
Kernel Offset: disabled
Rebooting in 86400 seconds..


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.


Re: [PATCH 2/3] KVM: LAPIC: lapic timer is injected by posted interrupt

2019-06-05 Thread Wanpeng Li
On Wed, 5 Jun 2019 at 20:30, Paolo Bonzini  wrote:
>
> On 05/06/19 12:09, Wanpeng Li wrote:
> > +static void apic_timer_expired_pi(struct kvm_lapic *apic)
> > +{
> > + struct kvm_timer *ktimer = >lapic_timer;
> > +
> > + kvm_apic_local_deliver(apic, APIC_LVTT);
> > + if (apic_lvtt_tscdeadline(apic))
> > + ktimer->tscdeadline = 0;
> > + if (apic_lvtt_oneshot(apic)) {
> > + ktimer->tscdeadline = 0;
> > + ktimer->target_expiration = 0;
> > + }
> > +}
>
> Please rename this function to kvm_apic_inject_pending_timer_irqs and
> call it from kvm_inject_apic_timer_irqs.
>
> Then apic_timer_expired can just do
>
> if (atomic_read(>lapic_timer.pending))
> return;
>
> +   if (unlikely(posted_interrupt_inject_timer(apic->vcpu))) {
> +   kvm_apic_inject_pending_timer_irqs(apic);
> +   return;
> +   }

Do it in v2.

Regards,
Wanpeng Li


[PATCH v2 2/3] KVM: LAPIC: lapic timer interrupt is injected by posted interrupt

2019-06-05 Thread Wanpeng Li
From: Wanpeng Li 

Dedicated instances are currently disturbed by unnecessary jitter because
the emulated lapic timers fire on the same pCPUs on which the vCPUs reside.
Unlike ARM, Intel has no hardware virtual timer for guests. Both programming
the timer in the guest and the firing of the emulated timer incur vmexits.
This patch tries to avoid the vmexit incurred by the emulated timer firing
in the dedicated instance scenario.

When nohz_full is enabled in the dedicated instances scenario, the emulated
timers can be offloaded to the nearest busy housekeeping cpus, since APICv
has become really common in recent years. The guest timer interrupt is
injected by a posted interrupt, which is delivered by the housekeeping cpu
once the emulated timer fires.

3%~5% redis performance benefit can be observed on Skylake server.

Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/lapic.c | 32 +---
 arch/x86/kvm/x86.h   |  5 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 09b7387..c08e5a8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -133,6 +133,12 @@ static inline bool 
posted_interrupt_inject_timer_enabled(struct kvm_vcpu *vcpu)
kvm_mwait_in_guest(vcpu->kvm);
 }
 
+static inline bool can_posted_interrupt_inject_timer(struct kvm_vcpu *vcpu)
+{
+   return posted_interrupt_inject_timer_enabled(vcpu) &&
+   !vcpu_halt_in_guest(vcpu);
+}
+
 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -1441,6 +1447,19 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
}
 }
 
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+   struct kvm_timer *ktimer = >lapic_timer;
+
+   kvm_apic_local_deliver(apic, APIC_LVTT);
+   if (apic_lvtt_tscdeadline(apic))
+   ktimer->tscdeadline = 0;
+   if (apic_lvtt_oneshot(apic)) {
+   ktimer->tscdeadline = 0;
+   ktimer->target_expiration = 0;
+   }
+}
+
 static void apic_timer_expired(struct kvm_lapic *apic)
 {
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1450,6 +1469,11 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (atomic_read(>lapic_timer.pending))
return;
 
+   if (unlikely(can_posted_interrupt_inject_timer(apic->vcpu))) {
+   kvm_apic_inject_pending_timer_irqs(apic);
+   return;
+   }
+
atomic_inc(>lapic_timer.pending);
kvm_set_pending_timer(vcpu);
 
@@ -2386,13 +2410,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
 
if (atomic_read(>lapic_timer.pending) > 0) {
-   kvm_apic_local_deliver(apic, APIC_LVTT);
-   if (apic_lvtt_tscdeadline(apic))
-   apic->lapic_timer.tscdeadline = 0;
-   if (apic_lvtt_oneshot(apic)) {
-   apic->lapic_timer.tscdeadline = 0;
-   apic->lapic_timer.target_expiration = 0;
-   }
+   kvm_apic_inject_pending_timer_irqs(apic);
atomic_set(>lapic_timer.pending, 0);
}
 }
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index aa539d6..74c86cb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -364,4 +364,9 @@ static inline bool kvm_pat_valid(u64 data)
 void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
 void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
 
+static inline bool vcpu_halt_in_guest(struct kvm_vcpu *vcpu)
+{
+   return vcpu->arch.apf.halted;
+}
+
 #endif
-- 
2.7.4



[PATCH v2 1/3] KVM: LAPIC: Make lapic timer unpinned when timer is injected by posted-interrupt

2019-06-05 Thread Wanpeng Li
From: Wanpeng Li 

Make the lapic timer unpinned when the timer is injected by posted-interrupt,
so that the emulated timer can be offloaded to the housekeeping cpus.

The host admin should fine-tune the setup, e.g. in the dedicated instances
scenario, have nohz_full cover the pCPUs on which the vCPUs reside, leave
several pCPUs spare for housekeeping, and disable mwait/hlt/pause vmexits so
that the vCPUs occupy the pCPUs. Fortunately, the preemption timer is disabled
after mwait is exposed to the guest, which makes emulated timer offload possible.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/lapic.c| 20 
 arch/x86/kvm/x86.c  |  5 +
 arch/x86/kvm/x86.h  |  2 ++
 include/linux/sched/isolation.h |  2 ++
 kernel/sched/isolation.c|  6 ++
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fcf42a3..09b7387 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -127,6 +127,12 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
 }
 
+static inline bool posted_interrupt_inject_timer_enabled(struct kvm_vcpu *vcpu)
+{
+   return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
+   kvm_mwait_in_guest(vcpu->kvm);
+}
+
 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -1581,7 +1587,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-   hrtimer_start(>timer, expire, HRTIMER_MODE_ABS_PINNED);
+   hrtimer_start(>timer, expire,
+   posted_interrupt_inject_timer_enabled(vcpu) ?
+   HRTIMER_MODE_ABS : HRTIMER_MODE_ABS_PINNED);
} else
apic_timer_expired(apic);
 
@@ -1683,7 +1691,8 @@ static void start_sw_period(struct kvm_lapic *apic)
 
hrtimer_start(>lapic_timer.timer,
apic->lapic_timer.target_expiration,
-   HRTIMER_MODE_ABS_PINNED);
+   posted_interrupt_inject_timer_enabled(apic->vcpu) ?
+   HRTIMER_MODE_ABS : HRTIMER_MODE_ABS_PINNED);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -2320,7 +2329,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int 
timer_advance_ns)
apic->vcpu = vcpu;
 
hrtimer_init(>lapic_timer.timer, CLOCK_MONOTONIC,
-HRTIMER_MODE_ABS_PINNED);
+   posted_interrupt_inject_timer_enabled(vcpu) ?
+   HRTIMER_MODE_ABS : HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = 1000;
@@ -2509,7 +2519,9 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 
timer = >arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
-   hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+   hrtimer_start_expires(timer,
+   posted_interrupt_inject_timer_enabled(vcpu) ?
+   HRTIMER_MODE_ABS : HRTIMER_MODE_ABS_PINNED);
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6200d5a..2ef2394 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -54,6 +54,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -155,6 +156,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
 static bool __read_mostly force_emulation_prefix = false;
 module_param(force_emulation_prefix, bool, S_IRUGO);
 
+bool __read_mostly pi_inject_timer = 0;
+module_param(pi_inject_timer, bool, S_IRUGO | S_IWUSR);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -7030,6 +7034,7 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
kvm_lapic_init();
+   pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
 #ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(_gtod_notifier);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 275b3b6..aa539d6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -296,6 +296,8 @@ extern unsigned int min_timer_period_us;
 
 extern bool enable_vmware_backdoor;
 
+extern bool pi_inject_timer;
+
 extern struct static_key kvm_no_apic_vcpu;
 
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index b0fb144..6fc5407 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -19,6 +19,7 @@ enum hk_flags {
 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
 extern int housekeeping_any_cpu(enum hk_flags flags);
 extern const struct cpumask *housekeeping_cpumask(enum 

[PATCH v2 3/3] KVM: LAPIC: Ignore timer migration when lapic timer is injected by posted-interrupt

2019-06-05 Thread Wanpeng Li
From: Wanpeng Li 

When lapic timer is injected by posted-interrupt, the emulated timer is
offloaded to the housekeeping cpu. The timer interrupt will be delivered
properly, no need to migrate timer.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 arch/x86/kvm/lapic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c08e5a8..a3e4ca8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2532,7 +2532,8 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
struct hrtimer *timer;
 
-   if (!lapic_in_kernel(vcpu))
+   if (!lapic_in_kernel(vcpu) ||
+   posted_interrupt_inject_timer_enabled(vcpu))
return;
 
timer = >arch.apic->lapic_timer.timer;
-- 
2.7.4



[PATCH v2 0/3] KVM: LAPIC: Implement Exitless Timer

2019-06-05 Thread Wanpeng Li
Dedicated instances are currently disturbed by unnecessary jitter due 
to the emulated lapic timers firing on the same pCPUs on which the vCPUs reside.
There is no hardware virtual timer on Intel for guest like ARM. Both 
programming timer in guest and the emulated timer fires incur vmexits.
This patchset tries to avoid vmexit which is incurred by the emulated 
timer fires in dedicated instance scenario. 

When nohz_full is enabled in dedicated instances scenario, the unpinned 
timer will be moved to the nearest busy housekeepers after commit 444969223c8
("sched/nohz: Fix affine unpinned timers mess"). However, KVM always makes 
lapic timer pinned to the pCPU which vCPU residents, the reason is explained 
by commit 61abdbe0 (kvm: x86: make lapic hrtimer pinned). Actually, these 
emulated timers can be offloaded to the housekeeping cpus since APICv 
is really common in recent years. The guest timer interrupt is injected by 
posted-interrupt which is delivered by housekeeping cpu once the emulated 
timer fires. 

The host admin should fine tuned, e.g. dedicated instances scenario w/ 
nohz_full cover the pCPUs which vCPUs resident, several pCPUs surplus 
for housekeeping, disable mwait/hlt/pause vmexits to occupy the pCPUs, 
fortunately preemption timer is disabled after mwait is exposed to 
guest which makes emulated timer offload can be possible. 
3%~5% redis performance benefit can be observed on Skylake server.

v1 -> v2:
 * check vcpu_halt_in_guest
 * move module parameter from kvm-intel to kvm
 * add housekeeping_enabled
 * rename apic_timer_expired_pi to kvm_apic_inject_pending_timer_irqs

Wanpeng Li (3):
  KVM: LAPIC: Make lapic timer unpinned when timer is injected by
posted-interrupt
  KVM: LAPIC: lapic timer interrupt is injected by posted interrupt
  KVM: LAPIC: Ignore timer migration when lapic timer is injected by
posted-interrupt

 arch/x86/kvm/lapic.c| 55 -
 arch/x86/kvm/x86.c  |  5 
 arch/x86/kvm/x86.h  |  7 ++
 include/linux/sched/isolation.h |  2 ++
 kernel/sched/isolation.c|  6 +
 5 files changed, 63 insertions(+), 12 deletions(-)

-- 
2.7.4



Re: [PATCH] mm: Remove VM_BUG_ON in __alloc_pages_node

2019-06-05 Thread Michal Hocko
On Wed 05-06-19 21:25:01, Bharath Vedartham wrote:
> IMO the reason why a lot of failures must not have occurred in the past
> might be because the programs which use it use stuff like cpu_to_node or
> have checks for nid.
> If one day we do get a program which passes an invalid node id without
> VM_BUG_ON enabled, it might get weird.

It will blow up on a NULL NODE_DATA and it will be quite obvious what
that was so I wouldn't lose any sleep over that. I do not think we have
any directly user controllable way to provide a completely ad-hoc numa
node for an allocation.

-- 
Michal Hocko
SUSE Labs


Re: [PATCH 1/2] mfd: core: Support multiple OF child devices of the same type

2019-06-05 Thread Lee Jones
On Wed, 05 Jun 2019, Robert Hancock wrote:

> On 2019-06-05 12:45 p.m., Lee Jones wrote:
>  diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
>  index 99c0395..470f6cb 100644
>  --- a/include/linux/mfd/core.h
>  +++ b/include/linux/mfd/core.h
>  @@ -55,6 +55,9 @@ struct mfd_cell {
>    */
>   const char  *of_compatible;
>   
>  +/* Optionally match against a specific device of a given type */
>  +const char  *of_full_name;
>  +
> >>>
> >>> Can you give me an example for when this might be useful?
> >>
> >> This is an example of some device tree entries for our MFD device:
> >>
> >> axi_iic_0: i2c@c {
> >> compatible = "xlnx,xps-iic-2.00.a";
> >> clocks = <_clk>;
> >> clock-frequency = <10>;
> >> interrupts = <7>;
> >> #size-cells = <0>;
> >> #address-cells = <1>;
> >> };
> >>
> >> axi_iic_1: i2c@d {
> >> compatible = "xlnx,xps-iic-2.00.a";
> >> clocks = <_clk>;
> >> clock-frequency = <10>;
> >> interrupts = <8>;
> >> #size-cells = <0>;
> >> #address-cells = <1>;
> >> };
> >>
> >> and the corresponding MFD cells:
> >>
> >> {
> >>.name   = "axi_iic_0",
> >>.of_compatible  = "xlnx,xps-iic-2.00.a",
> >>.of_full_name   = "i2c@c",
> >>.num_resources  = ARRAY_SIZE(dbe_i2c1_resources),
> >>.resources  = dbe_i2c1_resources
> >> },
> >> {
> >>.name   = "axi_iic_1",
> >>.of_compatible  = "xlnx,xps-iic-2.00.a",
> >>.of_full_name   = "i2c@d",
> >>.num_resources  = ARRAY_SIZE(dbe_i2c2_resources),
> >>.resources  = dbe_i2c2_resources
> >> },
> >>
> >> Without having the .of_full_name support, both MFD cells ended up
> >> wrongly matching against the i2c@c device tree node since we just
> >> picked the first one where of_compatible matched.
> > 
> > What is contained in each of their resources?
> 
> These are the resource entries for those two devices:
> 
> static const struct resource dbe_i2c1_resources[] = {
> {
>   .start  = 0xc,
>   .end= 0xc,
>   .name   = "xi2c1_regs",
>   .flags  = IORESOURCE_MEM,
>   .desc   = IORES_DESC_NONE
> },
> };
> 
> static const struct resource dbe_i2c2_resources[] = {
> {
>   .start  = 0xd,
>   .end= 0xd,
>   .name   = "xi2c2_regs",
>   .flags  = IORESOURCE_MEM,
>   .desc   = IORES_DESC_NONE
> },
> };

This is your problem.  You are providing the memory resources through
*both* DT and MFD.  I don't believe I've seen your MFD driver, but it
looks like it's probably not required at all.  Just allow DT to probe
each of your child devices.  You can obtain the IO memory from there
directly using the usual platform_get_resource() calls.

> Ideally the IO memory resource entries would be picked up and mapped
> through the device tree as well, as they are with the interrupts, but I
> haven't yet found the device tree magic that would allow that to happen
> yet, if it's possible. The setup we have has a number of peripherals on
> an AXI bus which are behind a PCIe to AXI bridge, and we're using mfd to
> instantiate each of those AXI devices under the PCIe device.
> 

-- 
Lee Jones [李琼斯]
Linaro Services Technical Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


[PATCH] scsi: lpfc: Avoid unused function warnings

2019-06-05 Thread Nathan Chancellor
When building powerpc pseries_defconfig or powernv_defconfig:

drivers/scsi/lpfc/lpfc_nvmet.c:224:1: error: unused function
'lpfc_nvmet_get_ctx_for_xri' [-Werror,-Wunused-function]
drivers/scsi/lpfc/lpfc_nvmet.c:246:1: error: unused function
'lpfc_nvmet_get_ctx_for_oxid' [-Werror,-Wunused-function]

These functions are only compiled when CONFIG_NVME_TARGET_FC is enabled.
Use that same condition so there is no more warning. While the fixes
commit did not introduce these functions, it caused these warnings.

Fixes: 4064b27417a7 ("scsi: lpfc: Make some symbols static")
Signed-off-by: Nathan Chancellor 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index e471bbcca838..f3d9a5545164 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -220,6 +220,7 @@ lpfc_nvmet_cmd_template(void)
/* Word 12, 13, 14, 15 - is zero */
 }
 
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 static struct lpfc_nvmet_rcv_ctx *
 lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri)
 {
@@ -263,6 +264,7 @@ lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 
oxid, u32 sid)
 
return NULL;
 }
+#endif
 
 static void
 lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx 
*ctxp)
-- 
2.22.0.rc3



Re: [PATCH v4 1/2] HID: quirks: Refactor ELAN 400 and 401 handling

2019-06-05 Thread Lee Jones
On Tue, 23 Apr 2019, Jeffrey Hugo wrote:

> There needs to be coordination between hid-quirks and the elan_i2c driver
> about which devices are handled by what drivers.  Currently, both use
> whitelists, which results in valid devices being unhandled by default,
> when they should not be rejected by hid-quirks.  This is quickly becoming
> an issue.
> 
> Since elan_i2c has a maintained whitelist of what devices it will handle,
> use that to implement a blacklist in hid-quirks so that only the devices
> that need to be handled by elan_i2c get rejected by hid-quirks, and
> everything else is handled by default.  The downside is the whitelist and
> blacklist need to be kept in sync.
> 
> Suggested-by: Benjamin Tissoires 
> Signed-off-by: Jeffrey Hugo 
> ---
>  drivers/hid/hid-quirks.c| 64 -
>  drivers/input/mouse/elan_i2c_core.c |  4 ++
>  2 files changed, 58 insertions(+), 10 deletions(-)

Reviewed-by: Lee Jones 
Tested-by: Lee Jones 

-- 
Lee Jones [李琼斯]
Linaro Services Technical Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: KASAN: use-after-free Read in tomoyo_realpath_from_path

2019-06-05 Thread Tetsuo Handa
Tetsuo Handa wrote:
> The problem is that TOMOYO is accessing already freed socket from 
> security_file_open()
> which later fails with -ENXIO (because we can't get file descriptor of 
> sockets via
> /proc/pid/fd/n interface), and the file descriptor is getting released before
> security_file_open() completes because we do not raise "struct file"->f_count 
> of
> the file which is accessible via /proc/pid/fd/n interface. We can avoid this 
> problem
> if we can avoid calling security_file_open() which after all fails with 
> -ENXIO.
> How should we handle this race? Let LSM modules check if security_file_open() 
> was
> called on a socket?

Well, just refusing security_file_open() is not sufficient, for open(O_PATH) 
allows installing
file descriptor where SOCKET_I(inode)->sk can change at any moment, and TOMOYO 
cannot tell
whether it is safe to access SOCKET_I(inode)->sk from security_inode_getattr().

But refusing open(O_PATH) as well might break userspace programs. Oh, no...


diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..ea69668e2cd8 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -728,6 +728,16 @@ static int do_dentry_open(struct file *f,
/* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
 
+   /*
+* Sockets must not be opened via /proc/pid/fd/n, even with O_PATH,
+* for SOCKET_I(inode)->sk can be kfree()d at any moment after a file
+* descriptor obtained by opening /proc/pid/fd/n was installed.
+*/
+   if (unlikely(S_ISSOCK(inode->i_mode))) {
+   error = (f->f_flags & O_PATH) ? -ENOENT : -ENXIO;
+   goto cleanup_file;
+   }
+
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = _fops;



#include 
#include 
#include 
#include 
#include 
#include 

/*
 * Reproducer for the race described above: a child process opens the
 * parent's socket through /proc/<pid>/fd/<n> with O_PATH, the parent then
 * closes the socket, and the child calls fstat() on the O_PATH descriptor
 * afterwards (which, per the discussion above, reaches
 * security_inode_getattr() while SOCKET_I(inode)->sk may already be gone).
 *
 * The sleep() durations only order the steps: parent close at ~2s,
 * child fstat at ~5s.
 */
int main(int argc, char *argv[])
{
	pid_t pid = getpid();
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	char buffer[128] = { };

	if (fork() == 0) {
		struct stat buf = { };

		/* Drop the inherited copy so only the parent holds the socket. */
		close(fd);
		/* 'pid' is the parent's pid, captured before fork(). */
		snprintf(buffer, sizeof(buffer) - 1, "/proc/%u/fd/%u", pid, fd);
		fd = open(buffer, __O_PATH);
		/* Wait until the parent has closed the socket. */
		sleep(5);
		fstat(fd, &buf);
		_exit(0);
	}
	/* Give the child time to open the /proc entry, then close the socket. */
	sleep(2);
	close(fd);
	return 0;
}



Quation needed For June Inquiry

2019-06-05 Thread Jpexcc Salesi
Hello dear,
 
We are in the market for your products after meeting at your stand during last 
expo.
 
Please kindly send us your latest catalog and price list so as to start a new 
project/order as promised during the exhibition. 
 
I would appreciate your response about the above details required so we can 
revert back to you asap.
 
Kind regards
 
Rhema Zoeh


RE: [PATCH v7 0/6] Introduced new Cadence USBSS DRD Driver.

2019-06-05 Thread Pawel Laszczak
>On 6/5/2019 17:03, Pawel Laszczak wrote:
>> This patch introduces new Cadence USBSS DRD driver to Linux kernel.
>>
>> The Cadence USBSS DRD Driver is a highly configurable IP Core which
>> can be instantiated as Dual-Role Device (DRD), Peripheral Only and
>> Host Only (XHCI)configurations.
>>
>The driver is not an IP Core, the hardware device is.
>
I remember that I had such a comment in the past. I don't know why there is "IP 
Core".

I will change it.
thanks ,
Pawel
>
>/Lars
>



[PATCH] ASoC: rt1011: Mark format integer literals as unsigned

2019-06-05 Thread Nathan Chancellor
Clang warns:

sound/soc/codecs/rt1011.c:1291:12: warning: integer literal is too large
to be represented in type 'long', interpreting as 'unsigned long' per
C89; this literal will have type 'long long' in C99 onwards
[-Wc99-compat]
format = 2147483648; /* 2^24 * 128 */
 ^
sound/soc/codecs/rt1011.c:2123:13: warning: integer literal is too large
to be represented in type 'long', interpreting as 'unsigned long' per
C89; this literal will have type 'long long' in C99 onwards
[-Wc99-compat]
format = 2147483648; /* 2^24 * 128 */
 ^
2 warnings generated.

Mark the integer literals as unsigned explicitly so that if the kernel
does ever bump the C standard it uses, the behavior is consistent.

Fixes: d6e65bb7ff0d ("ASoC: rt1011: Add RT1011 amplifier driver")
Link: https://github.com/ClangBuiltLinux/linux/issues/506
Signed-off-by: Nathan Chancellor 
---
 sound/soc/codecs/rt1011.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/rt1011.c b/sound/soc/codecs/rt1011.c
index 349d6db7ecd4..3a0ae80c5ee0 100644
--- a/sound/soc/codecs/rt1011.c
+++ b/sound/soc/codecs/rt1011.c
@@ -1288,7 +1288,7 @@ static int rt1011_r0_load_mode_put(struct snd_kcontrol 
*kcontrol,
if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) {
rt1011->r0_reg = ucontrol->value.integer.value[0];
 
-   format = 2147483648; /* 2^24 * 128 */
+   format = 2147483648U; /* 2^24 * 128 */
r0_integer = format / rt1011->r0_reg / 128;
r0_factor = ((format / rt1011->r0_reg * 100) / 128)
- (r0_integer * 100);
@@ -2120,7 +2120,7 @@ static int rt1011_calibrate(struct rt1011_priv *rt1011, 
unsigned char cali_flag)
dev_err(dev,"Calibrate R0 Failure\n");
ret = -EAGAIN;
} else {
-   format = 2147483648; /* 2^24 * 128 */
+   format = 2147483648U; /* 2^24 * 128 */
r0_integer = format / r0[0] / 128;
r0_factor = ((format / r0[0] * 100) / 128)
- (r0_integer * 100);
-- 
2.22.0.rc3



Re: [PATCH] sg: fix a double-fetch bug in sg_write()

2019-06-05 Thread Jiri Slaby
On 05. 06. 19, 17:35, Gen Zhang wrote:
> On Wed, Jun 05, 2019 at 08:41:11AM +0200, Jiri Slaby wrote:
>> On 31. 05. 19, 3:27, Gen Zhang wrote:
>>> In sg_write(), the opcode of the command is fetched the first time from 
>>> the userspace by __get_user(). Then the whole command, the opcode 
>>> included, is fetched again from userspace by __copy_from_user(). 
>>> However, a malicious user can change the opcode between the two fetches.
>>> This can cause inconsistent data and potential errors as cmnd is used in
>>> the following codes.
>>>
>>> Thus we should check opcode between the two fetches to prevent this.
>>>
>>> Signed-off-by: Gen Zhang 
>>> ---
>>> diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
>>> index d3f1531..a2971b8 100644
>>> --- a/drivers/scsi/sg.c
>>> +++ b/drivers/scsi/sg.c
>>> @@ -694,6 +694,8 @@ sg_write(struct file *filp, const char __user *buf, 
>>> size_t count, loff_t * ppos)
>>> hp->flags = input_size; /* structure abuse ... */
>>> hp->pack_id = old_hdr.pack_id;
>>> hp->usr_ptr = NULL;
>>> +   if (opcode != cmnd[0])
>>> +   return -EINVAL;
>>> if (__copy_from_user(cmnd, buf, cmd_size))
>>> return -EFAULT;
>>
>> You are sending the same patches like a broken machine. Please STOP this
>> and give people some time to actually review your patches! (Don't expect
>> replies in days.)
>>
> Thanks for your reply. I resubmitted this one after 8-day-no-reply. I 
> don't judge whether this is a short time period or not. I politely hope
> that you can reply more kindly.

There is no reason to be offended. I am just asking you to wait a bit
more before reposting. 8 days is too few. My personal experience says to
give patches like these something close to a month, esp. during the
merge window. The issues are present for a long time, nobody hit them
during that timeframe, so there is no reason to haste.

> I am just a PhD candidate. All I did is submitting patches, discussing 
> with maintainers in accordance with linux community rules for academic papers.

Yes, despite I have no idea what "linux community rules for academic
papers" are.

> I guess that you might be busy person and hope that submitting patches 
> didn't bother you.

It does not bother me at all. Patches are welcome, but newcomers tend to
send new versions of patches (or reposts) too quickly. It then leads to
wasting time of people where one person comments on one version and the
others don't see it and reply to some other.

thanks,
-- 
js
suse labs


Re: [PATCH V2 4/4] arm64/mm: Drop local variable vm_fault_t from __do_page_fault()

2019-06-05 Thread Anshuman Khandual



On 06/04/2019 08:26 PM, Catalin Marinas wrote:
> On Mon, Jun 03, 2019 at 12:11:25PM +0530, Anshuman Khandual wrote:
>> __do_page_fault() is over complicated with multiple goto statements. This
>> cleans up the code flow and while there drops local variable vm_fault_t.
> 
> I'd change the subject as well here to something like refactor or
> simplify __do_page_fault().

Sure.

> 
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 4bb65f3..41fa905 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -397,37 +397,29 @@ static void do_bad_area(unsigned long addr, unsigned 
>> int esr, struct pt_regs *re
>>  static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
>> unsigned int mm_flags, unsigned long vm_flags)
>>  {
>> -struct vm_area_struct *vma;
>> -vm_fault_t fault;
>> +struct vm_area_struct *vma = find_vma(mm, addr);
>>  
>> -vma = find_vma(mm, addr);
>> -fault = VM_FAULT_BADMAP;
>>  if (unlikely(!vma))
>> -goto out;
>> -if (unlikely(vma->vm_start > addr))
>> -goto check_stack;
>> +return VM_FAULT_BADMAP;
>>  
>>  /*
>>   * Ok, we have a good vm_area for this memory access, so we can handle
>>   * it.
>>   */
>> -good_area:
>> +if (unlikely(vma->vm_start > addr)) {
>> +if (!(vma->vm_flags & VM_GROWSDOWN))
>> +return VM_FAULT_BADMAP;
>> +if (expand_stack(vma, addr))
>> +return VM_FAULT_BADMAP;
>> +}
> 
> You could have a single return here:
> 
>   if (unlikely(vma->vm_start > addr) &&
>   (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, addr)))
>   return VM_FAULT_BADMAP;
> 
> Not sure it's any clearer though.
> 

TBH the proposed one seems clearer as it separates effect (vma->vm_start > addr)
from required permission check (vma->vm_flags & VM_GROWSDOWN) and required 
action
(expand_stack(vma, addr)). But I am happy to change as you have mentioned if 
that
is preferred.


[UPDATE][PATCH v4] HID: intel-ish-hid: fix wrong driver_data usage

2019-06-05 Thread Srinivas Pandruvada
From: Hyungwoo Yang 

Currently, in suspend() and resume(), ishtp client drivers are using
driver_data to get "struct ishtp_cl_device" object which is set by
bus driver. It's wrong since the driver_data should not be owned by the bus.
driver_data should be owned by the corresponding ishtp client driver.
Due to this, some ishtp client driver like cros_ec_ishtp which uses
its driver_data to transfer its data to its child doesn't work correctly.

So this patch removes setting driver_data in bus driver and instead of
using driver_data to get "struct ishtp_cl_device", since "struct device"
is embedded in "struct ishtp_cl_device", we introduce a helper function
that returns "struct ishtp_cl_device" from "struct device".

Signed-off-by: Hyungwoo Yang 
Acked-by: Srinivas Pandruvada 
---
For 5.3

v4- Updated
Added version history for tracking by Srinivas Pandruvada

v4
- Cleaned up submission by removing linux-next merge commit from the
  series.

v3
-Remove cros-ec dependency of the patch which is not in the mainline.

v2
-Make patch so that it can be applied to mainline kernel.
-Updated description to add why this patch is required?


 drivers/hid/intel-ish-hid/ishtp-hid-client.c |  4 ++--
 drivers/hid/intel-ish-hid/ishtp/bus.c| 15 ++-
 include/linux/intel-ish-client-if.h  |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c 
b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 56777a43e69c..19102a3be4ca 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -899,7 +899,7 @@ static int hid_ishtp_cl_reset(struct ishtp_cl_device 
*cl_device)
  */
 static int hid_ishtp_cl_suspend(struct device *device)
 {
-   struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+   struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
 
@@ -920,7 +920,7 @@ static int hid_ishtp_cl_suspend(struct device *device)
  */
 static int hid_ishtp_cl_resume(struct device *device)
 {
-   struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+   struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
 
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c 
b/drivers/hid/intel-ish-hid/ishtp/bus.c
index fb8ca12955b4..4b4a6047dc72 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -479,7 +479,6 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct 
ishtp_device *dev,
}
 
ishtp_device_ready = true;
-   dev_set_drvdata(>dev, device);
 
return device;
 }
@@ -647,6 +646,20 @@ void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device)
 }
 EXPORT_SYMBOL(ishtp_get_drvdata);
 
+/**
+ * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance
+ * @device: device instance
+ *
+ * Get ish_cl_device instance which embeds device instance in it.
+ *
+ * Return: pointer to ishtp_cl_device instance
+ */
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device)
+{
+   return to_ishtp_cl_device(device);
+}
+EXPORT_SYMBOL(ishtp_dev_to_cl_device);
+
 /**
  * ishtp_bus_new_client() - Create a new client
  * @dev:   ISHTP device instance
diff --git a/include/linux/intel-ish-client-if.h 
b/include/linux/intel-ish-client-if.h
index 16255c2ca2f4..0d6b4bc191c5 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -103,6 +103,7 @@ void ishtp_put_device(struct ishtp_cl_device *cl_dev);
 void ishtp_get_device(struct ishtp_cl_device *cl_dev);
 void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data);
 void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device);
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev);
 int ishtp_register_event_cb(struct ishtp_cl_device *device,
void (*read_cb)(struct ishtp_cl_device *));
 struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev,
-- 
2.17.2



Re: rcu_read_lock lost its compiler barrier

2019-06-05 Thread Herbert Xu
On Tue, Jun 04, 2019 at 10:44:18AM -0400, Alan Stern wrote:
>
> Currently the LKMM says the test is allowed and there is a data race, 
> but this answer clearly is wrong since it would violate the RCU 
> guarantee.

Thank you! This is what I tried to say all along in this thread
but you expressed it in a much better way :)
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


RE: [PATCH v3] PCI: xilinx-nwl: Fix Multi MSI data programming

2019-06-05 Thread Bharat Kumar Gogada
> On 31/05/2019 17:09, Lorenzo Pieralisi wrote:
> > [+Marc]
> >
> > On Wed, May 29, 2019 at 06:07:49PM +0530, Bharat Kumar Gogada wrote:
> >> The current Multi MSI data programming fails if multiple end points
> >> requesting MSI and multi MSI are connected with switch, i.e the
> >> current multi MSI data being given is not considering the number of
> >> vectors being requested in case of multi MSI.
> >> Ex: Two EP's connected via switch, EP1 requesting single MSI first,
> >> EP2 requesting Multi MSI of count four. The current code gives MSI
> >> data 0x0 to EP1 and 0x1 to EP2, but EP2 can modify lower two bits due
> >> to which EP2 also sends interrupt with MSI data 0x0 which results in
> >> always invoking virq of EP1 due to which EP2 MSI interrupt never gets
> >> handled.
> >
> > If this is a problem it is not the only driver where it should be
> > fixed it seems. CC'ed Marc in case I have missed something in relation
> > to MSI IRQs but AFAIU it looks like HW is allowed to toggle bits
> > (according to bits[6:4] in Message Control for MSI) in the MSI data,
> > given that the data written is the hwirq number (in this specific MSI
> > controller) it ought to be fixed.
> 
> Yeah, it looks like a number of MSI controllers could be quite broken in this
> particular area.
> 
> >
> > The commit log and patch should be rewritten (I will do that) but
> > first I would like to understand if there are more drivers to be
> > updated.
> >
> > 
Hi Lorenzo and Marc, thanks for your time.
Marc, I'm yet to test the below suggested solution,
GIC v2m and GIC v3 supports multi MSI, do we see above issue in these MSI 
controllers ?

Regards,
Bharat
> >
> >> Fix Multi MSI data programming with required alignment by using
> >> number of vectors being requested.
> >>
> >> Fixes: ab597d35ef11 ("PCI: xilinx-nwl: Add support for Xilinx NWL
> >> PCIe Host Controller")
> >> Signed-off-by: Bharat Kumar Gogada 
> >> ---
> >> V3:
> >>  - Added example description of the issue
> >> ---
> >>  drivers/pci/controller/pcie-xilinx-nwl.c | 11 ++-
> >>  1 file changed, 10 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c
> >> b/drivers/pci/controller/pcie-xilinx-nwl.c
> >> index 81538d7..8efcb8a 100644
> >> --- a/drivers/pci/controller/pcie-xilinx-nwl.c
> >> +++ b/drivers/pci/controller/pcie-xilinx-nwl.c
> >> @@ -483,7 +483,16 @@ static int nwl_irq_domain_alloc(struct
> irq_domain *domain, unsigned int virq,
> >>int i;
> >>
> >>mutex_lock(>lock);
> >> -  bit = bitmap_find_next_zero_area(msi->bitmap, INT_PCI_MSI_NR, 0,
> >> +
> >> +  /*
> >> +   * Multi MSI count is requested in power of two
> >> +   * Check if multi msi is requested
> >> +   */
> >> +  if (nr_irqs % 2 == 0)
> >> +  bit = bitmap_find_next_zero_area(msi->bitmap,
> INT_PCI_MSI_NR, 0,
> >> +   nr_irqs, nr_irqs - 1);
> >> +  else
> >> +  bit = bitmap_find_next_zero_area(msi->bitmap,
> INT_PCI_MSI_NR, 0,
> >> nr_irqs, 0);
> >>if (bit >= INT_PCI_MSI_NR) {
> >>mutex_unlock(>lock);
> >> --
> >> 2.7.4
> >>
> 
> This doesn't look like the best fix. The only case where nr_irqs is not
> set to 1 is when using Multi-MSI, so the '% 2' case actually covers all
> cases. Now, and in the interest of consistency, other drivers use a
> construct such as:
> 
> offset = bitmap_find_free_region(bitmap, bitmap_size,
>get_count_order(nr_irqs));
> 
> which has the advantage of dealing with the bitmap setting as well.
> 
> I'd suggest something like this (completely untested):
> 
> diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c
> b/drivers/pci/controller/pcie-xilinx-nwl.c
> index 3b031f00a94a..8b9b58909e7c 100644
> --- a/drivers/pci/controller/pcie-xilinx-nwl.c
> +++ b/drivers/pci/controller/pcie-xilinx-nwl.c
> @@ -482,15 +482,13 @@ static int nwl_irq_domain_alloc(struct
> irq_domain *domain, unsigned int virq,
>   int i;
> 
>   mutex_lock(>lock);
> - bit = bitmap_find_next_zero_area(msi->bitmap, INT_PCI_MSI_NR, 0,
> -  nr_irqs, 0);
> - if (bit >= INT_PCI_MSI_NR) {
> + bit = bitmap_find_free_region(msi->bitmap, INT_PCI_MSI_NR,
> +   get_count_order(nr_irqs));
> + if (bit < 0) {
>   mutex_unlock(>lock);
>   return -ENOSPC;
>   }
> 
> - bitmap_set(msi->bitmap, bit, nr_irqs);
> -
>   for (i = 0; i < nr_irqs; i++) {
>   irq_domain_set_info(domain, virq + i, bit + i, _irq_chip,
>   domain->host_data, handle_simple_irq,
> @@ -508,7 +506,7 @@ static void nwl_irq_domain_free(struct irq_domain
> *domain, unsigned int virq,
>   struct nwl_msi *msi = >msi;
> 
>   mutex_lock(&msi->lock);
> - bitmap_clear(msi->bitmap, data->hwirq, nr_irqs);
> + bitmap_release_region(msi->bitmap, data->hwirq,
> get_count_order(nr_irqs));
>   

memory leak in rawv6_sendmsg

2019-06-05 Thread syzbot

Hello,

syzbot found the following crash on:

HEAD commit:c50bbf61 Merge tag 'platform-drivers-x86-v5.2-2' of git://..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=14e86bf8a0
kernel config:  https://syzkaller.appspot.com/x/.config?x=61dd9e15a761691d
dashboard link: https://syzkaller.appspot.com/bug?extid=0210b383c62bb2a35e32
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=131e5c9aa0
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=14092dbca0

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+0210b383c62bb2a35...@syzkaller.appspotmail.com

ed '10.128.1.53' (ECDSA) to the list of known hosts.
executing program
executing program
executing program
BUG: memory leak
unreferenced object 0x8881099cc500 (size 224):
  comm "syz-executor618", pid 7230, jiffies 4294944637 (age 13.010s)
  hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
  backtrace:
[] kmemleak_alloc_recursive  
include/linux/kmemleak.h:55 [inline]

[] slab_post_alloc_hook mm/slab.h:439 [inline]
[] slab_alloc_node mm/slab.c:3269 [inline]
[] kmem_cache_alloc_node+0x153/0x2a0 mm/slab.c:3579
[] __alloc_skb+0x6e/0x210 net/core/skbuff.c:198
[<7051ec41>] alloc_skb include/linux/skbuff.h:1058 [inline]
[<7051ec41>] alloc_skb_with_frags+0x5f/0x250  
net/core/skbuff.c:5327
[] sock_alloc_send_pskb+0x269/0x2a0  
net/core/sock.c:2225

[<07cd012b>] sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2242
[] rawv6_send_hdrinc net/ipv6/raw.c:644 [inline]
[] rawv6_sendmsg+0x9c9/0x12f0 net/ipv6/raw.c:935
[<3e27012a>] inet_sendmsg+0x64/0x120 net/ipv4/af_inet.c:802
[<5750e5ca>] sock_sendmsg_nosec net/socket.c:652 [inline]
[<5750e5ca>] sock_sendmsg+0x54/0x70 net/socket.c:671
[<2a4faea6>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2292
[] __sys_sendmsg+0x80/0xf0 net/socket.c:2330
[] __do_sys_sendmsg net/socket.c:2339 [inline]
[] __se_sys_sendmsg net/socket.c:2337 [inline]
[] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2337
[<388062fd>] do_syscall_64+0x76/0x1a0  
arch/x86/entry/common.c:301

[<9c436e23>] entry_SYSCALL_64_after_hwframe+0x44/0xa9



---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches


[PATCH v2] arm64: dts: qcom: Add Dragonboard 845c

2019-06-05 Thread Bjorn Andersson
This adds an initial dts for the Dragonboard 845. Supported
functionality includes Debug UART, UFS, USB-C (peripheral), USB-A
(host), microSD-card and Bluetooth.

Initializing the SMMU is clearing the mapping used for the splash screen
framebuffer, which causes the board to reboot. This can be worked around
using:

  fastboot oem select-display-panel none

Signed-off-by: Bjorn Andersson 
---

Changes since v1:
- Dropped PCIe as this hasn't landed
- Added adsp_pas and cdsp_pas
- Added regulators for wifi
- Updated LED labels to match 96boards specification

 arch/arm64/boot/dts/qcom/Makefile  |   1 +
 arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 556 +
 2 files changed, 557 insertions(+)
 create mode 100644 arch/arm64/boot/dts/qcom/sdm845-db845c.dts

diff --git a/arch/arm64/boot/dts/qcom/Makefile 
b/arch/arm64/boot/dts/qcom/Makefile
index 21d548f02d39..b3fe72ff2955 100644
--- a/arch/arm64/boot/dts/qcom/Makefile
+++ b/arch/arm64/boot/dts/qcom/Makefile
@@ -7,6 +7,7 @@ dtb-$(CONFIG_ARCH_QCOM) += msm8992-bullhead-rev-101.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= msm8994-angler-rev-101.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= msm8996-mtp.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= msm8998-mtp.dtb
+dtb-$(CONFIG_ARCH_QCOM)+= sdm845-db845c.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= sdm845-mtp.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= qcs404-evb-1000.dtb
 dtb-$(CONFIG_ARCH_QCOM)+= qcs404-evb-4000.dtb
diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts 
b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
new file mode 100644
index ..0424227f0c96
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/dts-v1/;
+
+#include 
+#include 
+#include 
+#include "sdm845.dtsi"
+#include "pm8998.dtsi"
+#include "pmi8998.dtsi"
+
+/ {
+   model = "Thundercomm Dragonboard 845c";
+   compatible = "thundercomm,db845c", "qcom,sdm845";
+
+   aliases {
+   serial0 = 
+   hsuart0 = 
+   };
+
+   chosen {
+   stdout-path = "serial0:115200n8";
+   };
+
+   dc12v: dc12v-regulator {
+   compatible = "regulator-fixed";
+   regulator-name = "DC12V";
+   regulator-min-microvolt = <1200>;
+   regulator-max-microvolt = <1200>;
+   regulator-always-on;
+   };
+
+   lt9611_1v8: lt9611-vdd18-regulator {
+   compatible = "regulator-fixed";
+   regulator-name = "LT9611_1V8";
+
+   vin-supply = <_5v>;
+   regulator-min-microvolt = <180>;
+   regulator-max-microvolt = <180>;
+
+   gpio = < 89 GPIO_ACTIVE_HIGH>;
+   enable-active-high;
+   };
+
+   lt9611_3v3: lt9611-3v3 {
+   compatible = "regulator-fixed";
+   regulator-name = "LT9611_3V3";
+
+   vin-supply = <_3v3>;
+   regulator-min-microvolt = <330>;
+   regulator-max-microvolt = <330>;
+
+   // TODO: make it possible to drive same GPIO from two clients
+   // gpio = < 89 GPIO_ACTIVE_HIGH>;
+   // enable-active-high;
+   };
+
+   pcie0_1p05v: pcie-0-1p05v-regulator {
+   compatible = "regulator-fixed";
+   regulator-name = "PCIE0_1.05V";
+
+   vin-supply = <>;
+   regulator-min-microvolt = <105>;
+   regulator-max-microvolt = <105>;
+
+   // TODO: make it possible to drive same GPIO from two clients
+   // gpio = < 90 GPIO_ACTIVE_HIGH>;
+   // enable-active-high;
+   };
+
+   pcie0_3p3v_dual: vldo-3v3-regulator {
+   compatible = "regulator-fixed";
+   regulator-name = "VLDO_3V3";
+
+   vin-supply = <>;
+   regulator-min-microvolt = <330>;
+   regulator-max-microvolt = <330>;
+
+   gpio = < 90 GPIO_ACTIVE_HIGH>;
+   enable-active-high;
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_pwren_state>;
+   };
+
+   gpio_keys {
+   compatible = "gpio-keys";
+   #address-cells = <1>;
+   #size-cells = <0>;
+   autorepeat;
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_up_pin_a>;
+
+   vol-up {
+   label = "Volume Up";
+   linux,code = ;
+   gpios = <_gpio 6 GPIO_ACTIVE_LOW>;
+   };
+   };
+
+   leds {
+   compatible = "gpio-leds";
+
+   user4 {
+   label = "green:user4";
+   gpios = <_gpio 13 GPIO_ACTIVE_HIGH>;
+   linux,default-trigger = "panic-indicator";
+   default-state = "off";
+   };
+
+   wlan {
+   label = 

Re: rcu_read_lock lost its compiler barrier

2019-06-05 Thread Herbert Xu
On Tue, Jun 04, 2019 at 08:30:39PM -0700, Paul E. McKenney wrote:
>
> Understood.  Does the patch I sent out a few hours ago cover it?  Or is
> something else needed?

It looks good to me.

> Other than updates to the RCU requirements documentation, which is
> forthcoming.

Thanks Paul.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


[PATCH -next] phy: usb: phy-brcm-usb: Fix platform_no_drv_owner.cocci warnings

2019-06-05 Thread YueHaibing
Remove .owner field if calls are used which set it automatically
Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

Signed-off-by: YueHaibing 
---
 drivers/phy/broadcom/phy-brcm-usb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb.c 
b/drivers/phy/broadcom/phy-brcm-usb.c
index 292d5b3fc66c..f050de6fd109 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -451,7 +451,6 @@ static struct platform_driver brcm_usb_driver = {
.remove = brcm_usb_phy_remove,
.driver = {
.name   = "brcmstb-usb-phy",
-   .owner  = THIS_MODULE,
.pm = _usb_phy_pm_ops,
.of_match_table = brcm_usb_dt_ids,
},







[PATCH v2] kbuild: use more portable 'command -v' for cc-cross-prefix

2019-06-05 Thread Masahiro Yamada
To print the pathname that will be used by shell in the current
environment, 'command -v' is a standardized way. [1]

'which' is also often used in scripts, but it is less portable.

When I worked on commit bd55f96fa9fc ("kbuild: refactor cc-cross-prefix
implementation"), I was eager to use 'command -v' but it did not work.
(The reason is explained below.)

I kept 'which' as before but got rid of '> /dev/null 2>&1' as I
thought it was no longer needed. Sorry, I was wrong.

It works well on my Ubuntu machine, but Alexey Brodkin reports noisy
warnings on CentOS7 when 'which' fails to find the given command in
the PATH environment.

  $ which foo
  which: no foo in 
(/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin)

Given that behavior of 'which' depends on system (and it may not be
installed by default), I want to try 'command -v' once again.

The specification [1] clearly describes the behavior of 'command -v'
when the given command is not found:

  Otherwise, no output shall be written and the exit status shall reflect
  that the name was not found.

However, we need a little magic to use 'command -v' from Make.

$(shell ...) passes the argument to a subshell for execution, and
returns the standard output of the command.

Here is a trick. GNU Make may optimize this by executing the command
directly instead of forking a subshell, if no shell special characters
are found in the command and omitting the subshell will not change the
behavior.

In this case, no shell special character is used. So, Make will try
to run it directly. However, 'command' is a shell-builtin command,
so Make would fail to find it in the PATH environment:

  $ make ARCH=m68k defconfig
  make: command: Command not found
  make: command: Command not found
  make: command: Command not found

In fact, Make has a table of shell-builtin commands because it must
ask the shell to execute them.

Until recently, 'command' was missing in the table.

This issue was fixed by the following commit:

| commit 1af314465e5dfe3e8baa839a32a72e83c04f26ef
| Author: Paul Smith 
| Date:   Sun Nov 12 18:10:28 2017 -0500
|
| * job.c: Add "command" as a known shell built-in.
|
| This is not a POSIX shell built-in but it's common in UNIX shells.
| Reported by Nick Bowler .

Because the latest release is GNU Make 4.2.1 in 2016, this commit is
not included in any released versions. (But some distributions may
have back-ported it.)

We need to trick Make to spawn a subshell. There are various ways to
do so:

 1) Use a shell special character '~' as dummy

$(shell : ~; command -v $(c)gcc)

 2) Use a variable reference, which always expands to the empty string
(suggested by David Laight)

$(shell command$${x:+} -v $(c)gcc)

 3) Use redirect

$(shell command -v $(c)gcc 2>/dev/null)

I chose 3) to not confuse people. The stderr would not be polluted
anyway, but it will provide extra safety, and is easy to understand.

Tested on Make 3.81, 3.82, 4.0, 4.1, 4.2, 4.2.1

[1] http://pubs.opengroup.org/onlinepubs/9699919799/utilities/command.html

Fixes: bd55f96fa9fc ("kbuild: refactor cc-cross-prefix implementation")
Cc: linux-stable  # 5.1
Reported-by: Alexey Brodkin 
Signed-off-by: Masahiro Yamada 
Tested-by: Alexey Brodkin 
---

Changes in v2:
  - Use dummy redirect

 scripts/Kbuild.include | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 85d758233483..fd8aa314c156 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -74,8 +74,11 @@ endef
 # Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
 # Return first  where a gcc is found in PATH.
 # If no gcc found in PATH with listed prefixes return nothing
+#
+# Note: 2>/dev/null is here to force Make to invoke a shell. This workaround
+# is needed because this issue was only fixed after GNU Make 4.2.1 release.
 cc-cross-prefix = $(firstword $(foreach c, $(filter-out -%, $(1)), \
-   $(if $(shell which $(c)gcc), $(c
+   $(if $(shell command -v $(c)gcc 2>/dev/null), $(c
 
 # output directory for tests below
 TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
-- 
2.17.1



Re: [PATCH v2 6/7] perf diff: Print the basic block cycles diff

2019-06-05 Thread Jin, Yao




On 6/5/2019 7:44 PM, Jiri Olsa wrote:

On Mon, Jun 03, 2019 at 10:36:16PM +0800, Jin Yao wrote:

perf record -b ./div
perf record -b ./div

Following is the default perf diff output

  # perf diff

  # Event 'cycles'
  #
  # Baseline  Delta Abs  Shared Object Symbol
  #   .    
  #
  49.03% +0.30%  div   [.] main
  16.29% -0.20%  libc-2.23.so  [.] __random
  18.82% -0.07%  libc-2.23.so  [.] __random_r
   8.11% -0.04%  div   [.] compute_flag
   2.25% +0.01%  div   [.] rand@plt
   0.00% +0.01%  [kernel.vmlinux]  [k] task_tick_fair
   5.46% +0.01%  libc-2.23.so  [.] rand
   0.01% -0.01%  [kernel.vmlinux]  [k] native_irq_return_iret
   0.00% -0.00%  [kernel.vmlinux]  [k] interrupt_entry

This patch creates a new computation selection 'cycles'.

  # perf diff -c cycles

  # Event 'cycles'
  #
  # Baseline Block cycles diff [start:end]  Shared Object Symbol
  #       

  #
  49.03%-9 [ 4ef: 520]  div   [.] main
  49.03% 0 [ 4e8: 4ea]  div   [.] main
  49.03% 0 [ 4ef: 500]  div   [.] main
  49.03% 0 [ 4ef: 51c]  div   [.] main
  49.03% 0 [ 4ef: 535]  div   [.] main
  18.82% 0 [   3ac40:   3ac4d]  libc-2.23.so  [.] 
__random_r
  18.82% 0 [   3ac40:   3ac5c]  libc-2.23.so  [.] 
__random_r
  18.82% 0 [   3ac40:   3ac76]  libc-2.23.so  [.] 
__random_r
  18.82% 0 [   3ac40:   3ac88]  libc-2.23.so  [.] 
__random_r
  18.82% 0 [   3ac90:   3ac9c]  libc-2.23.so  [.] 
__random_r
  16.29%-8 [   3aac0:   3aac0]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3aac0:   3aad2]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3aae0:   3aae7]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3ab03:   3ab0f]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3ab14:   3ab1b]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3ab28:   3ab2e]  libc-2.23.so  [.] 
__random
  16.29% 0 [   3ab4a:   3ab53]  libc-2.23.so  [.] 
__random
   8.11% 0 [ 640: 644]  div   [.] 
compute_flag
   8.11% 0 [ 649: 659]  div   [.] 
compute_flag
   5.46% 0 [   3af60:   3af60]  libc-2.23.so  [.] rand
   5.46% 0 [   3af60:   3af64]  libc-2.23.so  [.] rand
   2.25% 0 [ 490: 490]  div   [.] 
rand@plt
   0.01%26 [  c00a27:  c00a27]  [kernel.vmlinux]  [k] 
native_irq_return_iret
   0.00%  -157 [  2bf9f2:  2bfa63]  [kernel.vmlinux]  [k] 
update_blocked_averages
   0.00%   -56 [  2bf980:  2bf9d3]  [kernel.vmlinux]  [k] 
update_blocked_averages
   0.00%48 [  2bf934:  2bf942]  [kernel.vmlinux]  [k] 
update_blocked_averages
   0.00% 3 [  2bfb38:  2bfb67]  [kernel.vmlinux]  [k] 
update_blocked_averages
   0.00% 0 [  2bf968:  2bf97b]  [kernel.vmlinux]  [k] 
update_blocked_averages



so what I'd expect would be Baseline column with cycles and another
column showing the difference (in cycles) for given symbol


"[start:end]" indicates the basic block range. The output is sorted
by "Baseline" and the basic blocks in the same function are sorted
by cycles diff.


hum, why are there multiple basic blocks [start:end] for a symbol?

thanks,
jirka



A basic block is the code between two branches (a branch being, for 
example, jmp, call, ret, interrupt, ...). So it's expected that one 
function (a symbol is a function) may contain multiple basic blocks.


The idea is, sorting by baseline to display the hottest functions and 
the second column shows the cycles diff of blocks in this function 
(comparing between different perf data files). This would allow to 
identify performance changes in specific code accurately and effectively.


Thanks
Jin Yao




Re: [PATCH] firmware: ti_sci: Add support for processor control

2019-06-05 Thread Lokesh Vutla



On 06/06/19 4:03 AM, Suman Anna wrote:
> Texas Instrument's System Control Interface (TI-SCI) Message Protocol
> is used in Texas Instrument's System on Chip (SoC) such as those
> in K3 family AM654 SoC to communicate between various compute
> processors with a central system controller entity.
> 
> The system controller provides various services including the control
> of other compute processors within the SoC. Extend the TI-SCI protocol
> support to add various TI-SCI commands to invoke services associated
> with power and reset control, and boot vector management of the
> various compute processors from the Linux kernel.
> 
> Signed-off-by: Suman Anna 

Reviewed-by: Lokesh Vutla 

Thanks and regards,
Lokesh

> ---
> Hi Santosh, Nishanth, Tero,
> 
> Appreciate it if this patch can be picked up for the 5.3 merge window.
> This is a dependency patch for my various remoteproc drivers on TI K3
> SoCs. Patch is on top of v5.2-rc1.
> 
> regards
> Suman
> 
>  drivers/firmware/ti_sci.c  | 350 +
>  drivers/firmware/ti_sci.h  | 135 ++
>  include/linux/soc/ti/ti_sci_protocol.h |  31 +++
>  3 files changed, 516 insertions(+)
> 
> diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
> index 36ce11a67235..596acad89e86 100644
> --- a/drivers/firmware/ti_sci.c
> +++ b/drivers/firmware/ti_sci.c
> @@ -2004,6 +2004,348 @@ static int ti_sci_cmd_free_event_map(const struct 
> ti_sci_handle *handle,
>  ia_id, vint, global_event, vint_status_bit, 0);
>  }
>  
> +/**
> + * ti_sci_cmd_proc_request() - Command to request a physical processor 
> control
> + * @handle:  Pointer to TI SCI handle
> + * @proc_id: Processor ID this request is for
> + *
> + * Return: 0 if all went well, else returns appropriate error value.
> + */
> +static int ti_sci_cmd_proc_request(const struct ti_sci_handle *handle,
> +u8 proc_id)
> +{
> + struct ti_sci_msg_req_proc_request *req;
> + struct ti_sci_msg_hdr *resp;
> + struct ti_sci_info *info;
> + struct ti_sci_xfer *xfer;
> + struct device *dev;
> + int ret = 0;
> +
> + if (!handle)
> + return -EINVAL;
> + if (IS_ERR(handle))
> + return PTR_ERR(handle);
> +
> + info = handle_to_ti_sci_info(handle);
> + dev = info->dev;
> +
> + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_PROC_REQUEST,
> +TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
> +sizeof(*req), sizeof(*resp));
> + if (IS_ERR(xfer)) {
> + ret = PTR_ERR(xfer);
> + dev_err(dev, "Message alloc failed(%d)\n", ret);
> + return ret;
> + }
> + req = (struct ti_sci_msg_req_proc_request *)xfer->xfer_buf;
> + req->processor_id = proc_id;
> +
> + ret = ti_sci_do_xfer(info, xfer);
> + if (ret) {
> + dev_err(dev, "Mbox send fail %d\n", ret);
> + goto fail;
> + }
> +
> + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf;
> +
> + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
> +
> +fail:
> + ti_sci_put_one_xfer(>minfo, xfer);
> +
> + return ret;
> +}
> +
> +/**
> + * ti_sci_cmd_proc_release() - Command to release a physical processor 
> control
> + * @handle:  Pointer to TI SCI handle
> + * @proc_id: Processor ID this request is for
> + *
> + * Return: 0 if all went well, else returns appropriate error value.
> + */
> +static int ti_sci_cmd_proc_release(const struct ti_sci_handle *handle,
> +u8 proc_id)
> +{
> + struct ti_sci_msg_req_proc_release *req;
> + struct ti_sci_msg_hdr *resp;
> + struct ti_sci_info *info;
> + struct ti_sci_xfer *xfer;
> + struct device *dev;
> + int ret = 0;
> +
> + if (!handle)
> + return -EINVAL;
> + if (IS_ERR(handle))
> + return PTR_ERR(handle);
> +
> + info = handle_to_ti_sci_info(handle);
> + dev = info->dev;
> +
> + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_PROC_RELEASE,
> +TI_SCI_FLAG_REQ_ACK_ON_PROCESSED,
> +sizeof(*req), sizeof(*resp));
> + if (IS_ERR(xfer)) {
> + ret = PTR_ERR(xfer);
> + dev_err(dev, "Message alloc failed(%d)\n", ret);
> + return ret;
> + }
> + req = (struct ti_sci_msg_req_proc_release *)xfer->xfer_buf;
> + req->processor_id = proc_id;
> +
> + ret = ti_sci_do_xfer(info, xfer);
> + if (ret) {
> + dev_err(dev, "Mbox send fail %d\n", ret);
> + goto fail;
> + }
> +
> + resp = (struct ti_sci_msg_hdr *)xfer->tx_message.buf;
> +
> + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV;
> +
> +fail:
> + ti_sci_put_one_xfer(>minfo, xfer);
> +
> + return ret;
> +}
> +
> +/**
> + * ti_sci_cmd_proc_handover() - Command to handover a physical processor
> + *   control to a host 

Re: general protection fault in rb_erase (2)

2019-06-05 Thread syzbot

syzbot has found a reproducer for the following crash on:

HEAD commit:156c0591 Merge tag 'linux-kselftest-5.2-rc4' of git://git...
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=15790062a0
kernel config:  https://syzkaller.appspot.com/x/.config?x=60564cb52ab29d5b
dashboard link: https://syzkaller.appspot.com/bug?extid=e8c40862180d8949d624
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=11f031fea0

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+e8c40862180d8949d...@syzkaller.appspotmail.com

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault:  [#1] PREEMPT SMP KASAN
CPU: 0 PID: 9547 Comm: syz-executor.4 Not tainted 5.2.0-rc3+ #20
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

RIP: 0010:rb_set_parent_color include/linux/rbtree_augmented.h:119 [inline]
RIP: 0010:rb_erase_color lib/rbtree.c:363 [inline]
RIP: 0010:rb_erase+0x715/0x1c10 lib/rbtree.c:450
Code: 00 00 4c 89 f1 49 89 44 24 10 48 c1 e9 03 80 3c 19 00 0f 85 77 14 00  
00 48 89 c1 4d 89 e5 4d 89 67 08 48 c1 e9 03 49 83 cd 01 <80> 3c 19 00 0f  
85 63 0f 00 00 4c 89 e1 4c 89 28 48 c1 e9 03 80 3c

RSP: 0018:8880ae809d50 EFLAGS: 00010082
RAX: a25210b71f27 RBX: dc00 RCX: 144a4216e3e4
RDX: ed1015d04db8 RSI: 8880ae826dc0 RDI: 888079b6fac0
RBP: 8880ae809d98 R08: 888079b6fac8 R09: ed1015d06be0
R10: ed1015d06bdf R11: 8880ae835efb R12: 888079b6fab8
R13: 888079b6fab9 R14: 8880852c6048 R15: 8880852c6040
FS:  563c1940() GS:8880ae80() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 022f2e80 CR3: 8d675000 CR4: 001406f0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 
 timerqueue_del+0x86/0x150 lib/timerqueue.c:74
 __remove_hrtimer+0xa8/0x1c0 kernel/time/hrtimer.c:975
 __run_hrtimer kernel/time/hrtimer.c:1371 [inline]
 __hrtimer_run_queues+0x2a8/0xdd0 kernel/time/hrtimer.c:1451
 hrtimer_interrupt+0x314/0x770 kernel/time/hrtimer.c:1509
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1041 [inline]
 smp_apic_timer_interrupt+0x111/0x550 arch/x86/kernel/apic/apic.c:1066
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:806
 
Modules linked in:

==



Re: [PATCH] [RFC] dmaengine: add fifo_size member

2019-06-05 Thread Sameer Pujar

Sorry for late reply.
[Resending the reply since delivery failed for few recipients]

On 5/6/2019 9:20 PM, Vinod Koul wrote:

On 06-05-19, 18:34, Sameer Pujar wrote:

On 5/4/2019 3:53 PM, Vinod Koul wrote:

On 02-05-19, 18:59, Sameer Pujar wrote:

On 5/2/2019 5:55 PM, Vinod Koul wrote:

On 02-05-19, 16:23, Sameer Pujar wrote:

On 5/2/2019 11:34 AM, Vinod Koul wrote:

On 30-04-19, 17:00, Sameer Pujar wrote:

During the DMA transfers from memory to I/O, it was observed that transfers
were inconsistent and resulted in glitches for audio playback. It happened
because fifo size on DMA did not match with slave channel configuration.

Currently the 'dma_slave_config' structure does not have a field for fifo size.
Hence the platform pcm driver cannot pass the fifo size as a slave_config.
Note that 'snd_dmaengine_dai_dma_data' structure has fifo_size field which
cannot be used to pass the size info. This patch introduces fifo_size field
and the same can be populated on slave side. Users can set required size
for slave peripheral (multiple channels can be independently running with
different fifo sizes) and the corresponding sizes are programmed through
dma_slave_config on DMA side.

FIFO size is a hardware property not sure why you would want an
interface to program that?

On mismatch, I guess you need to take care of src/dst_maxburst..

Yes, FIFO size is a HW property. But it is SW configurable (at least in my
case) on
slave side and can be set to different sizes. The src/dst_maxburst is

Are you sure, have you talked to HW folks on that? IIUC you are
programming the data to be used in FIFO not the FIFO length!

Yes, I mentioned about FIFO length.

1. MAX FIFO size is fixed in HW. But there is a way to limit the usage per
channel
     in multiples of 64 bytes.
2. Having a separate member would give independent control over MAX BURST
SIZE and
     FIFO SIZE.

programmed
for specific values, I think this depends on few factors related to
bandwidth
needs of client, DMA needs of the system etc.,

Precisely


In such cases how does DMA know the actual FIFO depth of slave peripheral?

Why should DMA know? Its job is to push/pull data as configured by
peripheral driver. The peripheral driver knows and configures DMA
accordingly.

I am not sure if there is any HW logic that mandates DMA to know the size
of configured FIFO depth on slave side. I will speak to HW folks and
would update here.

I still do not comprehend why dma would care about slave side
configuration. In the absence of patch which uses this I am not sure
what you are trying to do...

I am using DMA HW in cyclic mode for data transfers to Audio sub-system.
In such cases flow control on DMA transfers is essential, since I/O is

right and people use burst size for precisely that!


consuming/producing the data at slower rate. The DMA transfer is enabled/
disabled during start/stop of audio playback/capture sessions through ALSA
callbacks and DMA runs in cyclic mode. Hence DMA is the one which is doing
flow control and it is necessary for it to know the peripheral FIFO depth
to avoid overruns/underruns.

not really, knowing that doesn't help in any way you have described! DMA
pushes/pulls data and that is controlled by burst configured by slave
(so it knows what to expect and programs things accordingly)

you are really going other way around about the whole picture. FWIW that
is how *other* folks do audio with dmaengine!
I discussed this internally with HW folks and below is the reason why 
DMA needs

to know FIFO size.

- FIFOs reside in peripheral device(ADMAIF), which is the ADMA interface 
to the audio sub-system.
- ADMAIF has multiple channels and share FIFO buffer for individual 
operations. There is a provision
  to allocate specific fifo size for each individual ADMAIF channel 
from the shared buffer.
- Tegra Audio DMA(ADMA) architecture is different from the usual DMA 
engines, which you described earlier.
- The flow control logic is placed inside ADMA. Slave peripheral 
device(ADMAIF) signals ADMA whenever a
  read or write happens on the FIFO(per WORD basis). Please note that 
the signaling is per channel. There is

  no other signaling present from ADMAIF to ADMA.
- ADMA keeps a counter related to above signaling. Whenever a sufficient 
space is available, it initiates a transfer.
  But the question is, how does it know when to transfer. This is the 
reason, why ADMA has to be aware of FIFO
  depth of ADMAIF channel. Depending on the counters and FIFO depth, it 
knows exactly when a free space is available
  in the context of a specific channel. On ADMA, FIFO_SIZE is just a 
value which should match to actual FIFO_DEPTH/SIZE

  of ADMAIF channel.
- Now consider two cases based on above logic,
  * Case 1: when DMA_FIFO_SIZE > SLAVE_FIFO_SIZE
    In this case, ADMA thinks that there is enough space available for 
transfer, when actually the FIFO data

    on slave is not consumed yet. It would result in OVERRUN.
  * Case 2: when DMA_FIFO_SIZE < SLAVE_FIFO_SIZE
 

RE: [PATCH V2 2/2] mailbox: introduce ARM SMC based mailbox

2019-06-05 Thread Peng Fan
> Subject: Re: [PATCH V2 2/2] mailbox: introduce ARM SMC based mailbox
> 
> On 6/3/19 1:30 AM, peng@nxp.com wrote:
> > From: Peng Fan 
> >
> > This mailbox driver implements a mailbox which signals transmitted
> > data via an ARM smc (secure monitor call) instruction. The mailbox
> > receiver is implemented in firmware and can synchronously return data
> > when it returns execution to the non-secure world again.
> > An asynchronous receive path is not implemented.
> > This allows the usage of a mailbox to trigger firmware actions on SoCs
> > which either don't have a separate management processor or on which
> > such a core is not available. A user of this mailbox could be the SCP
> > interface.
> >
> > Modified from Andre Przywara's v2 patch
> > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore
> > .kernel.org%2Fpatchwork%2Fpatch%2F812999%2Fdata=02%7C01%7
> Cpeng.fa
> >
> n%40nxp.com%7Caa396ba11ba244111fe408d6e8411fba%7C686ea1d3bc2b4
> c6fa92cd
> >
> 99c5c301635%7C0%7C0%7C636951763738548621sdata=UlNESNg7I7
> 4TM9xp%2F
> > VMce4CSbMuJ95lh68cQw%2FnQMOw%3Dreserved=0
> >
> > Cc: Andre Przywara 
> > Signed-off-by: Peng Fan 
> > ---
> 
> [snip]
> 
> +#define ARM_SMC_MBOX_USB_IRQ BIT(1)
> 
> That flag appears unused.

I'll remove this in V3.

> 
> > +static int arm_smc_mbox_probe(struct platform_device *pdev) {
> > +   struct device *dev = >dev;
> > +   struct mbox_controller *mbox;
> > +   struct arm_smc_chan_data *chan_data;
> > +   const char *method;
> > +   bool use_hvc = false;
> > +   int ret, irq_count, i;
> > +   u32 val;
> > +
> > +   if (!of_property_read_u32(dev->of_node, "arm,num-chans", )) {
> > +   if (val < 1 || val > INT_MAX) {
> > +   dev_err(dev, "invalid arm,num-chans value %u of 
> > %pOFn\n",
> val, pdev->dev.of_node);
> > +   return -EINVAL;
> > +   }
> > +   }
> 
> Should not the upper bound check be done against UINT_MAX since val is an
> unsigned int?

Fix in V3.

> 
> > +
> > +   irq_count = platform_irq_count(pdev);
> > +   if (irq_count == -EPROBE_DEFER)
> > +   return irq_count;
> > +
> > +   if (irq_count && irq_count != val) {
> > +   dev_err(dev, "Interrupts not match num-chans\n");
> 
> Interrupts property does not match \"arm,num-chans\" would be more
> correct.

Fix in V3.

> 
> > +   return -EINVAL;
> > +   }
> > +
> > +   if (!of_property_read_string(dev->of_node, "method", )) {
> > +   if (!strcmp("hvc", method)) {
> > +   use_hvc = true;
> > +   } else if (!strcmp("smc", method)) {
> > +   use_hvc = false;
> > +   } else {
> > +   dev_warn(dev, "invalid \"method\" property: %s\n",
> > +method);
> > +
> > +   return -EINVAL;
> > +   }
> 
> Having at least one method specified does not seem to be checked later on in
> the code, so if I omitted to specify that property, we would still register 
> the
> mailbox and default to use "smc" since the ARM_SMC_MBOX_USE_HVC flag
> would not be set, would not we want to make sure that we do have in fact a
> valid method specified given the binding documents that property as
> mandatory?

When arm_smc_send_data() is called, it checks ARM_SMC_MBOX_USE_HVC.
Do you mean there are other places that need this flag check?

> 
> [snip]
> 
> > +   mbox->txdone_poll = false;
> > +   mbox->txdone_irq = false;
> > +   mbox->ops = _smc_mbox_chan_ops;
> > +   mbox->dev = dev;
> > +
> > +   ret = mbox_controller_register(mbox);
> > +   if (ret)
> > +   return ret;
> > +
> > +   platform_set_drvdata(pdev, mbox);
> 
> I would move this above mbox_controller_register() that way there is no room
> for race conditions in case another part of the driver expects to have
> pdev->dev.drvdata set before the mbox controller is registered.

Right.

> Since you use devm_* functions for everything, you may even remove that
> call.

You mean remove " platform_set_drvdata(pdev, mbox);" ?

> 
> [snip]
> 
> > +#ifndef _LINUX_ARM_SMC_MAILBOX_H_
> > +#define _LINUX_ARM_SMC_MAILBOX_H_
> > +
> > +struct arm_smccc_mbox_cmd {
> > +   unsigned long a0, a1, a2, a3, a4, a5, a6, a7; };
> 
> Do you expect this to be used by other in-kernel users? If so, it might be 
> good
> to document how a0 can have a special meaning and be used as a substitute
> for the function_id?

This was to address comments here:
https://lore.kernel.org/patchwork/patch/812999/#1010433

Thanks,
Peng.

> --
> Florian


Re: Loan grant @affordable rate

2019-06-05 Thread Loan Grant
Hello Dear,

We are a large Company based in Istanbul Turkey and we offers flexible loans 
and funding for all projects for as low as 2% interest rate  per annum for a 
period of 5-15 yrs .

We invite you to partner with us and benefit in our new Loan and Project 
funding program.

We can approve a loan/funding for up to USD$1 Billion or more depending on the 
nature of business so If you need loan/ funding and for more details Send us 
email to customers email address only at: l...@mfigc.info

Note: We  also pay 1% agents fee for each client referred to us by agent 
therefore if you know any one who need genuine loan you can refer to us and get 
1%.

This is a Guaranteed loan/Funding  and please reply only to this email address 
at: l...@mfigc.info

Regards
Haider Dawber
Istanbul-Turkey
Email: l...@mfigc.info


RE: [PATCH V2 1/2] DT: mailbox: add binding doc for the ARM SMC mailbox

2019-06-05 Thread Peng Fan
> Subject: Re: [PATCH V2 1/2] DT: mailbox: add binding doc for the ARM SMC
> mailbox
> 
> On Mon, 3 Jun 2019 17:56:51 +0100
> Sudeep Holla  wrote:
> 
> Hi,
> 
> > On Mon, Jun 03, 2019 at 09:22:16AM -0700, Florian Fainelli wrote:
> > > On 6/3/19 1:30 AM, peng@nxp.com wrote:
> > > > From: Peng Fan 
> > > >
> > > > The ARM SMC mailbox binding describes a firmware interface to
> > > > trigger actions in software layers running in the EL2 or EL3 exception
> levels.
> > > > The term "ARM" here relates to the SMC instruction as part of the
> > > > ARM instruction set, not as a standard endorsed by ARM Ltd.
> > > >
> > > > Signed-off-by: Peng Fan 
> > > > ---
> > > >
> > > > V2:
> > > > Introduce interrupts as a property.
> > > >
> > > > V1:
> > > > arm,func-ids is still kept as an optional property, because there
> > > > > is no defined SMC function id passed from SCMI. So in my test, I
> > > > still use arm,func-ids for ARM SIP service.
> > > >
> > > >  .../devicetree/bindings/mailbox/arm-smc.txt| 101
> +
> > > >  1 file changed, 101 insertions(+)  create mode 100644
> > > > Documentation/devicetree/bindings/mailbox/arm-smc.txt
> > > >
> > > > diff --git a/Documentation/devicetree/bindings/mailbox/arm-smc.txt
> > > > b/Documentation/devicetree/bindings/mailbox/arm-smc.txt
> > > > new file mode 100644
> > > > index ..401887118c09
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/mailbox/arm-smc.txt
> > > > @@ -0,0 +1,101 @@
> >
> > [...]
> >
> > > > +Optional properties:
> > > > +- arm,func-ids An array of 32-bit values specifying the 
> > > > function
> > > > +   IDs used by each mailbox channel. Those 
> > > > function IDs
> > > > +   follow the ARM SMC calling convention standard 
> > > > [1].
> > > > +   There is one identifier per channel and the 
> > > > number
> > > > +   of supported channels is determined by the 
> > > > length
> > > > +   of this array.
> > > > +- interrupts   SPI interrupts may be listed for notification,
> > > > +   each channel should use a dedicated interrupt
> > > > +   line.
> > >
> > > I would not go about defining a specific kind of interrupt, since
> > > SPI is a GIC terminology, this firmware interface could be used in
> > > premise with any parent interrupt controller, for which the concept
> > > of a SPI/PPI/SGI may not be relevant.
> > >
> >
> > While I agree the binding document may not contain specifics, I still
> > don't see how to use SGI with this. Also note it's generally reserved
> > for OS future use(IPC) and using this for other than IPC may be bit
> > challenging IMO. It opens up lots of questions.
> 
> Well, a PPI might be possible to use, although it's somewhat dodgy to hijack
> the GIC's (re-)distributor from EL3 to write to GICD_ISPENDR. Need to ask
> Marc about his feelings towards this. But it's definitely possible from a
> hypervisor to inject arbitrary interrupts into a guest.
> 
> But more importantly: is there any actual reason this needs to be a GIC
> interrupt? 

No. I just tested ATF with SPI when I posted this. It should not be restricted
to GIC.

> If I understand the code correctly, this could just be any interrupt,
> including one of an interrupt combiner or a GPIO chip. So why not just use the
> standard wording of: "exactly one interrupt specifier for each channel"?

Agree.

> 
> By the way: Shouldn't new bindings use the YAML format instead?

I'll convert to YAML in next version.

Thanks,
Peng.

> 
> Cheers,
> Andre.


Re: [PATCH 1/2] media: v4l2-core: Shifting signed 32-bit value by 31 bits error

2019-06-05 Thread Randy Dunlap
On 6/5/19 2:53 PM, Shuah Khan wrote:
> Fix the following cppcheck error:
> 
> Checking drivers/media/v4l2-core/v4l2-ioctl.c ...
> [drivers/media/v4l2-core/v4l2-ioctl.c:1370]: (error) Shifting signed 32-bit 
> value by 31 bits is undefined behaviour
> 
> Signed-off-by: Shuah Khan 
> ---
>  drivers/media/v4l2-core/v4l2-ioctl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c 
> b/drivers/media/v4l2-core/v4l2-ioctl.c
> index 6859bdac86fe..333e387bafeb 100644
> --- a/drivers/media/v4l2-core/v4l2-ioctl.c
> +++ b/drivers/media/v4l2-core/v4l2-ioctl.c
> @@ -1364,7 +1364,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
>   (char)((fmt->pixelformat >> 8) & 0x7f),
>   (char)((fmt->pixelformat >> 16) & 0x7f),
>   (char)((fmt->pixelformat >> 24) & 0x7f),
> - (fmt->pixelformat & (1 << 31)) ? "-BE" 
> : "");
> + (fmt->pixelformat & BIT(31)) ? "-BE" : 
> "");
>   break;
>   }
>   }
> 

If this builds, I guess #define BIT(x) got pulled in indirectly
since neither bits.h nor bitops.h is currently #included in that source file.

Documentation/process/submit-checklist.rst rule #1 says:
1) If you use a facility then #include the file that defines/declares
   that facility.  Don't depend on other header files pulling in ones
   that you use.

Please add #include <linux/bits.h>

-- 
~Randy


Re: [Question] panic when write file cpuset.cpus

2019-06-05 Thread Chen Zhou
+Cc linux-kernel@vger.kernel.org

On 2019/6/6 11:04, Chen Zhou wrote:
> Hi all,
> 
> I hit the following issue in linux 4.4 which is hard to reproduce.
> 
> [20190527221106][bsp_pci_device_get_bar]--- pBasePhyAddr :3a00800, 
> len:400  ---
> [20190527221106]Unable to handle kernel paging request at virtual address 
> 10010
> [20190527221107]pgd = ffd3c696b000
> [20190527221107][10010] *pgd=, *pud=
> [20190527221107]Internal error: Oops: 9605 [#1] PREEMPT SMP
> [20190527221107]Modules linked in: linux_user_bde(O) linux_kernel_bde(O) 
> cmac(O) nse(O) pp(O) tm(O) lfe(O) tipc(O) pcie_aer_hisi(O) brd_drv_lpu(O) 
> hi161x_glf(O) hi161x_glc(O) chip_sdk_adpt(O) bonding(O) memenv(O) iof_sal(O) 
> iof_dlog(O) iof_devent(O) iof_iomm(O) drv_bsp_pic(O) bsp_common(O) 
> pramdisk(O) bsp_proc(O) kdc_uio_log(O) vrp_env_log_area(O) drv_bsp_fmea(O) 
> Drv_LastWds_K(O) Drv_CpuDfxInfo_K(O) Drv_CpuDfx_K(O) v8_dfx_cpu(O) 
> Drv_Dfx_K(O) Drv_CpuRegInject_K(O) Drv_ResetCause_K(O) Drv_Kbox_K(O) 
> env_core(O) iof_data(O) Drv_L2flush_K(O) arm64_cache_dfx(O) mmapdev(O) 
> drv_extern_int(O) irq_monitor(O) drv_bsp_avs(O) Drv_Pmbus_K(O) Drv_Smc_K(O) 
> bsp_sal(O) Drv_Ipsec_K(O) Drv_Tsensor_K(O) pci_hisi(O) serdes(O) 
> Drv_CheckBoot_K(O) Drv_Djtag_K(O) addr_win(O) iof_cbb(O) Drv_I2c_K(O) 
> hns_uio_enet(O) hns_enet_drv(O) hns_dsaf(O) hnae(O) hns_mdio(O) mdio(O) 
> Drv_FlowCtrl_K(O) Drv_Gpio_K(O) Drv_SysClk_K(O) physmap_of(O) map_rom(O) 
> cfi_cmdset_0002(O) cfi_probe(O) cfi_util(O) gen_probe(O) chipreg(O) rsm(O) 
> rtos_snapshot(O) rtos_kbox_panic(O) bsp_wdt(O) drv_bsp_ddr(O) bsp_reg(O) 
> Drv_Dts_K(O) Drv_SysCtl_K(O) arm_sal_issu(O) ksecurec(PO) ext4 jbd2 ext2 
> mbcache ofpart i2c_dev i2c_core uio nand nand_ecc nand_ids cmdlinepart 
> mtdblock mtd_blkdevs mtd
> [20190527221107]CPU: 2 PID: 2656 Comm: monitor Tainted: PW  O
> 4.4.171 #1
> [20190527221107]Hardware name: Hisilicon chip6_16 Product Board (DT)
> [20190527221107]task: ffd3bf81c250 task.stack: ffd3bff0c000
> [20190527221107]PC is at rb_erase+0x14/0x320
> [20190527221107]LR is at erase_header+0x50/0x54
> [20190527221107]pc : [] lr : [] pstate: 
> 0145
> [20190527221107]sp : ffd3bff0f9a0
> [20190527221107]x29: ffd3bff0f9a0 x28: ff69fe8b1980
> [20190527221107]x27: 0001 x26: ff8008e7e340
> [20190527221107]x25: ff8008e05000 x24: ff8008e32e28
> [20190527221107]x23: ffd3c0542500 x22: ff8008e32000
> [20190527221107]x21: ff8008e05000 x20: ffd3c0542f00
> [20190527221107]x19: ffd3c0542fb8 x18: 000f
> [20190527221107]x17: 007f9bd20e10 x16: ff8008367108
> [20190527221107]x15: 1fee x14: 
> [20190527221107]x13:  x12: 
> [20190527221107]x11: 0001 x10: 0001
> [20190527221107]x9 : 0001 x8 : ff800894622d
> [20190527221107]x7 : ffd3c65dd7d0 x6 : 
> [20190527221107]x5 : ffd3bf81a740 x4 : 
> [20190527221107]x3 : 00010001 x2 : 0001
> [20190527221107]x1 : ffd3c0542550 x0 : ffd3c0542f58
> [20190527221107]Process monitor (pid: 2656, stack limit = 0xffd3bff0c000)
> [20190527221107]
> [20190527221107][] rb_erase+0x14/0x320
> [20190527221107][] drop_sysctl_table+0x17c/0x1d4
> [20190527221107][] drop_sysctl_table+0x1a4/0x1d4
> [20190527221107][] unregister_sysctl_table+0x9c/0xa8
> [20190527221107][] unregister_sysctl_table+0x60/0xa8
> [20190527221107][] partition_sched_domains+0x64/0x338
> [20190527221107][] rebuild_sched_domains_locked+0xe0/0x3c0
> [20190527221107][] cpuset_write_resmask+0x288/0x8cc 
> [20190527221107][] cgroup_file_write+0x64/0x128
> [20190527221107][] kernfs_fop_write+0x15c/0x1ac
> [20190527221107][] __vfs_write+0x60/0x124
> [20190527221107][] vfs_write+0xb0/0x184
> [20190527221107][] SyS_write+0x6c/0xcc
> [20190527221107][] __sys_trace_return+0x0/0x4
> 
> 
> The disassembler and the source code about the backtrace are as below:
> 
> rb_erase()->__rb_erase_augmented()->__rb_change_child()
> __rb_erase_augmented():
> ff800846e514:   a9409006ldp x6, x4, [x0, #8]
> ff800846e518:   b5000244cbnzx4, ff800846e560 
> 
> ff800846e51c:   f943ldr x3, [x0]
> __rb_change_child():
> ff800846e520:   f27ef462andsx2, x3, #0xfffc
> ff800846e524:   54000140b.eqff800846e54c 
>   // b.none
> ff800846e528:   f9400844ldr x4, [x2, #16]
> ff800846e52c:   eb04001fcmp x0, x4
> 
> ff800846e530:   54a1b.neff800846e544 
>   // b.any
> __write_once_size():
> ff800846e534:   f9000846str x6, [x2, #16]
> 
> rb_erase()->__rb_erase_augmented()->__rb_change_child()
> static __always_inline struct rb_node *
> __rb_erase_augmented(struct rb_node *node, struct rb_root *root,
>  

Re: [PATCH v3 net-next 00/17] PTP support for the SJA1105 DSA driver

2019-06-05 Thread Richard Cochran
On Wed, Jun 05, 2019 at 09:08:54PM +0300, Vladimir Oltean wrote:
> Currently I'm using a cyclecounter, but I *will* need actual PHC
> manipulations for the time-based shaping and policing features that
> the switch has in hardware.

Okay.

> On the other hand I get much tighter sync
> offset using the free-running counter than with hardware-corrected
> timestamps.

Why?  The time stamps come from the very same counter, don't they?

> So as far as I see it, I'll need to have two sets of
> operations.

I doubt very much that this will work well.

> How should I design such a dual-PHC device driver? Just register two
> separate clocks, one for the timestamping counter, the other for the
> scheduling/policing PTP clock, and have phc2sys keep them in sync
> externally to the driver?

But how would phc2sys do this?  By comparing clock_gettime() values?
That would surely introduce unnecessary time error.

> Or implement the hardware corrections
> alongside the timecounter ones, and expose a single PHC (and for
> clock_gettime, just pick one of the time sources)?

I would implement the hardware clock and drop the timecounter
altogether.

HTH,
Richard


[PATCH] ftrace: fix NULL pointer dereference in free_ftrace_func_mapper()

2019-06-05 Thread Wei Li
The mapper may be NULL when called from register_ftrace_function_probe()
with probe->data == NULL.

This issue can be reproduced as follows (it may be covered by compiler
optimization sometimes):

/ # cat /sys/kernel/debug/tracing/set_ftrace_filter 
 all functions enabled 
/ # echo foo_bar:dump > /sys/kernel/debug/tracing/set_ftrace_filter 
[  206.949100] Unable to handle kernel NULL pointer dereference at virtual 
address 
[  206.952402] Mem abort info:
[  206.952819]   ESR = 0x9606
[  206.955326]   Exception class = DABT (current EL), IL = 32 bits
[  206.955844]   SET = 0, FnV = 0
[  206.956272]   EA = 0, S1PTW = 0
[  206.956652] Data abort info:
[  206.957320]   ISV = 0, ISS = 0x0006
[  206.959271]   CM = 0, WnR = 0
[  206.959938] user pgtable: 4k pages, 48-bit VAs, pgdp=000419f3a000
[  206.960483] [] pgd=000411a87003, pud=000411a83003, 
pmd=
[  206.964953] Internal error: Oops: 9606 [#1] SMP
[  206.971122] Dumping ftrace buffer:
[  206.973677](ftrace buffer empty)
[  206.975258] Modules linked in:
[  206.976631] Process sh (pid: 281, stack limit = 0x(ptrval))
[  206.978449] CPU: 10 PID: 281 Comm: sh Not tainted 5.2.0-rc1+ #17
[  206.978955] Hardware name: linux,dummy-virt (DT)
[  206.979883] pstate: 6005 (nZCv daif -PAN -UAO)
[  206.980499] pc : free_ftrace_func_mapper+0x2c/0x118
[  206.980874] lr : ftrace_count_free+0x68/0x80
[  206.982539] sp : 182f3ab0
[  206.983102] x29: 182f3ab0 x28: 8003d0ec1700 
[  206.983632] x27: 13054b40 x26: 0001 
[  206.984000] x25: 1385f000 x24:  
[  206.984394] x23: 13453000 x22: 13054000 
[  206.984775] x21:  x20: 1385fe28 
[  206.986575] x19: 13872c30 x18:  
[  206.987111] x17:  x16:  
[  206.987491] x15: ffb0 x14:  
[  206.987850] x13: 0017430e x12: 0580 
[  206.988251] x11:  x10:  
[  206.988740] x9 :  x8 : 13917550 
[  206.990198] x7 : 12fac2e8 x6 : 12fac000 
[  206.991008] x5 : 103da588 x4 : 0001 
[  206.991395] x3 : 0001 x2 : 13872a28 
[  206.991771] x1 :  x0 :  
[  206.992557] Call trace:
[  206.993101]  free_ftrace_func_mapper+0x2c/0x118
[  206.994827]  ftrace_count_free+0x68/0x80
[  206.995238]  release_probe+0xfc/0x1d0
[  206.99]  register_ftrace_function_probe+0x4a8/0x868
[  206.995923]  ftrace_trace_probe_callback.isra.4+0xb8/0x180
[  206.996330]  ftrace_dump_callback+0x50/0x70
[  206.996663]  ftrace_regex_write.isra.29+0x290/0x3a8
[  206.997157]  ftrace_filter_write+0x44/0x60
[  206.998971]  __vfs_write+0x64/0xf0
[  206.999285]  vfs_write+0x14c/0x2f0
[  206.999591]  ksys_write+0xbc/0x1b0
[  206.999888]  __arm64_sys_write+0x3c/0x58
[  207.000246]  el0_svc_common.constprop.0+0x408/0x5f0
[  207.000607]  el0_svc_handler+0x144/0x1c8
[  207.000916]  el0_svc+0x8/0xc
[  207.003699] Code: aa0003f8 a9025bf5 aa0103f5 f946ea80 (f9400303) 
[  207.008388] ---[ end trace 7b6d11b5f542bdf1 ]---
[  207.010126] Kernel panic - not syncing: Fatal exception
[  207.011322] SMP: stopping secondary CPUs
[  207.013956] Dumping ftrace buffer:
[  207.014595](ftrace buffer empty)
[  207.015632] Kernel Offset: disabled
[  207.017187] CPU features: 0x002,20006008
[  207.017985] Memory Limit: none
[  207.019825] ---[ end Kernel panic - not syncing: Fatal exception ]---

Signed-off-by: Wei Li 
---
 kernel/trace/ftrace.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a12aff849c04..7e2488da69ac 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4221,10 +4221,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper 
*mapper,
struct ftrace_func_entry *entry;
struct ftrace_func_map *map;
struct hlist_head *hhd;
-   int size = 1 << mapper->hash.size_bits;
-   int i;
+   int size, i;
+
+   if (!mapper)
+   return;
 
if (free_func && mapper->hash.count) {
+   size = 1 << mapper->hash.size_bits;
for (i = 0; i < size; i++) {
hhd = >hash.buckets[i];
hlist_for_each_entry(entry, hhd, hlist) {
-- 
2.17.1



Re: [BISECTED REGRESSION] b43legacy broken on G4 PowerBook

2019-06-05 Thread Larry Finger

On 6/5/19 5:50 PM, Aaro Koskinen wrote:

Hi,

When upgrading from v5.0 -> v5.1 on G4 PowerBook, I noticed WLAN does
not work anymore:

[   42.004303] b43legacy-phy0: Loading firmware version 0x127, patch level 14 
(2005-04-18 02:36:27)
[   42.184837] b43legacy-phy0 debug: Chip initialized
[   42.184873] b43legacy-phy0 ERROR: The machine/kernel does not support the 
required 30-bit DMA mask

The same happens with the current mainline.

Bisected to:

commit 65a21b71f948406201e4f62e41f06513350ca390
Author: Christoph Hellwig 
Date:   Wed Feb 13 08:01:26 2019 +0100

powerpc/dma: remove dma_nommu_dma_supported

This function is largely identical to the generic version used
everywhere else.  Replace it with the generic version.

Signed-off-by: Christoph Hellwig 
Tested-by: Christian Zigotzky 
Signed-off-by: Michael Ellerman 


Aaro,

First of all, you have my sympathy for the laborious bisection on a PowerBook 
G4. I have done several myself. Thank you.


I confirm your results.

The ppc code has a maximum DMA size of 31 bits, thus a 32-bit request will fail. 
Why the 30-bit fallback fails in b43legacy while it works in b43 is a mystery.


Although dma_nommu_dma_supported() may be "largely identical" to 
dma_direct_supported(), they obviously differ. Routine dma_nommu_dma_supported() 
returns 1 for 32-bit systems, but I do not know what dma_direct_supported() returns.


I am trying to find a patch.

Larry


RE: [RFC PATCH v2 1/3] vfio: Use capability chains to handle device specific irq

2019-06-05 Thread Tian, Kevin
> From: kra...@redhat.com
> Sent: Wednesday, June 5, 2019 6:10 PM
> 
>   Hi,
> 
> > > Really need to split for different planes? I'd like a
> > > VFIO_IRQ_SUBTYPE_GFX_DISPLAY_EVENT
> > > so user space can probe change for all.
> 
> > User space can choose to use different handlers according to the
> > specific event. For example, user space might not want to handle every
> > cursor event due to performance consideration. Besides, it can reduce
> > the probe times, as we don't need to probe twice to make sure if both
> > cursor plane and primary plane have been updated.
> 
> I'd suggest to use the value passed via eventfd for that, i.e. instead
> of sending "1" unconditionally send a mask of changed planes.
> 

sounds reasonable.


[PATCH V1 2/6] USB: serial: f81232: Force F81534A with RS232 mode

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
Force F81534A series UARTs with RS232 mode in port_probe().

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 84efcc66aa56..75dfc0b9ef30 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -83,12 +83,22 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define F81232_F81232_TYPE 1
 #define F81232_F81534A_TYPE2
 
+/* Serial port self GPIO control, 2bytes [control data][input data] */
+#define F81534A_GPIO_REG   0x10e
+#define F81534A_GPIO_MODE2_DIR BIT(6) /* 1: input, 0: output */
+#define F81534A_GPIO_MODE1_DIR BIT(5)
+#define F81534A_GPIO_MODE0_DIR BIT(4)
+#define F81534A_GPIO_MODE2_OUTPUT  BIT(2)
+#define F81534A_GPIO_MODE1_OUTPUT  BIT(1)
+#define F81534A_GPIO_MODE0_OUTPUT  BIT(0)
+
 struct f81232_private {
struct mutex lock;
u8 modem_control;
u8 modem_status;
u8 shadow_lcr;
u8 device_type;
+   u8 gpio_mode;
speed_t baud_base;
struct work_struct lsr_work;
struct work_struct interrupt_work;
@@ -871,6 +881,11 @@ static int f81232_port_probe(struct usb_serial_port *port)
switch (priv->device_type) {
case F81232_F81534A_TYPE:
priv->process_read_urb = f81534a_process_read_urb;
+   priv->gpio_mode = F81534A_GPIO_MODE2_DIR;
+
+   /* tri-state with pull-high, default RS232 Mode */
+   status = f81232_set_register(port, F81534A_GPIO_REG,
+   priv->gpio_mode);
break;
 
case F81232_F81232_TYPE:
-- 
2.7.4



Re: [PATCH] sched/fair: Introduce fits_capacity()

2019-06-05 Thread Viresh Kumar
On 04-06-19, 08:59, Peter Oskolkov wrote:
> On Tue, Jun 4, 2019 at 12:02 AM Viresh Kumar  wrote:
> >
> > The same formula to check utilization against capacity (after
> > considering capacity_margin) is already used at 5 different locations.
> >
> > This patch creates a new macro, fits_capacity(), which can be used from
> > all these locations without exposing the details of it and hence
> > simplify code.
> >
> > All the 5 code locations are updated as well to use it..
> >
> > Signed-off-by: Viresh Kumar 
> > ---
> >  kernel/sched/fair.c | 14 +++---
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 7f8d477f90fe..db3a218b7928 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu)
> >   * (default: ~20%)
> >   */
> >  static unsigned int capacity_margin= 1280;
> > +
> > +#define fits_capacity(cap, max)((cap) * capacity_margin < (max) * 
> > 1024)
> 
> Any reason to have this as a macro and not as an inline function?

I don't have any strong preference here, I used a macro as I didn't
feel that type-checking is really required on the parameters and
eventually this will get open coded anyway.

Though I would be fine to make it a routine if maintainers want it
that way.

Thanks Peter.

-- 
viresh


[PATCH V1 5/6] USB: serial: f81232: Use devm_kzalloc

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
Use devm_kzalloc() to replace kzalloc().

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 13 +
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 7d1ec8f9d168..708d85c7d822 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -1198,7 +1198,7 @@ static int f81232_port_probe(struct usb_serial_port *port)
struct f81232_private *priv;
int status = 0;
 
-   priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+   priv = devm_kzalloc(>dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
 
@@ -1234,16 +1234,6 @@ static int f81232_port_probe(struct usb_serial_port 
*port)
return status;
 }
 
-static int f81232_port_remove(struct usb_serial_port *port)
-{
-   struct f81232_private *priv;
-
-   priv = usb_get_serial_port_data(port);
-   kfree(priv);
-
-   return 0;
-}
-
 static int f81232_suspend(struct usb_serial *serial, pm_message_t message)
 {
struct usb_serial_port *port = serial->port[0];
@@ -1301,7 +1291,6 @@ static struct usb_serial_driver f81232_device = {
.process_read_urb = f81232_read_urb_proxy,
.read_int_callback =f81232_read_int_callback,
.port_probe =   f81232_port_probe,
-   .port_remove =  f81232_port_remove,
.suspend =  f81232_suspend,
.resume =   f81232_resume,
 };
-- 
2.7.4



[PATCH V1 6/6] USB: serial: f81232: Add gpiolib to GPIO device

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
The Fintek F81534A series contains 3 GPIOs per UART and the maximum number of
GPIOs is 12x3 = 36.

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 210 
 1 file changed, 210 insertions(+)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 708d85c7d822..a53240bc164a 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -132,6 +133,7 @@ struct f81232_private {
 
 struct f81534a_ctrl_private {
struct usb_interface *intf;
+   struct gpio_chip chip;
struct mutex lock;
int device_idx;
 };
@@ -1007,6 +1009,204 @@ static int f81534a_ctrl_set_register(struct usb_device 
*dev, u16 reg, u16 size,
return status;
 }
 
+static int f81534a_ctrl_set_mask_register(struct usb_device *dev, u16 reg,
+   u8 mask, u8 val)
+{
+   int status;
+   u8 tmp;
+
+   status = f81534a_ctrl_get_register(dev, reg, 1, );
+   if (status)
+   return status;
+
+
+   tmp = (tmp & ~mask) | (val & mask);
+
+   status = f81534a_ctrl_set_register(dev, reg, 1, );
+   if (status)
+   return status;
+
+   return 0;
+}
+
+#ifdef CONFIG_GPIOLIB
+static int f81534a_gpio_get(struct gpio_chip *chip, unsigned int gpio_num)
+{
+   struct f81534a_ctrl_private *priv = gpiochip_get_data(chip);
+   struct usb_interface *intf = priv->intf;
+   struct usb_device *dev = interface_to_usbdev(intf);
+   int status;
+   u8 tmp[2];
+   int set;
+   int idx;
+
+   set = gpio_num / F81534A_CTRL_GPIO_MAX_PIN;
+   idx = gpio_num % F81534A_CTRL_GPIO_MAX_PIN;
+
+   status = mutex_lock_interruptible(>lock);
+   if (status)
+   return -EINTR;
+
+   status = f81534a_ctrl_get_register(dev, F81534A_CTRL_GPIO_REG + set,
+   sizeof(tmp), tmp);
+   if (status) {
+   mutex_unlock(>lock);
+   return status;
+   }
+
+   mutex_unlock(>lock);
+
+   return !!(tmp[1] & BIT(idx));
+}
+
+static int f81534a_gpio_direction_in(struct gpio_chip *chip,
+   unsigned int gpio_num)
+{
+   struct f81534a_ctrl_private *priv = gpiochip_get_data(chip);
+   struct usb_interface *intf = priv->intf;
+   struct usb_device *dev = interface_to_usbdev(intf);
+   int status;
+   int set;
+   int idx;
+   u8 mask;
+
+   set = gpio_num / F81534A_CTRL_GPIO_MAX_PIN;
+   idx = gpio_num % F81534A_CTRL_GPIO_MAX_PIN;
+   mask = F81534A_GPIO_MODE0_DIR << idx;
+
+   status = mutex_lock_interruptible(>lock);
+   if (status)
+   return -EINTR;
+
+   status = f81534a_ctrl_set_mask_register(dev, F81534A_CTRL_GPIO_REG +
+   set, mask, mask);
+   if (status) {
+   mutex_unlock(>lock);
+   return status;
+   }
+
+   mutex_unlock(>lock);
+
+   return 0;
+}
+
+static int f81534a_gpio_direction_out(struct gpio_chip *chip,
+unsigned int gpio_num, int val)
+{
+   struct f81534a_ctrl_private *priv = gpiochip_get_data(chip);
+   struct usb_interface *intf = priv->intf;
+   struct usb_device *dev = interface_to_usbdev(intf);
+   int status;
+   int set;
+   int idx;
+   u8 mask;
+   u8 data;
+
+   set = gpio_num / F81534A_CTRL_GPIO_MAX_PIN;
+   idx = gpio_num % F81534A_CTRL_GPIO_MAX_PIN;
+   mask = (F81534A_GPIO_MODE0_DIR << idx) | BIT(idx);
+   data = val ? BIT(idx) : 0;
+
+   status = mutex_lock_interruptible(>lock);
+   if (status)
+   return -EINTR;
+
+   status = f81534a_ctrl_set_mask_register(dev, F81534A_CTRL_GPIO_REG +
+   set, mask, data);
+   if (status) {
+   mutex_unlock(>lock);
+   return status;
+   }
+
+   mutex_unlock(>lock);
+
+   return 0;
+}
+
+static void f81534a_gpio_set(struct gpio_chip *chip, unsigned int gpio_num,
+   int val)
+{
+   f81534a_gpio_direction_out(chip, gpio_num, val);
+}
+
+static int f81534a_gpio_get_direction(struct gpio_chip *chip,
+   unsigned int gpio_num)
+{
+   struct f81534a_ctrl_private *priv = gpiochip_get_data(chip);
+   struct usb_interface *intf = priv->intf;
+   struct usb_device *dev = interface_to_usbdev(intf);
+   int status;
+   u8 tmp[2];
+   int set;
+   int idx;
+   u8 mask;
+
+   set = gpio_num / F81534A_CTRL_GPIO_MAX_PIN;
+   idx = gpio_num % F81534A_CTRL_GPIO_MAX_PIN;
+   mask = F81534A_GPIO_MODE0_DIR << idx;
+
+   status = mutex_lock_interruptible(>lock);
+   if (status)
+   return -EINTR;
+
+   status = f81534a_ctrl_get_register(dev, F81534A_CTRL_GPIO_REG 

[PATCH V1 1/6] USB: serial: f81232: Add F81534A support

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
The Fintek F81532A/534A/535/536 is a USB-to-2/4/8/12 serial port device.
It's mostly the same as the F81232; the UART device differs as follows:
1. TX/RX bulk size is 128/512bytes
2. RX bulk layout change:
F81232: [LSR(1Byte)+DATA(1Byte)][LSR(1Byte)+DATA(1Byte)]...
F81534A:[LEN][Data.][LSR]

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 153 +---
 1 file changed, 144 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 43fa1f0716b7..84efcc66aa56 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Fintek F81232 USB to serial adaptor driver
+ * Fintek F81532A/534A/535/536 USB to 2/4/8/12 serial adaptor driver
  *
  * Copyright (C) 2012 Greg Kroah-Hartman (gre...@linuxfoundation.org)
  * Copyright (C) 2012 Linux Foundation
@@ -22,7 +23,20 @@
 #include 
 
 static const struct usb_device_id id_table[] = {
+   /* F81232 */
{ USB_DEVICE(0x1934, 0x0706) },
+
+   /* F81532A/534A/535/536 */
+   { USB_DEVICE(0x2c42, 0x1602) }, /* In-Box 2 port UART device */
+   { USB_DEVICE(0x2c42, 0x1604) }, /* In-Box 4 port UART device */
+   { USB_DEVICE(0x2c42, 0x1605) }, /* In-Box 8 port UART device */
+   { USB_DEVICE(0x2c42, 0x1606) }, /* In-Box 12 port UART device */
+   { USB_DEVICE(0x2c42, 0x1608) }, /* Non-Flash type */
+
+   { USB_DEVICE(0x2c42, 0x1632) }, /* 2 port UART device */
+   { USB_DEVICE(0x2c42, 0x1634) }, /* 4 port UART device */
+   { USB_DEVICE(0x2c42, 0x1635) }, /* 8 port UART device */
+   { USB_DEVICE(0x2c42, 0x1636) }, /* 12 port UART device */
{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, id_table);
@@ -61,15 +75,25 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define F81232_CLK_14_77_MHZ   (BIT(1) | BIT(0))
 #define F81232_CLK_MASKGENMASK(1, 0)
 
+#define F81534A_MODE_CONF_REG  0x107
+#define F81534A_TRIGGER_MASK   GENMASK(3, 2)
+#define F81534A_TRIGGER_MULTPILE_4XBIT(3)
+#define F81534A_FIFO_128BYTE   (BIT(1) | BIT(0))
+
+#define F81232_F81232_TYPE 1
+#define F81232_F81534A_TYPE2
+
 struct f81232_private {
struct mutex lock;
u8 modem_control;
u8 modem_status;
u8 shadow_lcr;
+   u8 device_type;
speed_t baud_base;
struct work_struct lsr_work;
struct work_struct interrupt_work;
struct usb_serial_port *port;
+   void (*process_read_urb)(struct urb *urb);
 };
 
 static u32 const baudrate_table[] = { 115200, 921600, 1152000, 150 };
@@ -376,6 +400,78 @@ static void f81232_process_read_urb(struct urb *urb)
tty_flip_buffer_push(>port);
 }
 
+static void f81534a_process_read_urb(struct urb *urb)
+{
+   struct usb_serial_port *port = urb->context;
+   struct f81232_private *priv = usb_get_serial_port_data(port);
+   unsigned char *data = urb->transfer_buffer;
+   char tty_flag;
+   unsigned int i;
+   u8 lsr;
+   u8 len;
+
+   if (urb->status) {
+   dev_err(>dev, "urb->status: %d\n", urb->status);
+   return;
+   }
+
+   if (!urb->actual_length) {
+   dev_err(>dev, "urb->actual_length == 0\n");
+   return;
+   }
+
+   len = data[0];
+   if (len != urb->actual_length) {
+   dev_err(>dev, "len(%d) != urb->actual_length(%d)\n", len,
+   urb->actual_length);
+   return;
+   }
+
+   /* bulk-in data: [LEN][Data.][LSR] */
+   tty_flag = TTY_NORMAL;
+
+   lsr = data[len - 1];
+   if (lsr & UART_LSR_BRK_ERROR_BITS) {
+   if (lsr & UART_LSR_BI) {
+   tty_flag = TTY_BREAK;
+   port->icount.brk++;
+   usb_serial_handle_break(port);
+   } else if (lsr & UART_LSR_PE) {
+   tty_flag = TTY_PARITY;
+   port->icount.parity++;
+   } else if (lsr & UART_LSR_FE) {
+   tty_flag = TTY_FRAME;
+   port->icount.frame++;
+   }
+
+   if (lsr & UART_LSR_OE) {
+   port->icount.overrun++;
+   schedule_work(>lsr_work);
+   tty_insert_flip_char(>port, 0, TTY_OVERRUN);
+   }
+   }
+
+   for (i = 1; i < urb->actual_length - 1; i++) {
+   if (port->port.console && port->sysrq) {
+   if (usb_serial_handle_sysrq_char(port, data[i]))
+   continue;
+   }
+
+   tty_insert_flip_char(>port, data[i], tty_flag);
+   }
+
+   

[PATCH V1 3/6] USB: serial: f81232: Add generator for F81534A

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
The Fintek F81534A series contains 1 HUB / 1 GPIO device / n UARTs,
but the UARTs are disabled by default and need to be enabled through the
GPIO device (2c42/16F8). When an F81534A is plugged into the host, we can
only see 1 HUB & 1 GPIO device, so add the GPIO device's USB interface to
device_list and trigger the generate worker, f81534a_generate_worker, to
run f81534a_ctrl_generate_ports().

The operation in f81534a_ctrl_generate_ports() as following:
1: Write 0x8fff to F81534A_CMD_ENABLE_PORT register for enable all
   UART device.

2: Read port existence & current status from F81534A_CMD_PORT_STATUS
   register. the higher 16bit will indicate the UART existence. If the
   UART is existence, we'll check it GPIO mode as long as not default
   value (default is all input mode).

3: 1 GPIO device will check with max 15s and check next GPIO device when
   timeout. (F81534A_CTRL_RETRY * F81534A_CTRL_TIMER)

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 356 +++-
 1 file changed, 355 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 75dfc0b9ef30..e9470fb0d691 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -41,6 +41,12 @@ static const struct usb_device_id id_table[] = {
 };
 MODULE_DEVICE_TABLE(usb, id_table);
 
+static const struct usb_device_id f81534a_ctrl_id_table[] = {
+   { USB_DEVICE(0x2c42, 0x16f8) }, /* Global control device */
+   { } /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, f81534a_ctrl_id_table);
+
 /* Maximum baudrate for F81232 */
 #define F81232_MAX_BAUDRATE150
 #define F81232_DEF_BAUDRATE9600
@@ -49,6 +55,10 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define F81232_REGISTER_REQUEST0xa0
 #define F81232_GET_REGISTER0xc0
 #define F81232_SET_REGISTER0x40
+#define F81534A_REGISTER_REQUEST   F81232_REGISTER_REQUEST
+#define F81534A_GET_REGISTER   F81232_GET_REGISTER
+#define F81534A_SET_REGISTER   F81232_SET_REGISTER
+#define F81534A_ACCESS_REG_RETRY   2
 
 #define SERIAL_BASE_ADDRESS0x0120
 #define RECEIVE_BUFFER_REGISTER(0x00 + SERIAL_BASE_ADDRESS)
@@ -83,6 +93,10 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define F81232_F81232_TYPE 1
 #define F81232_F81534A_TYPE2
 
+#define F81534A_MAX_PORT   12
+#define F81534A_CTRL_TIMER 1000
+#define F81534A_CTRL_RETRY 15
+
 /* Serial port self GPIO control, 2bytes [control data][input data] */
 #define F81534A_GPIO_REG   0x10e
 #define F81534A_GPIO_MODE2_DIR BIT(6) /* 1: input, 0: output */
@@ -92,6 +106,16 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define F81534A_GPIO_MODE1_OUTPUT  BIT(1)
 #define F81534A_GPIO_MODE0_OUTPUT  BIT(0)
 
+#define F81534A_CMD_ENABLE_PORT0x116
+#define F81534A_CMD_PORT_STATUS0x117
+
+/*
+ * Control device global GPIO control,
+ * 2bytes [control data][input data]
+ */
+#define F81534A_CTRL_GPIO_REG  0x1601
+#define F81534A_CTRL_GPIO_MAX_PIN  3
+
 struct f81232_private {
struct mutex lock;
u8 modem_control;
@@ -106,10 +130,27 @@ struct f81232_private {
void (*process_read_urb)(struct urb *urb);
 };
 
+struct f81534a_ctrl_private {
+   struct usb_interface *intf;
+   struct mutex lock;
+   int device_idx;
+};
+
+struct f81534a_device {
+   struct list_head list;
+   struct usb_interface *intf;
+   int check_index;
+   int check_retry;
+};
+
 static u32 const baudrate_table[] = { 115200, 921600, 1152000, 150 };
 static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ,
F81232_CLK_18_46_MHZ, F81232_CLK_24_MHZ };
 
+struct delayed_work f81534a_generate_worker;
+static DEFINE_MUTEX(device_mutex);
+static LIST_HEAD(device_list);
+
 static int calc_baud_divisor(speed_t baudrate, speed_t clockrate)
 {
if (!baudrate)
@@ -859,6 +900,281 @@ static void f81232_lsr_worker(struct work_struct *work)
dev_warn(>dev, "read LSR failed: %d\n", status);
 }
 
+static int f81534a_ctrl_get_register(struct usb_device *dev, u16 reg, u16 size,
+   void *val)
+{
+   int retry = F81534A_ACCESS_REG_RETRY;
+   int status;
+   u8 *tmp;
+
+   tmp = kmalloc(size, GFP_KERNEL);
+   if (!tmp)
+   return -ENOMEM;
+
+   while (retry--) {
+   status = usb_control_msg(dev,
+   usb_rcvctrlpipe(dev, 0),
+   F81534A_REGISTER_REQUEST,
+   F81534A_GET_REGISTER,
+   reg,
+   0,
+   

[PATCH V1 4/6] USB: serial: f81232: Add tx_empty function

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
Add tx_empty() function for F81232 & F81534A series.

Signed-off-by: Ji-Ze Hong (Peter Hong) 
---
 drivers/usb/serial/f81232.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index e9470fb0d691..7d1ec8f9d168 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -850,6 +850,24 @@ static void f81232_dtr_rts(struct usb_serial_port *port, 
int on)
f81232_set_mctrl(port, 0, TIOCM_DTR | TIOCM_RTS);
 }
 
+/* Report whether the LSR shows both TEMT and THRE set (nothing left to send). */
+static bool f81232_tx_empty(struct usb_serial_port *port)
+{
+	int status;
+	u8 tmp;
+	u8 both_empty = UART_LSR_TEMT | UART_LSR_THRE;
+
+	/* Fetch the current line status register value into tmp. */
+	status = f81232_get_register(port, LINE_STATUS_REGISTER, &tmp);
+	if (status) {
+		dev_err(&port->dev, "get LSR status failed: %d\n", status);
+		return false;
+	}
+
+	/* Empty only when TEMT and THRE are both set in the LSR. */
+	return (tmp & both_empty) == both_empty;
+}
+
 static int f81232_carrier_raised(struct usb_serial_port *port)
 {
u8 msr;
@@ -1279,6 +1297,7 @@ static struct usb_serial_driver f81232_device = {
.tiocmget = f81232_tiocmget,
.tiocmset = f81232_tiocmset,
.tiocmiwait =   usb_serial_generic_tiocmiwait,
+   .tx_empty = f81232_tx_empty,
.process_read_urb = f81232_read_urb_proxy,
.read_int_callback =f81232_read_int_callback,
.port_probe =   f81232_port_probe,
-- 
2.7.4



[PATCH V1 0/6] USB: serial: f81232: Add F81534A support

2019-06-05 Thread Ji-Ze Hong (Peter Hong)
This patch series adds Fintek F81532A/534A/535/536 support and
refactors some source code.

The Fintek F81532A/534A/535/536 is a USB-to-2/4/8/12 serial port device.
It contains a HUB, a GPIO device and 2/4/8/12 serial ports. By default the
F81534A series enables only the HUB & GPIO device when plugged in and
leaves the UARTs disabled. We need to use the GPIO device to enable the
serial ports with a special sequence.

Most serial port features of the F81534A series are the same as the F81232.
The differences are as follows:
1. More RX FIFO and cache. (128byte FIFO + max to 128bytes*4 cache)
2. up to 3MBits baudrate.
3. 3x GPIOs per port to control transceiver.
4. UART devices need enabled by GPIO device register.

Ji-Ze Hong (Peter Hong) (6):
  USB: serial: f81232: Add F81534A support
  USB: serial: f81232: Force F81534A with RS232 mode
  USB: serial: f81232: Add generator for F81534A
  USB: serial: f81232: Add tx_empty function
  USB: serial: f81232: Use devm_kzalloc
  USB: serial: f81232: Add gpiolib to GPIO device

 drivers/usb/serial/f81232.c | 760 ++--
 1 file changed, 741 insertions(+), 19 deletions(-)

-- 
2.7.4



Re: [PATCH v6 09/10] usb: roles: add USB Type-B GPIO connector driver

2019-06-05 Thread Chunfeng Yun
On Wed, 2019-06-05 at 11:45 +0300, Andy Shevchenko wrote:
> On Wed, May 29, 2019 at 10:44 AM Chunfeng Yun  
> wrote:
> >
> > Due to the requirement of usb-connector.txt binding, the old way
> > using extcon to support USB Dual-Role switch is now deprecated
> > when use Type-B connector.
> > This patch introduces a driver of Type-B connector which typically
> > uses an input GPIO to detect USB ID pin, and try to replace the
> > function provided by extcon-usb-gpio driver
> 
> > +static SIMPLE_DEV_PM_OPS(usb_conn_pm_ops,
> > +usb_conn_suspend, usb_conn_resume);
> > +
> > +#define DEV_PMS_OPS (IS_ENABLED(CONFIG_PM_SLEEP) ? _conn_pm_ops : NULL)
> 
> Why this macro is needed?
Want to set .pm as NULL when CONFIG_PM_SLEEP is not enabled.

Thanks
> 




[RESEND][PATCH v3 2/2] usb: dwc3: Add workaround for host mode VBUS glitch when boot

2019-06-05 Thread Ran Wang
When DWC3 is set to host mode by programming register DWC3_GCTL, VBUS
(or its control signal) will be turned on immediately on related Root Hub
ports. Then, the VBUS is turned off for a little while (15us) during the
xhci reset (conducted by the xhci driver) and finally goes back to normal,
so we can observe a negative glitch on the related signal.

This VBUS glitch might cause some USB devices enumeration fail if kernel
boot with them connected. Such as LS1012AFWRY/LS1043ARDB/LX2160AQDS
/LS1088ARDB with Kingston 16GB USB2.0/Kingston USB3.0/JetFlash Transcend
4GB USB2.0 drives. The fail cases include enumerated as full-speed device
or report wrong device descriptor, etc.

One SW workaround which can fix this is to program all xhci PORTSC[PP]
to 0 to turn off VBUS immediately after setting host mode in the DWC3
driver (per signal measurement results, it will be too late to do it in
xhci-plat.c or xhci.c). Then, after the xhci reset completes in the xhci
driver, the PORTSC[PP] values go back to 1 automatically and VBUS turns on
at that time, so no glitch happens and the normal enumeration process is
not impacted.

Signed-off-by: Ran Wang 
---
Changes in v3:
  - Add macro PORT_REGS_SIZE to replace 0x10
  - Change initial value of i to 0 for the for loop
  - Cosmetic changes

Changes in v2:
  - Rename related property to 'snps,host-vbus-glitches'
  - Rename related dwc member to 'host_vbus_glitches'
  - Add member 'host_vbus_glitches' description in 'dwc3'

 drivers/usb/dwc3/core.c |   48 +++
 drivers/usb/dwc3/core.h |   12 +++
 2 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index a1b126f..dd80e3d 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -100,6 +100,42 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc)
return 0;
 }
 
+/*
+ * dwc3_power_off_all_roothub_ports - Power off all Root hub ports
+ * @dwc: Pointer to our controller context structure
+ */
+static void dwc3_power_off_all_roothub_ports(struct dwc3 *dwc)
+{
+	int i, port_num;
+	u32 reg, op_regs_base, offset;
+	void __iomem *xhci_regs;
+
+	/* xhci regs are not mapped yet, so map them temporarily here */
+	if (dwc->xhci_resources[0].start) {
+		xhci_regs = ioremap(dwc->xhci_resources[0].start,
+				    DWC3_XHCI_REGS_END);
+		if (!xhci_regs) {	/* ioremap() fails with NULL, not ERR_PTR */
+			dev_err(dwc->dev, "Failed to ioremap xhci_regs\n");
+			return;
+		}
+
+		op_regs_base = HC_LENGTH(readl(xhci_regs));
+		reg = readl(xhci_regs + XHCI_HCSPARAMS1);
+		port_num = HCS_MAX_PORTS(reg);
+
+		for (i = 0; i < port_num; i++) {
+			offset = op_regs_base + XHCI_PORTSC_BASE +
+				 PORT_REGS_SIZE * i;
+			reg = readl(xhci_regs + offset);
+			reg &= ~PORT_POWER;
+			writel(reg, xhci_regs + offset);
+		}
+
+		iounmap(xhci_regs);
+	} else
+		dev_err(dwc->dev, "xhci base reg invalid\n");
+}
+
 void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
 {
u32 reg;
@@ -109,6 +145,15 @@ void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
reg |= DWC3_GCTL_PRTCAPDIR(mode);
dwc3_writel(dwc->regs, DWC3_GCTL, reg);
 
+   /*
+* We have to power off all Root hub ports immediately after DWC3 set
+* to host mode to avoid VBUS glitch happen when xhci get reset later.
+*/
+   if (dwc->host_vbus_glitches) {
+   if (mode == DWC3_GCTL_PRTCAP_HOST)
+   dwc3_power_off_all_roothub_ports(dwc);
+   }
+
dwc->current_dr_role = mode;
 }
 
@@ -1306,6 +1351,9 @@ static void dwc3_get_properties(struct dwc3 *dwc)
dwc->dis_metastability_quirk = device_property_read_bool(dev,
"snps,dis_metastability_quirk");
 
+   dwc->host_vbus_glitches = device_property_read_bool(dev,
+   "snps,host-vbus-glitches");
+
dwc->lpm_nyet_threshold = lpm_nyet_threshold;
dwc->tx_de_emphasis = tx_de_emphasis;
 
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index df87641..c2dee0b 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -606,6 +606,15 @@
 #define DWC3_OSTS_VBUSVLD  BIT(1)
 #define DWC3_OSTS_CONIDSTS BIT(0)
 
+/* Partial XHCI Register and Bit fields for quirk */
+#define XHCI_HCSPARAMS10x4
+#define XHCI_PORTSC_BASE   0x400
+#define PORT_REGS_SIZE 0x10
+#define PORT_POWER (1 << 9)
+#define HCS_MAX_PORTS(p)   (((p) >> 24) & 0x7f)
+#define XHCI_HC_LENGTH(p)  (((p)>>00)&0x00ff)
+#define HC_LENGTH(p)   XHCI_HC_LENGTH(p)
+
 /* Structures */
 
 struct dwc3_trb;
@@ -1024,6 +1033,8 @@ struct dwc3_scratchpad_array {
  * 2   

[RESEND][PATCH v3 1/2] usb: dwc3: Add avoiding vbus glitch happen during xhci reset

2019-06-05 Thread Ran Wang
When DWC3 is set to host mode by programming register DWC3_GCTL, VBUS
(or its control signal) will turn on immediately on related Root Hub
ports. Then the VBUS will be de-asserted for a little while during xhci
reset (conducted by the xhci driver) and then go back to normal.

This VBUS glitch might cause enumeration of some USB devices to fail if the
kernel boots with them connected. One SW workaround which can fix this is to
program all PORTSC[PP] to 0 to turn off VBUS immediately after setting
host mode in the DWC3 driver (per signal measurement results, it will be too
late to do it in xhci-plat.c or xhci.c).

Signed-off-by: Ran Wang 
Reviewed-by: Rob Herring 
---
Changes in v3:
  - None

Changes in v2:
  - Correct typos
  - Shorten the name to snps,host-vbus-glitches

 Documentation/devicetree/bindings/usb/dwc3.txt |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt 
b/Documentation/devicetree/bindings/usb/dwc3.txt
index 8e5265e..453f562 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -106,6 +106,9 @@ Optional properties:
When just one value, which means INCRX burst mode 
enabled. When
more than one value, which means undefined length INCR 
burst type
enabled. The values can be 1, 4, 8, 16, 32, 64, 128 and 
256.
+ - snps,host-vbus-glitches: Power off all Root Hub ports immediately after
+   setting host mode to avoid vbus (negative) glitch 
happen in later
+   xhci reset. And the vbus will back to 5V automatically 
when reset done.
 
  - in addition all properties from usb-xhci.txt from the current directory are
supported as well
-- 
1.7.1



Re: [PATCH] sched/fair: Introduce fits_capacity()

2019-06-05 Thread Viresh Kumar
On 05-06-19, 10:16, Quentin Perret wrote:
> Hi Viresh,
> 
> On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote:
> > The same formula to check utilization against capacity (after
> > considering capacity_margin) is already used at 5 different locations.
> > 
> > This patch creates a new macro, fits_capacity(), which can be used from
> > all these locations without exposing the details of it and hence
> > simplify code.
> > 
> > All the 5 code locations are updated as well to use it..
> > 
> > Signed-off-by: Viresh Kumar 
> > ---
> >  kernel/sched/fair.c | 14 +++---
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 7f8d477f90fe..db3a218b7928 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu)
> >   * (default: ~20%)
> >   */
> >  static unsigned int capacity_margin= 1280;
> > +
> > +#define fits_capacity(cap, max)((cap) * capacity_margin < (max) * 1024)
> >  #endif
> >  
> >  #ifdef CONFIG_CFS_BANDWIDTH
> > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct 
> > task_struct *p, bool task_sleep)
> >  
> >  static inline int task_fits_capacity(struct task_struct *p, long capacity)
> >  {
> > -   return capacity * 1024 > task_util_est(p) * capacity_margin;
> > +   return fits_capacity(task_util_est(p), capacity);
> >  }
> >  
> >  static inline void update_misfit_status(struct task_struct *p, struct rq 
> > *rq)
> > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu);
> >  
> >  static inline bool cpu_overutilized(int cpu)
> >  {
> > -   return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
> > +   return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
> 
> This ...
> 
> >  }
> >  
> >  static inline void update_overutilized_status(struct rq *rq)
> > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct 
> > task_struct *p, int prev_cpu)
> > /* Skip CPUs that will be overutilized. */
> > util = cpu_util_next(cpu, p, cpu);
> > cpu_cap = capacity_of(cpu);
> > -   if (cpu_cap * 1024 < util * capacity_margin)
> > +   if (!fits_capacity(util, cpu_cap))
> 
> ... and this isn't _strictly_ equivalent to the existing code but I
> guess we can live with the difference :-)

Yes, I missed the == part it seems. Good catch. Though as you said,
maybe we don't need to take that into account and can live with the
new macro :)

> 
> > continue;
> >  
> > /* Always use prev_cpu as a candidate. */
> > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct 
> > sg_lb_stats *sgs)
> >  static inline bool
> >  group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group 
> > *ref)
> >  {
> > -   return sg->sgc->min_capacity * capacity_margin <
> > -   ref->sgc->min_capacity * 1024;
> > +   return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity);
> >  }
> >  
> >  /*
> > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group 
> > *sg, struct sched_group *ref)
> >  static inline bool
> >  group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group 
> > *ref)
> >  {
> > -   return sg->sgc->max_capacity * capacity_margin <
> > -   ref->sgc->max_capacity * 1024;
> > +   return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity);
> >  }
> >  
> >  static inline enum
> > -- 
> > 2.21.0.rc0.269.g1a574e7a288b
> > 
> 
> Also, since we're talking about making the capacity_margin code more
> consistent, one small thing I had in mind: we have a capacity margin
> in sugov too, which happens to be 1.25 as well (see map_util_freq()).
> Conceptually, capacity_margin in fair.c and the sugov margin are both
> about answering: "do I have enough CPU capacity to serve X of util, or
> do I need more ?"
> 
> So perhaps we should factorize the capacity_margin code some more to use
> it in both places in a consistent way ? This could be done in a separate
> patch, though.

Hmm, even if the values are same currently I am not sure if we want
the same for ever. I will write a patch for it though, if Peter/Rafael
feel the same as you.

Thanks Quentin.

-- 
viresh


Re: [PATCH V2 1/2] DT: mailbox: add binding doc for the ARM SMC mailbox

2019-06-05 Thread Florian Fainelli



On 6/3/2019 10:18 AM, Andre Przywara wrote:
> On Mon, 3 Jun 2019 17:56:51 +0100
> Sudeep Holla  wrote:
> 
> Hi,
> 
>> On Mon, Jun 03, 2019 at 09:22:16AM -0700, Florian Fainelli wrote:
>>> On 6/3/19 1:30 AM, peng@nxp.com wrote:  
 From: Peng Fan 

 The ARM SMC mailbox binding describes a firmware interface to trigger
 actions in software layers running in the EL2 or EL3 exception levels.
 The term "ARM" here relates to the SMC instruction as part of the ARM
 instruction set, not as a standard endorsed by ARM Ltd.

 Signed-off-by: Peng Fan 
 ---

 V2:
 Introduce interrupts as a property.

 V1:
 arm,func-ids is still kept as an optional property, because there is no
 defined SMC function id passed from SCMI. So in my test, I still use
 arm,func-ids for ARM SIP service.

  .../devicetree/bindings/mailbox/arm-smc.txt| 101 
 +
  1 file changed, 101 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/mailbox/arm-smc.txt

 diff --git a/Documentation/devicetree/bindings/mailbox/arm-smc.txt 
 b/Documentation/devicetree/bindings/mailbox/arm-smc.txt
 new file mode 100644
 index ..401887118c09
 --- /dev/null
 +++ b/Documentation/devicetree/bindings/mailbox/arm-smc.txt
 @@ -0,0 +1,101 @@  
>>
>> [...]
>>
 +Optional properties:
 +- arm,func-idsAn array of 32-bit values specifying the 
 function
 +  IDs used by each mailbox channel. Those function IDs
 +  follow the ARM SMC calling convention standard [1].
 +  There is one identifier per channel and the number
 +  of supported channels is determined by the length
 +  of this array.
 +- interrupts  SPI interrupts may be listed for notification,
 +  each channel should use a dedicated interrupt
 +  line.  
>>>
>>> I would not go about defining a specific kind of interrupt, since SPI is
>>> a GIC terminology, this firmware interface could be used in premise with
>>> any parent interrupt controller, for which the concept of a SPI/PPI/SGI
>>> may not be relevant.
>>>  
>>
>> While I agree the binding document may not contain specifics, I still
>> don't see how to use SGI with this. Also note it's generally reserved
>> for OS future use(IPC) and using this for other than IPC may be bit
>> challenging IMO. It opens up lots of questions.
> 
> Well, a PPI might be possible to use, although it's somewhat dodgy to hijack 
> the GIC's (re-)distributor from EL3 to write to GICD_ISPENDR. Need to ask 
> Marc about his feelings towards this. But it's definitely possible from a 
> hypervisor to inject arbitrary interrupts into a guest.
> 
> But more importantly: is there any actual reason this needs to be a GIC 
> interrupt? If I understand the code correctly, this could just be any 
> interrupt, including one of an interrupt combiner or a GPIO chip. So why not 
> just use the standard wording of: "exactly one interrupt specifier for each 
> channel"?

That was my point, I am not stuck on using an SGI, or PPI, or anything
(even if that's what we have been using at the moment), any interrupt
would/should do here so the wording should be exactly as you indicated.
-- 
Florian


Re: [PATCH RESEND] arm64: dts: imx8mm: Move gic node into soc node

2019-06-05 Thread Shawn Guo
On Thu, Jun 06, 2019 at 10:39:36AM +0800, anson.hu...@nxp.com wrote:
> From: Anson Huang 
> 
> GIC is inside of SoC from architecture perspective, it should
> be located inside of soc node in DT.
> 
> Signed-off-by: Anson Huang 

Applied, thanks.


Re: [RFC V2] mm: Generalize notify_page_fault()

2019-06-05 Thread Anshuman Khandual



On 06/05/2019 04:53 PM, Matthew Wilcox wrote:
> On Wed, Jun 05, 2019 at 09:19:22PM +1000, Michael Ellerman wrote:
>> Anshuman Khandual  writes:
>>> Similar notify_page_fault() definitions are being used by architectures
>>> duplicating much of the same code. This attempts to unify them into a
>>> single implementation, generalize it and then move it to a common place.
>>> kprobes_built_in() can detect CONFIG_KPROBES, hence notify_page_fault()
>>> need not be wrapped again within CONFIG_KPROBES. Trap number argument can
>>> now contain upto an 'unsigned int' accommodating all possible platforms.
>> ...
>>
>> You've changed several of the architectures from something like above,
>> where it disables preemption around the call into the below:
>>
>>
>> Which skips everything if we're preemptible. Is that an equivalent
>> change? If so can you please explain why in more detail.
> 
> See the discussion in v1 of this patch, which you were cc'd on.
> 
> I agree the description here completely fails to mention why the change.
> It should mention commit a980c0ef9f6d8c.

I will update the commit message to include an explanation for this new
preempt behavior in the generic definition.


RE: [PATCH] arm64: dts: imx8mm: Move gic node into soc node

2019-06-05 Thread Anson Huang
Hi, Shawn

> -Original Message-
> From: Shawn Guo 
> Sent: Thursday, June 6, 2019 10:18 AM
> To: Anson Huang 
> Cc: robh...@kernel.org; mark.rutl...@arm.com; s.ha...@pengutronix.de;
> ker...@pengutronix.de; feste...@gmail.com; Leonard Crestez
> ; Aisheng Dong ;
> viresh.ku...@linaro.org; Jacky Bai ;
> devicet...@vger.kernel.org; linux-arm-ker...@lists.infradead.org; linux-
> ker...@vger.kernel.org; dl-linux-imx 
> Subject: Re: [PATCH] arm64: dts: imx8mm: Move gic node into soc node
> 
> On Mon, Jun 03, 2019 at 09:50:20AM +0800, anson.hu...@nxp.com wrote:
> > From: Anson Huang 
> >
> > GIC is inside of SoC from architecture perspective, it should be
> > located inside of soc node in DT.
> >
> > Signed-off-by: Anson Huang 
> 
> It doesn't apply to my imx/dt64 branch.  Please generate it against that
> branch for my for-next.

OK, just resent the patch based on the correct branch.

Thanks,
Anson.

> 
> Shawn
> 
> > ---
> >  arch/arm64/boot/dts/freescale/imx8mm.dtsi | 18 +-
> >  1 file changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> > b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> > index dc99f45..429312e 100644
> > --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> > +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> > @@ -169,15 +169,6 @@
> > clock-output-names = "clk_ext4";
> > };
> >
> > -   gic: interrupt-controller@3880 {
> > -   compatible = "arm,gic-v3";
> > -   reg = <0x0 0x3880 0 0x1>, /* GIC Dist */
> > - <0x0 0x3888 0 0xC>; /* GICR (RD_base +
> SGI_base) */
> > -   #interrupt-cells = <3>;
> > -   interrupt-controller;
> > -   interrupts = ;
> > -   };
> > -
> > psci {
> > compatible = "arm,psci-1.0";
> > method = "smc";
> > @@ -739,6 +730,15 @@
> > dma-names = "rx-tx";
> > status = "disabled";
> > };
> > +
> > +   gic: interrupt-controller@3880 {
> > +   compatible = "arm,gic-v3";
> > +   reg = <0x3880 0x1>, /* GIC Dist */
> > + <0x3888 0xc>; /* GICR (RD_base +
> SGI_base) */
> > +   #interrupt-cells = <3>;
> > +   interrupt-controller;
> > +   interrupts = ;
> > +   };
> > };
> >
> > usbphynop1: usbphynop1 {
> > --
> > 2.7.4
> >


[PATCH RESEND] arm64: dts: imx8mm: Move gic node into soc node

2019-06-05 Thread Anson . Huang
From: Anson Huang 

GIC is inside of SoC from architecture perspective, it should
be located inside of soc node in DT.

Signed-off-by: Anson Huang 
---
Resend the patch based on Shawn's imx/dt64 branch.
---
 arch/arm64/boot/dts/freescale/imx8mm.dtsi | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi 
b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index 2128644..dcae59d 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -169,15 +169,6 @@
clock-output-names = "clk_ext4";
};
 
-   gic: interrupt-controller@3880 {
-   compatible = "arm,gic-v3";
-   reg = <0x0 0x3880 0 0x1>, /* GIC Dist */
- <0x0 0x3888 0 0xC>; /* GICR (RD_base + SGI_base) 
*/
-   #interrupt-cells = <3>;
-   interrupt-controller;
-   interrupts = ;
-   };
-
psci {
compatible = "arm,psci-1.0";
method = "smc";
@@ -810,5 +801,14 @@
dma-names = "rx-tx";
status = "disabled";
};
+
+   gic: interrupt-controller@3880 {
+   compatible = "arm,gic-v3";
+   reg = <0x3880 0x1>, /* GIC Dist */
+ <0x3888 0xc>; /* GICR (RD_base + 
SGI_base) */
+   #interrupt-cells = <3>;
+   interrupt-controller;
+   interrupts = ;
+   };
};
 };
-- 
2.7.4



Re: [RFC V2] mm: Generalize notify_page_fault()

2019-06-05 Thread Anshuman Khandual
On 06/05/2019 04:49 PM, Michael Ellerman wrote:
> Anshuman Khandual  writes:
>> Similar notify_page_fault() definitions are being used by architectures
>> duplicating much of the same code. This attempts to unify them into a
>> single implementation, generalize it and then move it to a common place.
>> kprobes_built_in() can detect CONFIG_KPROBES, hence notify_page_fault()
>> need not be wrapped again within CONFIG_KPROBES. Trap number argument can
>> now contain upto an 'unsigned int' accommodating all possible platforms.
> ...
>> diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
>> index 58f69fa..1bc3b18 100644
>> --- a/arch/arm/mm/fault.c
>> +++ b/arch/arm/mm/fault.c
>> @@ -30,28 +30,6 @@
>>  
>>  #ifdef CONFIG_MMU
>>  
>> -#ifdef CONFIG_KPROBES
>> -static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
>> -{
>> -int ret = 0;
>> -
>> -if (!user_mode(regs)) {
>> -/* kprobe_running() needs smp_processor_id() */
>> -preempt_disable();
>> -if (kprobe_running() && kprobe_fault_handler(regs, fsr))
>> -ret = 1;
>> -preempt_enable();
>> -}
>> -
>> -return ret;
>> -}
>> -#else
> 
> You've changed several of the architectures from something like above,
> where it disables preemption around the call into the below:
> 
>> +int __kprobes notify_page_fault(struct pt_regs *regs, unsigned int trap)
>> +{
>> +int ret = 0;
>> +
>> +/*
>> + * To be potentially processing a kprobe fault and to be allowed
>> + * to call kprobe_running(), we have to be non-preemptible.
>> + */
>> +if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
>> +if (kprobe_running() && kprobe_fault_handler(regs, trap))
>> +ret = 1;
>> +}
>> +return ret;
>> +}
> 
> Which skips everything if we're preemptible. Is that an equivalent

Right.

> change? If so can you please explain why in more detail.

It is probably not an equivalent change. The following explanation is extracted 
from
RFC V1 discussions (https://patchwork.kernel.org/patch/10968273/). Will explain 
the
rationale for this behavior change in the commit message next time around.


a980c0ef9f6d ("x86/kprobes: Refactor kprobes_fault() like 
kprobe_exceptions_notify()")
b506a9d08bae ("x86: code clarification patch to Kprobes arch code")

In particular the latter one (b506a9d08bae). It explains how the invoking context
in itself should be non-preemptible for the kprobes processing context 
irrespective
of whether kprobe_running() or perhaps smp_processor_id() is safe or not. Hence 
it
does not make much sense to continue when original invoking context is 
preemptible.
Instead just bail out earlier. This seems to be making more sense than preempt
disable-enable pair. If there are no concerns about this change from other 
platforms,
I will change the preemption behavior in proposed generic function next time 
around.


Do you see any concern changing preempt behavior in the x86 way ?

> 
> Also why not have it return bool?

Just that all architectures (except powerpc) had 'int' as return type. But we 
can
change that to 'bool'.


Re: [PATCH v3] USB: move usb debugfs directory creation to the usb common core

2019-06-05 Thread Chunfeng Yun
On Wed, 2019-06-05 at 14:44 +0200, Greg Kroah-Hartman wrote:
> The USB gadget subsystem wants to use the USB debugfs root directory, so
> move it to the common "core" USB code so that it is properly initialized
> and removed as needed.
> 
> In order to properly do this, we need to load the common code before the
> usb core code, when everything is linked into the kernel, so reorder the
> link order of the code.
> 
> Also as the usb common code has the possibility of the led trigger logic
> to be merged into it, handle the build option properly by only having
> one module init/exit function and have the common code initialize the
> led trigger if needed.
> 
> Reported-by: Chunfeng Yun 
> Cc: Felipe Balbi 
> Signed-off-by: Greg Kroah-Hartman 
> ---
> Chunfeng, can you try testing this again?

Tested-by: Chunfeng Yun 

Thank you, Greg, Felipe

> 
> v3: Fix __init and __exit error when building into the tree as reported
> by Chunfeng
> Fix Reported-by: line as reported
> v2: handle led common code link error reported by kbuild
> handle subsys_initcall issue pointed out by Chunfeng
> 
>  drivers/usb/Makefile|  3 +--
>  drivers/usb/common/common.c | 21 +
>  drivers/usb/common/common.h | 14 ++
>  drivers/usb/common/led.c|  9 +++--
>  drivers/usb/core/usb.c  | 10 --
>  5 files changed, 43 insertions(+), 14 deletions(-)
>  create mode 100644 drivers/usb/common/common.h
> 
> diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
> index 7d1b8c82b208..ecc2de1ffaae 100644
> --- a/drivers/usb/Makefile
> +++ b/drivers/usb/Makefile
> @@ -5,6 +5,7 @@
>  
>  # Object files in subdirectories
>  
> +obj-$(CONFIG_USB_COMMON) += common/
>  obj-$(CONFIG_USB)+= core/
>  obj-$(CONFIG_USB_SUPPORT)+= phy/
>  
> @@ -60,8 +61,6 @@ obj-$(CONFIG_USB_CHIPIDEA)  += chipidea/
>  obj-$(CONFIG_USB_RENESAS_USBHS)  += renesas_usbhs/
>  obj-$(CONFIG_USB_GADGET) += gadget/
>  
> -obj-$(CONFIG_USB_COMMON) += common/
> -
>  obj-$(CONFIG_USBIP_CORE) += usbip/
>  
>  obj-$(CONFIG_TYPEC)  += typec/
> diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c
> index 18f5dcf58b0d..1433260d99b4 100644
> --- a/drivers/usb/common/common.c
> +++ b/drivers/usb/common/common.c
> @@ -15,6 +15,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include "common.h"
>  
>  static const char *const ep_type_names[] = {
>   [USB_ENDPOINT_XFER_CONTROL] = "ctrl",
> @@ -291,4 +293,23 @@ struct device *usb_of_get_companion_dev(struct device 
> *dev)
>  EXPORT_SYMBOL_GPL(usb_of_get_companion_dev);
>  #endif
>  
> +struct dentry *usb_debug_root;
> +EXPORT_SYMBOL_GPL(usb_debug_root);
> +
> +static int __init usb_common_init(void)
> +{
> + usb_debug_root = debugfs_create_dir("usb", NULL);
> + ledtrig_usb_init();
> + return 0;
> +}
> +
> +static void __exit usb_common_exit(void)
> +{
> + ledtrig_usb_exit();
> + debugfs_remove_recursive(usb_debug_root);
> +}
> +
> +subsys_initcall(usb_common_init);
> +module_exit(usb_common_exit);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/drivers/usb/common/common.h b/drivers/usb/common/common.h
> new file mode 100644
> index ..424a91316a4b
> --- /dev/null
> +++ b/drivers/usb/common/common.h
> @@ -0,0 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __LINUX_USB_COMMON_H
> +#define __LINUX_USB_COMMON_H
> +
> +#if defined(CONFIG_USB_LED_TRIG)
> +void ledtrig_usb_init(void);
> +void ledtrig_usb_exit(void);
> +#else
> +static inline void ledtrig_usb_init(void) { }
> +static inline void ledtrig_usb_exit(void) { }
> +#endif
> +
> +#endif   /* __LINUX_USB_COMMON_H */
> diff --git a/drivers/usb/common/led.c b/drivers/usb/common/led.c
> index 7bd81166b77d..0865dd44a80a 100644
> --- a/drivers/usb/common/led.c
> +++ b/drivers/usb/common/led.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include "common.h"
>  
>  #define BLINK_DELAY 30
>  
> @@ -36,18 +37,14 @@ void usb_led_activity(enum usb_led_event ev)
>  EXPORT_SYMBOL_GPL(usb_led_activity);
>  
> 
> -static int __init ledtrig_usb_init(void)
> +void __init ledtrig_usb_init(void)
>  {
>   led_trigger_register_simple("usb-gadget", _usb_gadget);
>   led_trigger_register_simple("usb-host", _usb_host);
> - return 0;
>  }
>  
> -static void __exit ledtrig_usb_exit(void)
> +void __exit ledtrig_usb_exit(void)
>  {
>   led_trigger_unregister_simple(ledtrig_usb_gadget);
>   led_trigger_unregister_simple(ledtrig_usb_host);
>  }
> -
> -module_init(ledtrig_usb_init);
> -module_exit(ledtrig_usb_exit);
> diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
> index 7fcb9f782931..5a0df527a8ca 100644
> --- a/drivers/usb/core/usb.c
> +++ b/drivers/usb/core/usb.c
> @@ -1185,19 +1185,17 @@ static struct notifier_block usb_bus_nb = {
>   .notifier_call = usb_bus_notify,
>  };
>  
> -struct dentry *usb_debug_root;
> -EXPORT_SYMBOL_GPL(usb_debug_root);
> +static struct dentry 

Re: [PATCH] Input: alps: Drop unlikely before IS_ERR(_OR_NULL)

2019-06-05 Thread Joe Perches
On Thu, 2019-06-06 at 09:08 +0800, Kefeng Wang wrote:
> On 2019/6/5 22:42, Pali Rohár wrote:
> > On Wednesday 05 June 2019 22:24:28 Kefeng Wang wrote:
> > > IS_ERR(_OR_NULL) already contain an 'unlikely' compiler flag,
> > > so no need to do that again from its callers. Drop it.
> > Hi! I already reviewed this patch and rejected it, see:
> > https://patchwork.kernel.org/patch/10817475/
> OK, please ignore it.

I think the stated reason of better readability isn't
particularly sensible as the object code produced is
actually slightly larger.

x86-64 defconfig (gcc 8.3.0)

$ size drivers/input/mouse/alps.o*
   textdata bss dec hex filename
  29416  56   0   294727320 drivers/input/mouse/alps.o.new
  29432  56   0   294887330 drivers/input/mouse/alps.o.old

Also if this unlikely is _really_ useful, perhaps the
!IS_ERR immediately after could also use likely as the
test seems only done for an OOM condition.

> > > Cc: "Pali Rohár" 
> > > Cc: Dmitry Torokhov 
> > > Cc: linux-in...@vger.kernel.org
> > > Signed-off-by: Kefeng Wang 
> > > ---
> > >  drivers/input/mouse/alps.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
> > > index 0a6f7ca883e7..791ef0f826c5 100644
> > > --- a/drivers/input/mouse/alps.c
> > > +++ b/drivers/input/mouse/alps.c
> > > @@ -1478,7 +1478,7 @@ static void alps_report_bare_ps2_packet(struct 
> > > psmouse *psmouse,
> > >   /* On V2 devices the DualPoint Stick reports bare packets */
> > >   dev = priv->dev2;
> > >   dev2 = psmouse->dev;
> > > - } else if (unlikely(IS_ERR_OR_NULL(priv->dev3))) {
> > > + } else if (IS_ERR_OR_NULL(priv->dev3)) {
> > >   /* Register dev3 mouse if we received PS/2 packet first time */
> > >   if (!IS_ERR(priv->dev3))
> > >   psmouse_queue_work(psmouse, >dev3_register_work,



[PATCH] alignment:fetch pc-instr before irq_enable

2019-06-05 Thread xiaoqian
When the instruction code under PC address is read through
_probe_kernel_read in do_alignment,if the pte page corresponding
to the code segment of PC address is reclaimed exactly at this time,
the address mapping cannot be reconstructed because pagefault_disable()
is executed in the _probe_kernel_read function, and the failure to obtain
the instruction code of PC finally results in the unsuccessful repair
operation.
Thus we can modify the implementation of reading user-mode PC instruction
before local_irq_enable to avoid the above risk.
At the same time, adjust the sequence of code processing and optimize the
process.

Signed-off-by: xiaoqian 
Cc: sta...@vger.kernel.org
---
 arch/arm/mm/alignment.c | 81 +
 1 file changed, 55 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index e376883ab35b..4124b9ce3c70 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -76,6 +76,11 @@
 #define IS_T32(hi16) \
(((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
 
+#define INVALID_INSTR_MODE 0
+#define ARM_INSTR_MODE 1
+#define THUMB_INSTR_MODE   2
+#define THUMB2_INSTR_MODE  3
+
 static unsigned long ai_user;
 static unsigned long ai_sys;
 static void *ai_sys_last_pc;
@@ -705,6 +710,48 @@ thumb2arm(u16 tinstr)
}
 }
 
+static unsigned int
+fetch_usr_pc_instr(struct pt_regs *regs, unsigned long *pc_instrptr)
+{
+   unsigned int fault;
+   unsigned long instrptr;
+   unsigned long instr_mode = INVALID_INSTR_MODE;
+
+   instrptr = instruction_pointer(regs);
+
+   if (thumb_mode(regs)) {
+   u16 tinstr = 0;
+   u16 *ptr = (u16 *)(instrptr & ~1);
+
+   fault = probe_kernel_address(ptr, tinstr);
+   if (!fault) {
+   tinstr = __mem_to_opcode_thumb16(tinstr);
+   if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
+   IS_T32(tinstr)) {
+   /* Thumb-2 32-bit */
+   u16 tinstr2 = 0;
+
+   fault = probe_kernel_address(ptr + 1, tinstr2);
+   if (!fault) {
+   tinstr2 = 
__mem_to_opcode_thumb16(tinstr2);
+   *pc_instrptr = 
__opcode_thumb32_compose(tinstr, tinstr2);
+   instr_mode = THUMB2_INSTR_MODE;
+   }
+   } else {
+   *pc_instrptr = thumb2arm(tinstr);
+   instr_mode = THUMB_INSTR_MODE;
+   }
+   }
+   } else {
+   fault = probe_kernel_address((void *)instrptr, *pc_instrptr);
+   if (!fault) {
+   *pc_instrptr = __mem_to_opcode_arm(*pc_instrptr);
+   instr_mode = ARM_INSTR_MODE;
+   }
+   }
+   return instr_mode;
+}
+
 /*
  * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction
  * handlable by ARM alignment handler, also find the corresponding handler,
@@ -775,42 +822,24 @@ do_alignment(unsigned long addr, unsigned int fsr, struct 
pt_regs *regs)
unsigned long instr = 0, instrptr;
int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs 
*regs);
unsigned int type;
-   unsigned int fault;
u16 tinstr = 0;
int isize = 4;
int thumb2_32b = 0;
+   unsigned long pc_instr_mode;
+
+   pc_instr_mode = fetch_usr_pc_instr(regs, );
 
if (interrupts_enabled(regs))
local_irq_enable();
 
instrptr = instruction_pointer(regs);
-
-   if (thumb_mode(regs)) {
-   u16 *ptr = (u16 *)(instrptr & ~1);
-   fault = probe_kernel_address(ptr, tinstr);
-   tinstr = __mem_to_opcode_thumb16(tinstr);
-   if (!fault) {
-   if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
-   IS_T32(tinstr)) {
-   /* Thumb-2 32-bit */
-   u16 tinst2 = 0;
-   fault = probe_kernel_address(ptr + 1, tinst2);
-   tinst2 = __mem_to_opcode_thumb16(tinst2);
-   instr = __opcode_thumb32_compose(tinstr, 
tinst2);
-   thumb2_32b = 1;
-   } else {
-   isize = 2;
-   instr = thumb2arm(tinstr);
-   }
-   }
-   } else {
-   fault = probe_kernel_address((void *)instrptr, instr);
-   instr = __mem_to_opcode_arm(instr);
-   }
-
-   if (fault) {
+   if (pc_instr_mode == INVALID_INSTR_MODE) {
type = TYPE_FAULT;
goto bad_or_fault;
+   } else if 

Re: [PATCH 1/2] mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist

2019-06-05 Thread Nicholas Piggin
Andrew Morton's on June 6, 2019 7:22 am:
> On Thu,  6 Jun 2019 00:48:13 +1000 Nicholas Piggin  wrote:
> 
>> The kernel currently clamps large system hashes to MAX_ORDER when
>> hashdist is not set, which is rather arbitrary.
>> 
>> vmalloc space is limited on 32-bit machines, but this shouldn't
>> result in much more used because of small physical memory limiting
>> system hash sizes.
>> 
>> Include "vmalloc" or "linear" in the kernel log message.
>> 
>> Signed-off-by: Nicholas Piggin 
>> ---
>> 
>> This is a better solution than the previous one for the case of !NUMA
>> systems running on CONFIG_NUMA kernels, we can clear the default
>> hashdist early and have everything allocated out of the linear map.
>> 
>> The hugepage vmap series I will post later, but it's quite
>> independent from this improvement.
>> 
>> ...
>>
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -7966,6 +7966,7 @@ void *__init alloc_large_system_hash(const char 
>> *tablename,
>>  unsigned long log2qty, size;
>>  void *table = NULL;
>>  gfp_t gfp_flags;
>> +bool virt;
>>  
>>  /* allow the kernel cmdline to have a say */
>>  if (!numentries) {
>> @@ -8022,6 +8023,7 @@ void *__init alloc_large_system_hash(const char 
>> *tablename,
>>  
>>  gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
>>  do {
>> +virt = false;
>>  size = bucketsize << log2qty;
>>  if (flags & HASH_EARLY) {
>>  if (flags & HASH_ZERO)
>> @@ -8029,26 +8031,26 @@ void *__init alloc_large_system_hash(const char 
>> *tablename,
>>  else
>>  table = memblock_alloc_raw(size,
>> SMP_CACHE_BYTES);
>> -} else if (hashdist) {
>> +} else if (get_order(size) >= MAX_ORDER || hashdist) {
>>  table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
>> +virt = true;
>>  } else {
>>  /*
>>   * If bucketsize is not a power-of-two, we may free
>>   * some pages at the end of hash table which
>>   * alloc_pages_exact() automatically does
>>   */
>> -if (get_order(size) < MAX_ORDER) {
>> -table = alloc_pages_exact(size, gfp_flags);
>> -kmemleak_alloc(table, size, 1, gfp_flags);
>> -}
>> +table = alloc_pages_exact(size, gfp_flags);
>> +kmemleak_alloc(table, size, 1, gfp_flags);
>>  }
>>  } while (!table && size > PAGE_SIZE && --log2qty);
>>  
>>  if (!table)
>>  panic("Failed to allocate %s hash table\n", tablename);
>>  
>> -pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
>> -tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
>> +pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
>> +tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
>> +virt ? "vmalloc" : "linear");
> 
> Could remove `bool virt' and use is_vmalloc_addr() in the printk?
> 

It can run before mem_init() and it looks like some archs set
VMALLOC_START/END (high_memory) there (e.g., x86-32, ppc32).

Thanks,
Nick



Re: [PATCHv3 1/2] mm/gup: fix omission of check on FOLL_LONGTERM in get_user_pages_fast()

2019-06-05 Thread Pingfan Liu
On Thu, Jun 6, 2019 at 5:49 AM Andrew Morton  wrote:
>
> On Wed,  5 Jun 2019 17:10:19 +0800 Pingfan Liu  wrote:
>
> > As for FOLL_LONGTERM, it is checked in the slow path
> > __gup_longterm_unlocked(). But it is not checked in the fast path, which
> > means a possible leak of CMA page to longterm pinned requirement through
> > this crack.
> >
> > Place a check in the fast path.
>
> I'm not actually seeing a description (in either the existing code or
> this changelog or patch) an explanation of *why* we wish to exclude CMA
> pages from longterm pinning.
>
What about a short description like this:
FOLL_LONGTERM suggests a pin which is going to be given to hardware
and can't move. It would truncate CMA permanently and should be
excluded.

> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -2196,6 +2196,26 @@ static int __gup_longterm_unlocked(unsigned long 
> > start, int nr_pages,
> >   return ret;
> >  }
> >
> > +#ifdef CONFIG_CMA
> > +static inline int reject_cma_pages(int nr_pinned, struct page **pages)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < nr_pinned; i++)
> > + if (is_migrate_cma_page(pages[i])) {
> > + put_user_pages(pages + i, nr_pinned - i);
> > + return i;
> > + }
> > +
> > + return nr_pinned;
> > +}
>
> There's no point in inlining this.
OK, will drop it in V4.

>
> The code seems inefficient.  If it encounters a single CMA page it can
> end up discarding a possibly significant number of non-CMA pages.  I
The trick is that the pages are not discarded; in fact, they are still
referenced by the pte. We just leave it to the slow path to pick up the non-CMA
pages again.

> guess that doesn't matter much, as get_user_pages(FOLL_LONGTERM) is
> rare.  But could we avoid this (and the second pass across pages[]) by
> checking for a CMA page within gup_pte_range()?
It will spread the same logic to hugetlb pte and normal pte. And no
improvement in performance due to slow path. So I think maybe it is
not worth.

>
> > +#else
> > +static inline int reject_cma_pages(int nr_pinned, struct page **pages)
> > +{
> > + return nr_pinned;
> > +}
> > +#endif
> > +
> >  /**
> >   * get_user_pages_fast() - pin user pages in memory
> >   * @start:   starting user address
> > @@ -2236,6 +2256,9 @@ int get_user_pages_fast(unsigned long start, int 
> > nr_pages,
> >   ret = nr;
> >   }
> >
> > + if (unlikely(gup_flags & FOLL_LONGTERM) && nr)
> > + nr = reject_cma_pages(nr, pages);
> > +
>
> This would be a suitable place to add a comment explaining why we're
> doing this...
Would add one comment "FOLL_LONGTERM suggests a pin given to hardware
and rarely returned."

Thanks for your kind review.

Regards,
  Pingfan


Re: [PATCH] arm64: dts: imx8mm: Move gic node into soc node

2019-06-05 Thread Shawn Guo
On Mon, Jun 03, 2019 at 09:50:20AM +0800, anson.hu...@nxp.com wrote:
> From: Anson Huang 
> 
> GIC is inside of SoC from architecture perspective, it should
> be located inside of soc node in DT.
> 
> Signed-off-by: Anson Huang 

It doesn't apply to my imx/dt64 branch.  Please generate it against that
branch for my for-next.

Shawn

> ---
>  arch/arm64/boot/dts/freescale/imx8mm.dtsi | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi 
> b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> index dc99f45..429312e 100644
> --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
> @@ -169,15 +169,6 @@
>   clock-output-names = "clk_ext4";
>   };
>  
> - gic: interrupt-controller@3880 {
> - compatible = "arm,gic-v3";
> - reg = <0x0 0x3880 0 0x1>, /* GIC Dist */
> -   <0x0 0x3888 0 0xC>; /* GICR (RD_base + SGI_base) 
> */
> - #interrupt-cells = <3>;
> - interrupt-controller;
> - interrupts = ;
> - };
> -
>   psci {
>   compatible = "arm,psci-1.0";
>   method = "smc";
> @@ -739,6 +730,15 @@
>   dma-names = "rx-tx";
>   status = "disabled";
>   };
> +
> + gic: interrupt-controller@3880 {
> + compatible = "arm,gic-v3";
> + reg = <0x3880 0x1>, /* GIC Dist */
> +   <0x3888 0xc>; /* GICR (RD_base + 
> SGI_base) */
> + #interrupt-cells = <3>;
> + interrupt-controller;
> + interrupts = ;
> + };
>   };
>  
>   usbphynop1: usbphynop1 {
> -- 
> 2.7.4
> 


[RFC PATCH v2 2/5] x86/sgx: Require userspace to define enclave pages' protection bits

2019-06-05 Thread Sean Christopherson
Existing Linux Security Module policies restrict userspace's ability to
map memory, e.g. may require privileged permissions to map a page that
is simultaneously writable and executable.  Said permissions are often
tied to the file which backs the mapped memory, i.e. vm_file.

For reasons explained below, SGX does not allow LSMs to enforce policies
using existing LSM hooks such as file_mprotect().  Explicitly track the
protection bits for an enclave page (separate from the vma/pte bits) and
require userspace to explicitly define each page's protection bits when the
page is added to the enclave.  Enclave page protection bits pave the way
for adding security_enclave_load() as an SGX equivalent to file_mprotect(),
e.g. SGX can pass the page's protection bits and source vma to LSMs.
The source vma will allow LSMs to tie permissions to files, e.g. the
file containing the enclave's code and initial data, and the protection
bits will allow LSMs to make decisions based on the capabilities of the
enclave, e.g. if a page can be converted from RW to RX.

Due to the nature of the Enclave Page Cache, and because the EPC is
manually managed by SGX, all enclave vmas are backed by the same file,
i.e. /dev/sgx/enclave.  Specifically, a single file allows SGX to use
file op hooks to move pages in/out of the EPC.

Furthermore, EPC pages for any given enclave are fundamentally shared
between processes, i.e. CoW semantics are not possible with EPC pages
due to hardware restrictions such as 1:1 mappings between virtual and
physical addresses (within the enclave).

Lastly, all real world enclaves will need read, write and execute
permissions to EPC pages.

As a result, SGX does not play nice with existing LSM behavior as it is
impossible to apply policies to enclaves with reasonable granularity,
e.g. an LSM can deny access to EPC altogether, but can't deny
potentially unwanted behavior such as mapping pages RW->RW or RWX.

For example, because all (practical) enclaves need RW pages for data and
RX pages for code, SELinux's existing policies will require all enclaves
to have FILE__READ, FILE__WRITE and FILE__EXECUTE permissions on
/dev/sgx/enclave.  Withholding FILE__WRITE or FILE__EXECUTE in an attempt
to deny RW->RX or RWX would prevent running *any* enclave, even those
that cleanly separate RW and RX pages.  And because /dev/sgx/enclave
requires MAP_SHARED, the anonymous/CoW checks that would trigger
FILE__EXECMOD or PROCESS__EXECMEM permissions will never fire.

Taking protection bits has a second use in that it can be used to
prevent loading an enclave from a noexec file system.  On SGX2 hardware,
regardless of kernel support for SGX2, userspace could EADD a page from
a noexec path using read-only permissions and later mprotect() and
ENCLU[EMODPE] the page to gain execute permissions.  By requiring
the enclave's page protections up front, SGX will be able to enforce
noexec paths when building enclaves.

To prevent userspace from circumventing the allowed protections, do not
allow PROT_{READ,WRITE,EXEC} mappings to an enclave without an
associated enclave page, i.e. prevent creating a mapping with unchecked
protection bits.

Alternatively, SGX could pre-check what transitions are/aren't allowed
using some form of proxy for the enclave, e.g. its sigstruct, and
dynamically track protections in the SGX driver.  Dynamically tracking
protections and pre-checking permissions has several drawbacks:

  - Complicates the SGX implementation due to the need to coordinate
tracking across multiple mm structs and vmas.

  - LSM auditing would log denials that never manifest in failure.

  - Requires additional SGX specific flags/definitions be passed to/from
LSMs.

A second alternative would be to again use sigstruct as a proxy for the
enclave when performing access control checks, but hold a reference to
the sigstruct file and perform LSM checks during mmap()/mprotect() as
opposed to pre-checking permissions at enclave build time.  The big
downside to this approach is that it effectively requires userspace to
place sigstruct in a file, and the SGX driver must "pin" said file by
holding a reference to the file for the lifetime of the enclave.

A third alternative would be to pull the protection bits from the page's
SECINFO, i.e. make decisions based on the protections enforced by
hardware.  However, with SGX2, userspace can extend the hardware-
enforced protections via ENCLU[EMODPE], e.g. can add a page as RW and
later convert it to RX.  With SGX2, making a decision based on the
initial protections would either create a security hole or force SGX to
dynamically track "dirty" pages (see first alternative above).

Signed-off-by: Sean Christopherson 
---
 arch/x86/include/uapi/asm/sgx.h|  2 +
 arch/x86/kernel/cpu/sgx/driver/ioctl.c | 14 +--
 arch/x86/kernel/cpu/sgx/driver/main.c  |  5 +++
 arch/x86/kernel/cpu/sgx/encl.c | 53 ++
 arch/x86/kernel/cpu/sgx/encl.h |  4 ++
 5 files changed, 74 

[RFC PATCH v2 0/5] security: x86/sgx: SGX vs. LSM

2019-06-05 Thread Sean Christopherson
This series is the result of a rather absurd amount of discussion over
how to get SGX to play nice with LSM policies, without having to resort
to evil shenanigans or put undue burden on userspace.  Discussions are
still ongoing, e.g. folks are exploring alternatives to changing the
proposed SGX UAPI, but I wanted to get this updated version of the code
posted to show a fairly minimal implementation (from a kernel perspective),
e.g. the diff stats aren't too scary, especially considering 50% of the
added lines are comments.

This series is a delta to Jarkko's ongoing SGX series and applies on
Jarkko's current master at https://github.com/jsakkine-intel/linux-sgx.git:

  dfc89a83b5bc ("docs: x86/sgx: Document the enclave API")

The basic gist of the approach is to track an enclave's page protections
separately from any vmas that map the page, and separate from the hardware
enforced protections.  The SGX UAPI is modified to require userspace to
explicitly define the protections for each enclave page, i.e. the ioctl
to add pages to an enclave is extended to take PROT_{READ,WRITE,EXEC}
flags.

An enclave page's protections are the maximal protections that userspace
can use to map the page, e.g. mprotect() and mmap() are rejected if the
protections for the vma would be more permissible than those of the
associated enclave page.

Tracking protections for an enclave page (in addition to vmas) allows
SGX to invoke LSM upcalls while the enclave is being built.  This is
critical to enabling LSMs to implement policies for enclave pages that
are functionally equivalent to existing policies for normal pages.

v1: 
https://lkml.kernel.org/r/20190531233159.30992-1-sean.j.christopher...@intel.com

v2:
  - Dropped the patch(es) to extend the SGX UAPI to allow adding multiple
enclave pages in a single syscall [Jarkko].

  - Reject ioctl() immediately on LSM denial [Stephen].

  - Rework SELinux code to avoid checking EXECMEM multiple times [Stephen].

  - Adding missing equivalents to existing selinux_file_protect() checks
[Stephen].

  - Hold mmap_sem across copy_to_user() to prevent a TOCTOU race when
checking the source vma [Stephen].

  - Stubify security_enclave_load() if !CONFIG_SECURITY [Stephen].

  - Make flags a 32-bit field [Andy].

  - Don't validate the SECINFO protection flags against the enclave
page's protection flags [Andy].

  - Rename mprotect() hook to may_mprotect() [Andy].

  - Test 'vma->vm_flags & VM_MAYEXEC' instead of manually checking for
a noexec path [Jarkko].

  - Drop the SGX defined flags (use PROT_*) [Jarkko].

  - Improve comments and changelogs [Jarkko].

Sean Christopherson (5):
  mm: Introduce vm_ops->may_mprotect()
  x86/sgx: Require userspace to define enclave pages' protection bits
  x86/sgx: Enforce noexec filesystem restriction for enclaves
  LSM: x86/sgx: Introduce ->enclave_load() hook for Intel SGX
  security/selinux: Add enclave_load() implementation

 arch/x86/include/uapi/asm/sgx.h|  2 +
 arch/x86/kernel/cpu/sgx/driver/ioctl.c | 57 ++---
 arch/x86/kernel/cpu/sgx/driver/main.c  |  5 ++
 arch/x86/kernel/cpu/sgx/encl.c | 53 
 arch/x86/kernel/cpu/sgx/encl.h |  4 ++
 include/linux/lsm_hooks.h  | 13 +
 include/linux/mm.h |  2 +
 include/linux/security.h   | 12 +
 mm/mprotect.c  | 15 --
 security/security.c|  7 +++
 security/selinux/hooks.c   | 69 ++
 11 files changed, 228 insertions(+), 11 deletions(-)

-- 
2.21.0



[RFC PATCH v2 1/5] mm: Introduce vm_ops->may_mprotect()

2019-06-05 Thread Sean Christopherson
SGX will use the may_mprotect() hook to prevent userspace from
circumventing various security checks, e.g. Linux Security Modules.
Naming it may_mprotect() instead of simply mprotect() is intended to
reflect the hook's purpose as a way to gate mprotect() as opposed to
a wholesale replacement.

Enclaves are built by copying data from normal memory into the Enclave
Page Cache (EPC).  Due to the nature of SGX, the EPC is represented by a
single file that must be MAP_SHARED, i.e. mprotect() only ever sees a
MAP_SHARED vm_file that references a single file path.  Furthermore, all
enclaves will need read, write and execute pages in the EPC.

As a result, LSM policies cannot be meaningfully applied, e.g. an LSM
can deny access to the EPC as a whole, but can't deny PROT_EXEC on a page
that originated in a non-EXECUTE file (which is long gone by the time
mprotect() is called).

By hooking mprotect(), SGX can make explicit LSM upcalls while an
enclave is being built, i.e. when the kernel has a handle to the origin of
each enclave page, and enforce the result of the LSM policy whenever
userspace maps the enclave page in the future.

Alternatively, SGX could play games with MAY_{READ,WRITE,EXEC}, but
that approach is quite ugly, e.g. would require userspace to call an
SGX ioctl() prior to using mprotect() to extend a page's protections.

Signed-off-by: Sean Christopherson 
---
 include/linux/mm.h |  2 ++
 mm/mprotect.c  | 15 +++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..a697996040ac 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -458,6 +458,8 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
int (*split)(struct vm_area_struct * area, unsigned long addr);
int (*mremap)(struct vm_area_struct * area);
+   int (*may_mprotect)(struct vm_area_struct * area, unsigned long start,
+   unsigned long end, unsigned long prot);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bf38dfbbb4b4..18732543b295 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -547,13 +547,20 @@ static int do_mprotect_pkey(unsigned long start, size_t 
len,
goto out;
}
 
-   error = security_file_mprotect(vma, reqprot, prot);
-   if (error)
-   goto out;
-
tmp = vma->vm_end;
if (tmp > end)
tmp = end;
+
+   if (vma->vm_ops && vma->vm_ops->may_mprotect) {
+   error = vma->vm_ops->may_mprotect(vma, nstart, tmp, 
prot);
+   if (error)
+   goto out;
+   }
+
+   error = security_file_mprotect(vma, reqprot, prot);
+   if (error)
+   goto out;
+
error = mprotect_fixup(vma, , nstart, tmp, newflags);
if (error)
goto out;
-- 
2.21.0



[RFC PATCH v2 5/5] security/selinux: Add enclave_load() implementation

2019-06-05 Thread Sean Christopherson
The goal of selinux_enclave_load() is to provide a facsimile of the
existing selinux_file_mprotect() and file_map_prot_check() policies,
but tailored to the unique properties of SGX.

For example, an enclave page is technically backed by a MAP_SHARED file,
but the "file" is essentially shared memory that is never persisted
anywhere and also requires execute permissions (for some pages).

The basic concept is to require appropriate execute permissions on the
source of the enclave for pages that are requesting PROT_EXEC, e.g. if
an enclave page is being loaded from a regular file, require
FILE__EXECUTE and/or FILE__EXECMOD, and if it's coming from an
anonymous/private mapping, require PROCESS__EXECMEM since the process
is essentially executing from the mapping, albeit in a roundabout way.

Note, FILE__READ and FILE__WRITE are intentionally not required even if
the source page is backed by a regular file.  Writes to the enclave page
are contained to the EPC, i.e. never hit the original file, and read
permissions have already been vetted (or the VMA doesn't have PROT_READ,
in which case loading the page into the enclave will fail).

Signed-off-by: Sean Christopherson 
---
 security/selinux/hooks.c | 69 
 1 file changed, 69 insertions(+)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3ec702cf46ca..3c5418edf51c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6726,6 +6726,71 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux 
*aux)
 }
 #endif
 
+#ifdef CONFIG_INTEL_SGX
+int selinux_enclave_load(struct vm_area_struct *vma, unsigned long prot)
+{
+   const struct cred *cred = current_cred();
+   u32 sid = cred_sid(cred);
+   int ret;
+
+   /* SGX is supported only in 64-bit kernels. */
+   WARN_ON_ONCE(!default_noexec);
+
+   /* Only executable enclave pages are restricted in any way. */
+   if (!(prot & PROT_EXEC))
+   return 0;
+
+   /*
+* The source page is executable, i.e. has already passed SELinux's
+* checks, and userspace is not requesting RW->RX capabilities.
+*/
+   if ((vma->vm_flags & VM_EXEC) && !(prot & PROT_WRITE))
+   return 0;
+
+   /*
+* The source page is not executable, or userspace is requesting the
+* ability to do a RW->RX conversion.  Permissions are required as
+* follows, in order of increasing privilege:
+*
+* EXECUTE - Load an executable enclave page without RW->RX intent from
+*   a non-executable vma that is backed by a shared mapping to
+*   a regular file that has not undergone COW.
+*
+* EXECMOD - Load an executable enclave page without RW->RX intent from
+*   a non-executable vma that is backed by a shared mapping to
+*   a regular file that *has* undergone COW.
+*
+* - Load an enclave page *with* RW->RX intent from a shared
+*   mapping to a regular file.
+*
+* EXECMEM - Load an executable enclave page from an anonymous mapping.
+*
+* - Load an executable enclave page from a private file, e.g.
+*   from a shared mapping to a hugetlbfs file.
+*
+* - Load an enclave page *with* RW->RX intent from a private
+*   mapping to a regular file.
+*
+* Note, this hybrid EXECMOD and EXECMEM behavior is intentional and
+* reflects the nature of enclaves and the EPC, e.g. EPC is effectively
+* a non-persistent shared file, but each enclave is a private domain
+* within that shared file, so delegate to the source of the enclave.
+*/
+   if (vma->vm_file && !IS_PRIVATE(file_inode(vma->vm_file) &&
+   ((vma->vm_flags & VM_SHARED) || !(prot & PROT_WRITE {
+   if (!vma->anon_vma && !(prot & PROT_WRITE))
+   ret = file_has_perm(cred, vma->vm_file, FILE__EXECUTE);
+   else
+   ret = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
+   } else {
+   ret = avc_has_perm(_state,
+  sid, sid, SECCLASS_PROCESS,
+  PROCESS__EXECMEM, NULL);
+   }
+   return ret;
+}
+#endif
+
 struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
.lbs_cred = sizeof(struct task_security_struct),
.lbs_file = sizeof(struct file_security_struct),
@@ -6968,6 +7033,10 @@ static struct security_hook_list selinux_hooks[] 
__lsm_ro_after_init = {
LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
 #endif
+
+#ifdef CONFIG_INTEL_SGX
+   LSM_HOOK_INIT(enclave_load, selinux_enclave_load),
+#endif
 };
 
 static __init int selinux_init(void)
-- 
2.21.0



[RFC PATCH v2 3/5] x86/sgx: Enforce noexec filesystem restriction for enclaves

2019-06-05 Thread Sean Christopherson
Do not allow an enclave page to be mapped with PROT_EXEC if the source
vma does not have VM_MAYEXEC.  This effectively enforces noexec as
do_mmap() clears VM_MAYEXEC if the vma is being loaded from a noexec
path, i.e. prevents executing a file by loading it into an enclave.
Checking noexec indirectly by way of VM_MAYEXEC naturally handles any
other cases that clear VM_MAYEXEC to deny execute permissions.

Signed-off-by: Sean Christopherson 
---
 arch/x86/kernel/cpu/sgx/driver/ioctl.c | 47 +++---
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/driver/ioctl.c 
b/arch/x86/kernel/cpu/sgx/driver/ioctl.c
index ef5c2ce0f37b..44b2d73de7c3 100644
--- a/arch/x86/kernel/cpu/sgx/driver/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/driver/ioctl.c
@@ -577,6 +577,44 @@ static int sgx_encl_add_page(struct sgx_encl *encl, 
unsigned long addr,
return ret;
 }
 
+static int sgx_encl_page_copy(void *dst, unsigned long src, unsigned long prot)
+{
+   struct vm_area_struct *vma;
+   int ret;
+
+   if (!(prot & VM_EXEC))
+   return 0;
+
+   /* Hold mmap_sem across copy_from_user() to avoid a TOCTOU race. */
+   down_read(>mm->mmap_sem);
+
+   vma = find_vma(current->mm, src);
+   if (!vma) {
+   ret = -EFAULT;
+   goto out;
+   }
+
+   /*
+* Query VM_MAYEXEC as an indirect path_noexec() check (see do_mmap()),
+* but with some future proofing against other cases that may deny
+* execute permissions.
+*/
+   if (!(vma->vm_flags & VM_MAYEXEC)) {
+   ret = -EACCES;
+   goto out;
+   }
+
+   if (copy_from_user(dst, (void __user *)src, PAGE_SIZE))
+   ret = -EFAULT;
+   else
+   ret = 0;
+
+out:
+   up_read(>mm->mmap_sem);
+
+   return ret;
+}
+
 /**
  * sgx_ioc_enclave_add_page - handler for %SGX_IOC_ENCLAVE_ADD_PAGE
  *
@@ -616,13 +654,12 @@ static long sgx_ioc_enclave_add_page(struct file *filep, 
unsigned int cmd,
 
data = kmap(data_page);
 
-   if (copy_from_user((void *)data, (void __user *)addp->src, PAGE_SIZE)) {
-   ret = -EFAULT;
-   goto out;
-   }
-
prot = addp->flags & (PROT_READ | PROT_WRITE | PROT_EXEC);
 
+   ret = sgx_encl_page_copy(data, addp->src, prot);
+   if (ret)
+   goto out;
+
ret = sgx_encl_add_page(encl, addp->addr, data, , addp->mrmask,
prot);
if (ret)
-- 
2.21.0



[RFC PATCH v2 4/5] LSM: x86/sgx: Introduce ->enclave_load() hook for Intel SGX

2019-06-05 Thread Sean Christopherson
enclave_load() is roughly analogous to the existing file_mprotect().

Due to the nature of SGX and its Enclave Page Cache (EPC), all enclave
VMAs are backed by a single file, i.e. /dev/sgx/enclave, that must be
MAP_SHARED.  Furthermore, all enclaves need read, write and execute
VMAs.  As a result, the existing/standard call to file_mprotect() does
not provide any meaningful security for enclaves since an LSM can only
deny/grant access to the EPC as a whole.

security_enclave_load() is called when SGX is first loading an enclave
page, i.e. copying a page from normal memory into the EPC.  Although
the prototype for enclave_load() is similar to file_mprotect(), e.g.
SGX could theoretically use file_mprotect() and set reqprot=prot, a
separate hook is desirable as the semantics of an enclave's protection
bits are different than those of vmas, e.g. an enclave page tracks the
maximal set of protections, whereas file_mprotect() operates on the
actual protections being provided.  In other words, LSMs will likely
want to implement different policies for enclave page protections.

Note, extensive discussion yielded no sane alternative to some form of
SGX specific LSM hook[1].

[1] 
https://lkml.kernel.org/r/CALCETrXf8mSK45h7sTK5Wf+pXLVn=bjsc_rlpgo-h-qdzbr...@mail.gmail.com

Signed-off-by: Sean Christopherson 
---
 arch/x86/kernel/cpu/sgx/driver/ioctl.c | 12 ++--
 include/linux/lsm_hooks.h  | 13 +
 include/linux/security.h   | 12 
 security/security.c|  7 +++
 4 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/driver/ioctl.c 
b/arch/x86/kernel/cpu/sgx/driver/ioctl.c
index 44b2d73de7c3..29c0df672250 100644
--- a/arch/x86/kernel/cpu/sgx/driver/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/driver/ioctl.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -582,9 +583,6 @@ static int sgx_encl_page_copy(void *dst, unsigned long src, 
unsigned long prot)
struct vm_area_struct *vma;
int ret;
 
-   if (!(prot & VM_EXEC))
-   return 0;
-
/* Hold mmap_sem across copy_from_user() to avoid a TOCTOU race. */
down_read(>mm->mmap_sem);
 
@@ -599,15 +597,17 @@ static int sgx_encl_page_copy(void *dst, unsigned long 
src, unsigned long prot)
 * but with some future proofing against other cases that may deny
 * execute permissions.
 */
-   if (!(vma->vm_flags & VM_MAYEXEC)) {
+   if ((prot & VM_EXEC) && !(vma->vm_flags & VM_MAYEXEC)) {
ret = -EACCES;
goto out;
}
 
+   ret = security_enclave_load(vma, prot);
+   if (ret)
+   goto out;
+
if (copy_from_user(dst, (void __user *)src, PAGE_SIZE))
ret = -EFAULT;
-   else
-   ret = 0;
 
 out:
up_read(>mm->mmap_sem);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 47f58cfb6a19..c6f47a7eef70 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1446,6 +1446,12 @@
  * @bpf_prog_free_security:
  * Clean up the security information stored inside bpf prog.
  *
+ * Security hooks for Intel SGX enclaves.
+ *
+ * @enclave_load:
+ * @vma: the source memory region of the enclave page being loaded.
+ * @prot: the (maximal) protections of the enclave page.
+ * Return 0 if permission is granted.
  */
 union security_list_options {
int (*binder_set_context_mgr)(struct task_struct *mgr);
@@ -1807,6 +1813,10 @@ union security_list_options {
int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
+
+#ifdef CONFIG_INTEL_SGX
+   int (*enclave_load)(struct vm_area_struct *vma, unsigned long prot);
+#endif /* CONFIG_INTEL_SGX */
 };
 
 struct security_hook_heads {
@@ -2046,6 +2056,9 @@ struct security_hook_heads {
struct hlist_head bpf_prog_alloc_security;
struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
+#ifdef CONFIG_INTEL_SGX
+   struct hlist_head enclave_load;
+#endif /* CONFIG_INTEL_SGX */
 } __randomize_layout;
 
 /*
diff --git a/include/linux/security.h b/include/linux/security.h
index 659071c2e57c..0b6d1eb7368b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1829,5 +1829,17 @@ static inline void security_bpf_prog_free(struct 
bpf_prog_aux *aux)
 #endif /* CONFIG_SECURITY */
 #endif /* CONFIG_BPF_SYSCALL */
 
+#ifdef CONFIG_INTEL_SGX
+#ifdef CONFIG_SECURITY
+int security_enclave_load(struct vm_area_struct *vma, unsigned long prot);
+#else
+static inline int security_enclave_load(struct vm_area_struct *vma,
+   unsigned long prot)
+{
+   return 0;
+}
+#endif /* CONFIG_SECURITY */
+#endif /* CONFIG_INTEL_SGX */
+
 #endif /* ! __LINUX_SECURITY_H */
 
diff --git a/security/security.c 

Re: [PATCH v2] ARM: configs: Remove useless UEVENT_HELPER_PATH

2019-06-05 Thread Shawn Guo
On Tue, Jun 04, 2019 at 10:14:53AM +0200, Krzysztof Kozlowski wrote:
> Remove the CONFIG_UEVENT_HELPER_PATH because:
> 1. It is disabled since commit 1be01d4a5714 ("driver: base: Disable
>CONFIG_UEVENT_HELPER by default") as its dependency (UEVENT_HELPER) was
>made default to 'n',
> 2. It is not recommended (help message: "This should not be used today
>[...] creates a high system load") and was kept only for ancient
>userland,
> 3. Certain userland specifically requests it to be disabled (systemd
>README: "Legacy hotplug slows down the system and confuses udev").
> 
> Signed-off-by: Krzysztof Kozlowski 
> Acked-by: Geert Uytterhoeven 
> 
> ---
> 
> Changes since v2:
> 1. Remove unrelated files.
> 2. Add Geert's ack.
> ---
...
>  arch/arm/configs/imx_v4_v5_defconfig  | 1 -

Acked-by: Shawn Guo 


Re: KASAN: use-after-free Read in tomoyo_realpath_from_path

2019-06-05 Thread Tetsuo Handa
Here is a reproducer.

The problem is that TOMOYO is accessing an already freed socket from
security_file_open(), which later fails with -ENXIO (because we can't get a
file descriptor of a socket via the /proc/pid/fd/n interface), and the file
descriptor is getting released before security_file_open() completes because
we do not raise "struct file"->f_count of the file which is accessible via the
/proc/pid/fd/n interface. We can avoid this problem if we can avoid calling
security_file_open(), which after all fails with -ENXIO. How should we handle
this race? Let LSM modules check if security_file_open() was called on a
socket?


diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..995ffcb37128 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -765,6 +765,12 @@ static int do_dentry_open(struct file *f,
error = security_file_open(f);
if (error)
goto cleanup_all;
+   if (!strcmp(current->comm, "a.out") &&
+   f->f_path.dentry->d_sb->s_magic == SOCKFS_MAGIC) {
+   printk("Start open(socket) delay\n");
+   schedule_timeout_killable(HZ * 5);
+   printk("End open(socket) delay\n");
+   }
 
error = break_lease(locks_inode(f), f->f_flags);
if (error)



#include 
#include 
#include 
#include 
#include 
#include 

int main(int argc, char *argv[])
{
pid_t pid = getpid();
int fd = socket(AF_ISDN, SOCK_RAW, 0);
char buffer[128] = { };
if (fork() == 0) {
close(fd);
snprintf(buffer, sizeof(buffer) - 1, "/proc/%u/fd/%u", pid, fd);
open(buffer, 3);
_exit(0);
}
sleep(2);
close(fd);
return 0;
}



getpid()= 32504
socket(AF_ISDN, SOCK_RAW, 0)= 3
clone(strace: Process 32505 attached
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, 
child_tidptr=0x7efea30dda10) = 32505
[pid 32504] rt_sigprocmask(SIG_BLOCK, [CHLD],  
[pid 32505] close(3 
[pid 32504] <... rt_sigprocmask resumed> [], 8) = 0
[pid 32505] <... close resumed> )   = 0
[pid 32504] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[pid 32505] open("/proc/32504/fd/3", O_ACCMODE 
[pid 32504] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid 32504] nanosleep({2, 0}, 0x7ffd3c608150) = 0
[pid 32504] close(3)= 0
[pid 32504] exit_group(0)   = ?
[pid 32504] +++ exited with 0 +++
<... open resumed> )= -1 ENXIO (No such device or address)
exit_group(0)   = ?



[   95.109628] Start open(socket) delay
[   97.113150] base_sock_release(506a3239) sk=016d0ceb
[  100.142235] End open(socket) delay



Re: [RFC PATCH 8/9] LSM: x86/sgx: Introduce ->enclave_load() hook for Intel SGX

2019-06-05 Thread Sean Christopherson
On Tue, Jun 04, 2019 at 02:43:09PM -0700, Xing, Cedric wrote:
> > From: Christopherson, Sean J
> > Sent: Tuesday, June 04, 2019 1:37 PM
> > 
> > On Tue, Jun 04, 2019 at 01:29:10PM -0700, Andy Lutomirski wrote:
> > > On Fri, May 31, 2019 at 4:32 PM Sean Christopherson
> > >  wrote:
> > > >  static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long
> > > > addr, diff --git a/include/linux/lsm_hooks.h
> > > > b/include/linux/lsm_hooks.h index 47f58cfb6a19..0562775424a0 100644
> > > > --- a/include/linux/lsm_hooks.h
> > > > +++ b/include/linux/lsm_hooks.h
> > > > @@ -1446,6 +1446,14 @@
> > > >   * @bpf_prog_free_security:
> > > >   * Clean up the security information stored inside bpf prog.
> > > >   *
> > > > + * Security hooks for Intel SGX enclaves.
> > > > + *
> > > > + * @enclave_load:
> > > > + * On success, returns 0 and optionally adjusts @allowed_prot
> > > > + * @vma: the source memory region of the enclave page being
> > loaded.
> > > > + * @prot: the initial protection of the enclave page.
> > >
> > > What do you mean "initial"?  The page is always mapped PROT_NONE when
> > > this is called, right?  I feel like I must be missing something here.
> > 
> > Initial protection in the EPCM.  Yet another reason to ignore SECINFO.
> 
> I know you guys are talking in the background that all pages are mmap()'ed
> PROT_NONE. But that's an unnecessary limitation.

Not all pages have to be mmap()'d PROT_NONE, only pages that do not have
an associated enclave page.

> And @prot here should be @target_vma->vm_flags&(VM_READ|VM_WRITE|VM_EXEC). 

I don't follow, there is no target_vma at this point.


Re: [RFC V2] mm: Generalize notify_page_fault()

2019-06-05 Thread Anshuman Khandual



On 06/05/2019 03:23 AM, Matthew Wilcox wrote:
> On Tue, Jun 04, 2019 at 12:04:06PM +0530, Anshuman Khandual wrote:
>> +++ b/arch/x86/mm/fault.c
>> @@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
>>  return 0;
>>  }
>>  
>> -static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
>> -{
> ...
>> -}
> 
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 0e8834a..c5a8dcf 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1778,6 +1778,7 @@ static inline int pte_devmap(pte_t pte)
>>  }
>>  #endif
>>  
>> +int notify_page_fault(struct pt_regs *regs, unsigned int trap);
> 
> Why is it now out-of-line?  

Did not get it. AFAICS it is the same as in the last version and does not cross
the 80-character limit on that line.

> 
>> +++ b/mm/memory.c
>> +int __kprobes notify_page_fault(struct pt_regs *regs, unsigned int trap)
>> +{
>> +int ret = 0;
>> +
>> +/*
>> + * To be potentially processing a kprobe fault and to be allowed
>> + * to call kprobe_running(), we have to be non-preemptible.
>> + */
>> +if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
>> +if (kprobe_running() && kprobe_fault_handler(regs, trap))
>> +ret = 1;
>> +}
>> +return ret;
>> +}
>> +
> 
> I would argue this should be in kprobes.h as a static nokprobe_inline.

We can do that, though it will be stand-alone (not inside an #ifdef) as it
already takes care of CONFIG_KPROBES via kprobes_built_in(). I will change
it, in which case the above declaration in mm.h will not be required.


Re: [PATCH] lib: objagg: Use struct_size() in kzalloc()

2019-06-05 Thread David Miller
From: "Gustavo A. R. Silva" 
Date: Wed, 5 Jun 2019 09:45:16 -0500

> One of the more common cases of allocation size calculations is finding
> the size of a structure that has a zero-sized array at the end, along
> with memory for some number of elements for that array. For example:
> 
> struct objagg_stats {
>   ...
> struct objagg_obj_stats_info stats_info[];
> };
> 
> size = sizeof(*objagg_stats) + sizeof(objagg_stats->stats_info[0]) * count;
> instance = kzalloc(size, GFP_KERNEL);
> 
> Instead of leaving these open-coded and prone to type mistakes, we can
> now use the new struct_size() helper:
> 
> instance = kzalloc(struct_size(instance, stats_info, count), GFP_KERNEL);
> 
> Notice that, in this case, variable alloc_size is not necessary, hence it
> is removed.
> 
> This code was detected with the help of Coccinelle.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied.


Re: [PATCH v2 6/7] perf diff: Print the basic block cycles diff

2019-06-05 Thread Jin, Yao




On 6/5/2019 7:44 PM, Jiri Olsa wrote:

On Mon, Jun 03, 2019 at 10:36:16PM +0800, Jin Yao wrote:

SNIP


-   break;
return setup_compute_opt(option);
}
  
@@ -949,6 +953,14 @@ hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt,

  }
  
  static int64_t

+hist_entry__cmp_cycles(struct perf_hpp_fmt *fmt __maybe_unused,
+  struct hist_entry *left __maybe_unused,
+  struct hist_entry *right __maybe_unused)
+{
+   return 0;
+}


we have hist_entry__cmp_nop for that

SNIP


default:
BUG_ON(1);
}
@@ -1407,6 +1452,12 @@ static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
  }
  
+static int hpp__color_cycles(struct perf_hpp_fmt *fmt,

+struct perf_hpp *hpp, struct hist_entry *he)
+{
+   return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES);
+}
+
  static void
  hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
  {
@@ -1608,6 +1659,10 @@ static void data__hpp_register(struct data__file *d, int 
idx)
fmt->color = hpp__color_delta;
fmt->sort  = hist_entry__cmp_delta_abs;
break;
+   case PERF_HPP_DIFF__CYCLES:
+   fmt->color = hpp__color_cycles;
+   fmt->sort  = hist_entry__cmp_cycles;


also please explain in comment why it's nop

jirka



Got it, I will update the patch.

Thanks
Jin Yao


[PATCH v2 3/3] arm64: dts: qcom: msm8998: Add gpucc node

2019-06-05 Thread Jeffrey Hugo
Add MSM8998 GPU Clock Controller DT node.

Signed-off-by: Jeffrey Hugo 
---
 arch/arm64/boot/dts/qcom/msm8998.dtsi | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi 
b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index 574be78a936e..cf00bfeec6b3 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -3,6 +3,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -763,6 +764,20 @@
reg = <0x1f4 0x2>;
};
 
+   gpucc: clock-controller@5065000 {
+   compatible = "qcom,gpucc-msm8998";
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   #power-domain-cells = <1>;
+   reg = <0x05065000 0x9000>;
+
+   clocks = < RPM_SMD_XO_CLK_SRC>,
+< GPLL0_OUT_MAIN>;
+   clock-names = "xo",
+ "gpll0";
+   };
+
+
apcs_glb: mailbox@982 {
compatible = "qcom,msm8998-apcs-hmss-global";
reg = <0x17911000 0x1000>;
-- 
2.17.1



[PATCH v2 2/3] clk: qcom: Add MSM8998 GPU Clock Controller (GPUCC) driver

2019-06-05 Thread Jeffrey Hugo
The GPUCC manages the clocks for the Adreno GPU found on MSM8998.

Signed-off-by: Jeffrey Hugo 
---
 drivers/clk/qcom/Kconfig |   8 +
 drivers/clk/qcom/Makefile|   1 +
 drivers/clk/qcom/gpucc-msm8998.c | 356 +++
 3 files changed, 365 insertions(+)
 create mode 100644 drivers/clk/qcom/gpucc-msm8998.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index e1ff83cc361e..e992682fb9eb 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -222,6 +222,14 @@ config MSM_GCC_8998
  Say Y if you want to use peripheral devices such as UART, SPI,
  i2c, USB, UFS, SD/eMMC, PCIe, etc.
 
+config MSM_GPUCC_8998
+   tristate "MSM8998 Graphics Clock Controller"
+   select MSM_GCC_8998
+   help
+ Support for the graphics clock controller on MSM8998 devices.
+ Say Y if you want to support graphics controller devices and
+ functionality such as 3D graphics.
+
 config QCS_GCC_404
tristate "QCS404 Global Clock Controller"
help
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index f0768fb1f037..b8b6ffbdbd62 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_MSM_GCC_8994) += gcc-msm8994.o
 obj-$(CONFIG_MSM_GCC_8996) += gcc-msm8996.o
 obj-$(CONFIG_MSM_LCC_8960) += lcc-msm8960.o
 obj-$(CONFIG_MSM_GCC_8998) += gcc-msm8998.o
+obj-$(CONFIG_MSM_GPUCC_8998) += gpucc-msm8998.o
 obj-$(CONFIG_MSM_MMCC_8960) += mmcc-msm8960.o
 obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
diff --git a/drivers/clk/qcom/gpucc-msm8998.c b/drivers/clk/qcom/gpucc-msm8998.c
new file mode 100644
index ..34516b34710f
--- /dev/null
+++ b/drivers/clk/qcom/gpucc-msm8998.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, Jeffrey Hugo
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "clk-alpha-pll.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+#include "gdsc.h"
+
+enum {
+   P_XO,
+   P_GPLL0,
+   P_GPUPLL0_OUT_EVEN,
+};
+
+/* Instead of going directly to the block, XO is routed through this branch */
+static struct clk_branch gpucc_cxo_clk = {
+   .halt_reg = 0x1020,
+   .clkr = {
+   .enable_reg = 0x1020,
+   .enable_mask = BIT(0),
+   .hw.init = &(struct clk_init_data){
+   .name = "gpucc_cxo_clk",
+   .parent_data = &(const struct clk_parent_data){
+   .fw_name = "xo",
+   .name = "xo"
+   },
+   .num_parents = 1,
+   .ops = _branch2_ops,
+   .flags = CLK_IS_CRITICAL,
+   },
+   },
+};
+
+static const struct clk_div_table post_div_table_fabia_even[] = {
+   { 0x0, 1 },
+   { 0x1, 2 },
+   { 0x3, 4 },
+   { 0x7, 8 },
+   { }
+};
+
+static struct clk_alpha_pll gpupll0 = {
+   .offset = 0x0,
+   .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "gpupll0",
+   .parent_hws = (const struct clk_hw *[]){ _cxo_clk.clkr.hw 
},
+   .num_parents = 1,
+   .ops = _alpha_pll_fixed_fabia_ops,
+   },
+};
+
+static struct clk_alpha_pll_postdiv gpupll0_out_even = {
+   .offset = 0x0,
+   .post_div_shift = 8,
+   .post_div_table = post_div_table_fabia_even,
+   .num_post_div = ARRAY_SIZE(post_div_table_fabia_even),
+   .width = 4,
+   .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+   .clkr.hw.init = &(struct clk_init_data){
+   .name = "gpupll0_out_even",
+   .parent_hws = (const struct clk_hw *[]){  },
+   .num_parents = 1,
+   .ops = _alpha_pll_postdiv_fabia_ops,
+   },
+};
+
+static const struct parent_map gpu_xo_gpll0_map[] = {
+   { P_XO, 0 },
+   { P_GPLL0, 5 },
+};
+
+static const struct clk_parent_data gpu_xo_gpll0[] = {
+   { .hw = _cxo_clk.clkr.hw },
+   { .fw_name = "gpll0", .name = "gpll0" },
+};
+
+static const struct parent_map gpu_xo_gpupll0_map[] = {
+   { P_XO, 0 },
+   { P_GPUPLL0_OUT_EVEN, 1 },
+};
+
+static const struct clk_parent_data gpu_xo_gpupll0[] = {
+   { .hw = _cxo_clk.clkr.hw },
+   { .hw = _out_even.clkr.hw },
+};
+
+static const struct freq_tbl ftbl_rbcpr_clk_src[] = {
+   F(1920, P_XO, 1, 0, 0),
+   F(5000, P_GPLL0, 12, 0, 0),
+   { }
+};
+
+static struct clk_rcg2 rbcpr_clk_src = {
+   .cmd_rcgr = 0x1030,
+   .hid_width = 5,
+   .parent_map = gpu_xo_gpll0_map,
+   .freq_tbl = ftbl_rbcpr_clk_src,
+   

[PATCH v2 1/3] dt-bindings: clock: Document gpucc for msm8998

2019-06-05 Thread Jeffrey Hugo
The GPU for msm8998 has its own clock controller.  Document it.

Signed-off-by: Jeffrey Hugo 
---
 .../devicetree/bindings/clock/qcom,gpucc.txt  |  4 ++-
 .../dt-bindings/clock/qcom,gpucc-msm8998.h| 29 +++
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 include/dt-bindings/clock/qcom,gpucc-msm8998.h

diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.txt 
b/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
index 4e5215ef1acd..269afe8a757e 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.txt
@@ -2,13 +2,15 @@ Qualcomm Graphics Clock & Reset Controller Binding
 --
 
 Required properties :
-- compatible : shall contain "qcom,sdm845-gpucc"
+- compatible : shall contain "qcom,sdm845-gpucc" or "qcom,msm8998-gpucc"
 - reg : shall contain base register location and length
 - #clock-cells : from common clock binding, shall contain 1
 - #reset-cells : from common reset binding, shall contain 1
 - #power-domain-cells : from generic power domain binding, shall contain 1
 - clocks : shall contain the XO clock
+  shall contain the gpll0 out main clock (msm8998)
 - clock-names : shall be "xo"
+   shall be "gpll0" (msm8998)
 
 Example:
gpucc: clock-controller@509 {
diff --git a/include/dt-bindings/clock/qcom,gpucc-msm8998.h 
b/include/dt-bindings/clock/qcom,gpucc-msm8998.h
new file mode 100644
index ..2623570ee974
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gpucc-msm8998.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Jeffrey Hugo
+ */
+
+#ifndef _DT_BINDINGS_CLK_MSM_GPUCC_8998_H
+#define _DT_BINDINGS_CLK_MSM_GPUCC_8998_H
+
+#define GPUPLL00
+#define GPUPLL0_OUT_EVEN   1
+#define RBCPR_CLK_SRC  2
+#define GFX3D_CLK_SRC  3
+#define RBBMTIMER_CLK_SRC  4
+#define GFX3D_ISENSE_CLK_SRC   5
+#define RBCPR_CLK  6
+#define GFX3D_CLK  7
+#define RBBMTIMER_CLK  8
+#define GFX3D_ISENSE_CLK   9
+#define GPUCC_CXO_CLK  10
+
+#define GPU_CX_BCR 0
+#define RBCPR_BCR  1
+#define GPU_GX_BCR 2
+#define GPU_ISENSE_BCR 3
+
+#define GPU_CX_GDSC1
+#define GPU_GX_GDSC2
+
+#endif
-- 
2.17.1



Re: [PATCH v2] ARM: config: Remove left-over BACKLIGHT_LCD_SUPPORT

2019-06-05 Thread Shawn Guo
On Tue, Jun 04, 2019 at 09:24:21AM +0200, Krzysztof Kozlowski wrote:
> The CONFIG_BACKLIGHT_LCD_SUPPORT was removed in commit 8c5dc8d9f19c
> ("video: backlight: Remove useless BACKLIGHT_LCD_SUPPORT kernel
> symbol"). Options protected by CONFIG_BACKLIGHT_LCD_SUPPORT are now
> available directly.
> 
> Signed-off-by: Krzysztof Kozlowski 
> 
> ---
> 
> Changes since v1:
> 1. Change also mini2440_defconfig.
> ---
...
>  arch/arm/configs/mxs_defconfig| 1 -

Acked-by: Shawn Guo 


[PATCH v2 0/3] MSM8998 GPUCC Support

2019-06-05 Thread Jeffrey Hugo
The Adreno GPU on MSM8998 has its own clock controller, which is a
dependency for bringing up the GPU.  This series gets the gpucc all in
place as another step on the road to getting the GPU enabled.

v2:
-drop dead code

Jeffrey Hugo (3):
  dt-bindings: clock: Document gpucc for msm8998
  clk: qcom: Add MSM8998 GPU Clock Controller (GPUCC) driver
  arm64: dts: qcom: msm8998: Add gpucc node

 .../devicetree/bindings/clock/qcom,gpucc.txt  |   4 +-
 arch/arm64/boot/dts/qcom/msm8998.dtsi |  15 +
 drivers/clk/qcom/Kconfig  |   8 +
 drivers/clk/qcom/Makefile |   1 +
 drivers/clk/qcom/gpucc-msm8998.c  | 364 ++
 .../dt-bindings/clock/qcom,gpucc-msm8998.h|  29 ++
 6 files changed, 420 insertions(+), 1 deletion(-)
 create mode 100644 drivers/clk/qcom/gpucc-msm8998.c
 create mode 100644 include/dt-bindings/clock/qcom,gpucc-msm8998.h

-- 
2.17.1



[PATCH] staging: rtl8723bs: CleanUp to remove the error reported by checkpatch

2019-06-05 Thread Shobhit Kukreti
Cleaned up the code to remove the error "(foo*)" should be "(foo *)"
reported by checkpatch from the file rtl8723bs/os_dep/ioctl_linux.c

Signed-off-by: Shobhit Kukreti 
---
 drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 32 +-
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c 
b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index 236a462..0be8288 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -55,7 +55,7 @@ void rtw_indicate_wx_assoc_event(struct adapter *padapter)
struct  mlme_priv *pmlmepriv = >mlmepriv;
struct mlme_ext_priv *pmlmeext = >mlmeextpriv;
struct mlme_ext_info *pmlmeinfo = &(pmlmeext->mlmext_info);
-   struct wlan_bssid_ex*pnetwork = (struct 
wlan_bssid_ex*)(&(pmlmeinfo->network));
+   struct wlan_bssid_ex*pnetwork = (struct wlan_bssid_ex 
*)(&(pmlmeinfo->network));
 
memset(, 0, sizeof(union iwreq_data));
 
@@ -946,7 +946,7 @@ static int rtw_wx_set_pmkid(struct net_device *dev,
u8  j, blInserted = false;
int intReturn = false;
struct security_priv *psecuritypriv = >securitypriv;
-struct iw_pmksa*  pPMK = (struct iw_pmksa*)extra;
+struct iw_pmksa*  pPMK = (struct iw_pmksa *)extra;
 u8 strZeroMacAddress[ ETH_ALEN ] = { 0x00 };
 u8 strIssueBssid[ ETH_ALEN ] = { 0x00 };
 
@@ -2054,7 +2054,7 @@ static int rtw_wx_set_auth(struct net_device *dev,
   union iwreq_data *wrqu, char *extra)
 {
struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
-   struct iw_param *param = (struct iw_param*)&(wrqu->param);
+   struct iw_param *param = (struct iw_param *)&(wrqu->param);
int ret = 0;
 
switch (param->flags & IW_AUTH_INDEX) {
@@ -2337,8 +2337,8 @@ static int rtw_wx_read_rf(struct net_device *dev,
u32 path, addr, data32;
 
 
-   path = *(u32*)extra;
-   addr = *((u32*)extra + 1);
+   path = *(u32 *)extra;
+   addr = *((u32 *)extra + 1);
data32 = rtw_hal_read_rfreg(padapter, path, addr, 0xF);
/*
 * IMPORTANT!!
@@ -2358,9 +2358,9 @@ static int rtw_wx_write_rf(struct net_device *dev,
u32 path, addr, data32;
 
 
-   path = *(u32*)extra;
-   addr = *((u32*)extra + 1);
-   data32 = *((u32*)extra + 2);
+   path = *(u32 *)extra;
+   addr = *((u32 *)extra + 1);
+   data32 = *((u32 *)extra + 2);
 /* DBG_871X("%s: path =%d addr = 0x%02x data = 0x%05x\n", __func__, path, 
addr, data32); */
rtw_hal_write_rfreg(padapter, path, addr, 0xF, data32);
 
@@ -2584,7 +2584,7 @@ static int rtw_wps_start(struct net_device *dev,
goto exit;
}
 
-   uintRet = copy_from_user((void*)_start, pdata->pointer, 4);
+   uintRet = copy_from_user((void *)_start, pdata->pointer, 4);
if (u32wps_start == 0)
u32wps_start = *extra;
 
@@ -2694,7 +2694,7 @@ static int rtw_dbg_port(struct net_device *dev,
struct sta_priv *pstapriv = >stapriv;
 
 
-   pdata = (u32*)>data;
+   pdata = (u32 *)>data;
 
val32 = *pdata;
arg = (u16)(val32&0x);
@@ -3420,7 +3420,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, 
struct iw_point *p)
 
case IEEE_CMD_SET_WPA_IE:
/* ret = wpa_set_wpa_ie(dev, param, p->length); */
-   ret =  rtw_set_wpa_ie((struct adapter *)rtw_netdev_priv(dev), 
(char*)param->u.wpa_ie.data, (u16)param->u.wpa_ie.len);
+   ret =  rtw_set_wpa_ie((struct adapter *)rtw_netdev_priv(dev), 
(char *)param->u.wpa_ie.data, (u16)param->u.wpa_ie.len);
break;
 
case IEEE_CMD_SET_ENCRYPTION:
@@ -3824,7 +3824,7 @@ static int rtw_add_sta(struct net_device *dev, struct 
ieee_param *param)
if (WLAN_STA_HT) {
psta->htpriv.ht_option = true;
psta->qos_option = 1;
-   memcpy((void*)>htpriv.ht_cap, 
(void*)>u.add_sta.ht_cap, sizeof(struct rtw_ieee80211_ht_cap));
+   memcpy((void *)>htpriv.ht_cap, (void 
*)>u.add_sta.ht_cap, sizeof(struct rtw_ieee80211_ht_cap));
} else {
psta->htpriv.ht_option = false;
}
@@ -4368,7 +4368,7 @@ static int rtw_wx_set_priv(struct net_device *dev,
char *ext;
 
struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
-   struct iw_point *dwrq = (struct iw_point*)awrq;
+   struct iw_point *dwrq = (struct iw_point *)awrq;
 
/* RT_TRACE(_module_rtl871x_ioctl_os_c, _drv_notice_, 
("+rtw_wx_set_priv\n")); */
if (dwrq->length == 0)
@@ -4540,7 +4540,7 @@ static int rtw_test(
}
DBG_871X("%s: string =\"%s\"\n", __func__, pbuf);
 
-   ptmp = (char*)pbuf;
+   ptmp = (char *)pbuf;
  

Re: [PATCH v2 4/7] perf diff: Use hists to manage basic blocks per symbol

2019-06-05 Thread Jin, Yao




On 6/5/2019 7:44 PM, Jiri Olsa wrote:

On Mon, Jun 03, 2019 at 10:36:14PM +0800, Jin Yao wrote:

SNIP


data__for_each_file_new(i, d) {
pair = get_pair_data(he, d);
if (!pair)
@@ -510,6 +683,9 @@ static void hists__precompute(struct hists *hists)
case COMPUTE_WEIGHTED_DIFF:
compute_wdiff(he, pair);
break;
+   case COMPUTE_CYCLES:
+   process_block_per_sym(pair, d);
+   break;
default:
BUG_ON(1);
}
@@ -713,6 +889,14 @@ hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt 
__maybe_unused,
   sort_compute);
  }
  
+static int64_t

+hist_entry__cmp_cycles_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+  struct hist_entry *left __maybe_unused,
+  struct hist_entry *right __maybe_unused)
+{
+   return 0;
+}


this is hist_entry__cmp_nop.. please use it instead and
explain in comment why for COMPUTE_CYCLES we need the
default sort

jirka



fmt->sort must be set; otherwise, since fmt->sort is called without
checking whether it is valid, a crash happens. Yes, I should use
hist_entry__cmp_nop instead, and will add some comments for COMPUTE_CYCLES.


Thanks
Jin Yao




[PATCH RFC 00/10] RDMA/FS DAX truncate proposal

2019-06-05 Thread ira . weiny
From: Ira Weiny 

... V1,000,000   ;-)

Pre-requisites:
John Hubbard's put_user_pages() patch series.[1]
Jan Kara's ext4_break_layouts() fixes[2]

Based on the feedback from LSFmm and the resulting LWN article, I've
decided to take a slightly different tack on this problem.

The real issue is that there is no use case for a user to have RDMA pinned
memory which is then truncated.  So really any solution we present which:

A) Prevents file system corruption or data leaks
...and...
B) Informs the user that they did something wrong

Should be an acceptable solution.

Because this is slightly new behavior, and because this is going to be
specific to DAX (because of the lack of a page cache), we have made the user
"opt in" to this behavior.

The following patches implement the following solution.

1) The user has to opt in to allowing GUP pins on a file with a layout lease
   (now made visible).
2) GUP will fail (EPERM) if a layout lease is not taken
3) Any truncate or hole punch operation on a GUP'ed DAX page will fail.
4) The user has the option of holding the layout lease to receive a SIGIO for
   notification to the original thread that another thread has tried to delete
   their data.  Furthermore this indicates that if the user needs to GUP the
   file again they will need to retake the Layout lease before doing so.


NOTE: If the user releases the layout lease or if it has been broken by another
operation, further GUP operations on the file will fail without re-taking the
lease.  This means that if a user would like to register pieces of a file and
continue to register other pieces later they would be advised to keep the
layout lease, get a SIGIO notification, and retake the lease.

NOTE2: Truncation of pages which are not actively pinned will succeed.  Similar
to accessing an mmap to this area, GUP pins of that memory may fail.


A general overview follows for background.

It should be noted that one solution for this problem is to use RDMA's On
Demand Paging (ODP).  There are 2 big reasons this may not work.

1) The hardware being used for RDMA may not support ODP
2) ODP may be detrimental to the overall network (cluster or cloud)
   performance

Therefore, in order to support RDMA to File system pages without On Demand
Paging (ODP) a number of things need to be done.

1) GUP "longterm" users need to inform the other subsystems that they have
   taken a pin on a page which may remain pinned for a very "long time".[3]

2) Any page which is "controlled" by a file system needs to have special
   handling.  The details of the handling depends on if the page is page cache
   fronted or not.

   2a) A page cache fronted page which has been pinned by GUP long term can use a
   bounce buffer to allow the file system to write back snap shots of the page.
   This is handled by the FS recognizing the GUP long term pin and making a copy
   of the page to be written back.
NOTE: this patch set does not address this path.

   2b) A FS "controlled" page which is not page cache fronted is either easier
   to deal with or harder depending on the operation the filesystem is trying
   to do.

2ba) [Hard case] If the FS operation _is_ a truncate or hole punch the
FS can no longer use the pages in question until the pin has been
removed.  This patch set presents a solution to this by introducing
some reasonable restrictions on user space applications.

2bb) [Easy case] If the FS operation is _not_ a truncate or hole punch
then there is nothing which need be done.  Data is Read or Written
directly to the page.  This is an easy case which would currently work
if not for GUP long term pins being disabled.  Therefore this patch set
need not change access to the file data but does allow for GUP pins
after 2ba above is dealt with.


This patch series presents a solution for problem 2ba)

[1] https://github.com/johnhubbard/linux/tree/gup_dma_core

[2] ext4/dev branch:

- https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/log/?h=dev

Specific patches:

[2a] ext4: wait for outstanding dio during truncate in nojournal mode

- 
https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/commit/?h=dev=82a25b027ca48d7ef197295846b352345853dfa8

[2b] ext4: do not delete unlinked inode from orphan list on failed 
truncate

- 
https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/commit/?h=dev=ee0ed02ca93ef1ecf8963ad96638795d55af2c14

[2c] ext4: gracefully handle ext4_break_layouts() failure during 
truncate

- 
https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git/commit/?h=dev=b9c1c26739ec2d4b4fb70207a0a9ad6747e43f4c

[3] The definition of long time is debatable but it has been established
that RDMA's use of pages minutes or hours after the pin is the extreme case
which makes this problem most severe.


Ira Weiny 

[PATCH RFC 06/10] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range

2019-06-05 Thread ira . weiny
From: Ira Weiny 

Callers of dax_layout_busy_page() are only rarely operating on the
entire file of concern.

Teach dax_layout_busy_page() to operate on a sub-range of the
address_space provided.  Specifying 0 - ULONG_MAX however, will continue
to operate on the "entire file" and XFS is split out to a separate patch
by this method.

This could potentially speed up dax_layout_busy_page() as well.

Signed-off-by: Ira Weiny 
---
 fs/dax.c| 15 +++
 fs/ext4/ext4.h  |  2 +-
 fs/ext4/extents.c   |  6 +++---
 fs/ext4/inode.c | 19 ---
 fs/xfs/xfs_file.c   |  3 ++-
 include/linux/dax.h |  3 ++-
 6 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 29ff3b683657..abd77b184879 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -568,8 +568,11 @@ bool dax_mapping_is_dax(struct address_space *mapping)
 EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
 
 /**
- * dax_layout_busy_page - find first pinned page in @mapping
+ * dax_layout_busy_page - find first pinned page in @mapping within
+ *the range @off - @off + @len
  * @mapping: address space to scan for a page with ref count > 1
+ * @off: offset to start at
+ * @len: length to scan through
  *
  * DAX requires ZONE_DEVICE mapped pages. These pages are never
  * 'onlined' to the page allocator so they are considered idle when
@@ -582,9 +585,13 @@ EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
  * to be able to run unmap_mapping_range() and subsequently not race
  * mapping_mapped() becoming true.
  */
-struct page *dax_layout_busy_page(struct address_space *mapping)
+struct page *dax_layout_busy_page(struct address_space *mapping,
+ loff_t off, loff_t len)
 {
-   XA_STATE(xas, >i_pages, 0);
+   unsigned long start_idx = off >> PAGE_SHIFT;
+   unsigned long end_idx = (len == ULONG_MAX) ? ULONG_MAX
+   : start_idx + (len >> PAGE_SHIFT);
+   XA_STATE(xas, >i_pages, start_idx);
void *entry;
unsigned int scanned = 0;
struct page *page = NULL;
@@ -607,7 +614,7 @@ struct page *dax_layout_busy_page(struct address_space 
*mapping)
unmap_mapping_range(mapping, 0, 0, 1);
 
xas_lock_irq();
-   xas_for_each(, entry, ULONG_MAX) {
+   xas_for_each(, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
if (unlikely(dax_is_locked(entry)))
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1cb67859e051..ba5920c21023 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2530,7 +2530,7 @@ extern int ext4_get_inode_loc(struct inode *, struct 
ext4_iloc *);
 extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
-extern int ext4_break_layouts(struct inode *);
+extern int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int 
nblocks);
 extern void ext4_set_inode_flags(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d40ed940001e..9ddb117d8beb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4736,7 +4736,7 @@ static long ext4_zero_range(struct file *file, loff_t 
offset,
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret) {
up_write(_I(inode)->i_mmap_sem);
goto out_mutex;
@@ -5419,7 +5419,7 @@ int ext4_collapse_range(struct inode *inode, loff_t 
offset, loff_t len)
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret)
goto out_mmap;
 
@@ -5572,7 +5572,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, 
loff_t len)
 */
down_write(_I(inode)->i_mmap_sem);
 
-   ret = ext4_break_layouts(inode);
+   ret = ext4_break_layouts(inode, offset, len);
if (ret)
goto out_mmap;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c7c99f51961f..75f543f384e4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4232,7 +4232,7 @@ static void ext4_wait_dax_page(struct ext4_inode_info *ei)
down_write(>i_mmap_sem);
 }
 
-int ext4_break_layouts(struct inode *inode)
+int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len)
 {
struct ext4_inode_info *ei = EXT4_I(inode);
struct page *page;
@@ -4246,7 +4246,7 @@ int ext4_break_layouts(struct inode *inode)
break_layout(inode, true);
 
do {
-   page = dax_layout_busy_page(inode->i_mapping);
+   page = 

[PATCH RFC 02/10] fs/locks: Export F_LAYOUT lease to user space

2019-06-05 Thread ira . weiny
From: Ira Weiny 

GUP longterm pins of non-pagecache file system pages (eg FS DAX) are
currently disallowed because they are unsafe.

The danger for pinning these pages comes from the fact that hole punch
and/or truncate of those files results in the pages being mapped and
pinned by a user space process while DAX has potentially allocated those
pages to other processes.

Most (All) users who are mapping FS DAX pages for long term pin purposes
(such as RDMA) are not going to want to deallocate these pages while
those pages are in use.  To do so would mean the application would lose
data.  So the use case for allowing truncate operations of such pages
is limited.

However, the kernel must protect itself and users from potential
mistakes and/or malicious user space code.  Rather than disabling long
term pins as is done now, allow users who know they are going to
be pinning this memory to alert the file system of this intention.
Furthermore, allow users to be alerted such that they can react if a
truncate operation occurs for some reason.

Example user space pseudocode for a user using RDMA and wanting to allow
a truncate would look like this:

lease_break_sigio_handler() {
...
if (sigio.fd == rdma_fd) {
complete_rdma_operations(...);
ibv_dereg_mr(mr);
close(rdma_fd);
fcntl(rdma_fd, F_SETLEASE, F_UNLCK);
}
}

setup_rdma_to_dax_file() {
...
rdma_fd = open(...)
fcntl(rdma_fd, F_SETLEASE, F_LAYOUT);
sigaction(SIGIO, ...  lease_break ...);
ptr = mmap(rdma_fd, ...);
mr = ibv_reg_mr(ptr, ...);
do_rdma_stuff(...);
}

Follow on patches implement the notification of the lease holder on
truncate as well as failing the truncate if the GUP pin is not released.

This first patch exports the F_LAYOUT lease type and allows the user to set
and get it.

After the complete series:

1) Failure to obtain a F_LAYOUT lease on an open FS DAX file will result
   in a failure to GUP pin any pages in that file.  An example of a call
   which results in GUP pin is ibv_reg_mr().
2) While the GUP pin is in place (eg MR is in use) truncates of the
   affected pages will fail.
3) If the user registers a sigaction they will be notified of the
   truncate so they can react.  Failure to react will result in the
   lease being revoked after /lease-break-time seconds.  After
   this time new GUP pins will fail without a new lease being taken.
4) A truncate will work if the pages being truncated are not actively
   pinned at the time of truncate.  Attempts to pin these pages after
   will result in a failure.

Signed-off-by: Ira Weiny 
---
 fs/locks.c   | 36 +++-
 include/linux/fs.h   |  2 +-
 include/uapi/asm-generic/fcntl.h |  3 +++
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 0cc2b9f30e22..de9761c068de 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -191,6 +191,8 @@ static int target_leasetype(struct file_lock *fl)
return F_UNLCK;
if (fl->fl_flags & FL_DOWNGRADE_PENDING)
return F_RDLCK;
+   if (fl->fl_flags & FL_LAYOUT)
+   return F_LAYOUT;
return fl->fl_type;
 }
 
@@ -611,7 +613,8 @@ static const struct lock_manager_operations 
lease_manager_ops = {
 /*
  * Initialize a lease, use the default lock manager operations
  */
-static int lease_init(struct file *filp, long type, struct file_lock *fl)
+static int lease_init(struct file *filp, long type, unsigned int flags,
+ struct file_lock *fl)
 {
if (assign_type(fl, type) != 0)
return -EINVAL;
@@ -621,6 +624,8 @@ static int lease_init(struct file *filp, long type, struct 
file_lock *fl)
 
fl->fl_file = filp;
fl->fl_flags = FL_LEASE;
+   if (flags & FL_LAYOUT)
+   fl->fl_flags |= FL_LAYOUT;
fl->fl_start = 0;
fl->fl_end = OFFSET_MAX;
fl->fl_ops = NULL;
@@ -629,7 +634,8 @@ static int lease_init(struct file *filp, long type, struct 
file_lock *fl)
 }
 
 /* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, long type)
+static struct file_lock *lease_alloc(struct file *filp, long type,
+unsigned int flags)
 {
struct file_lock *fl = locks_alloc_lock();
int error = -ENOMEM;
@@ -637,7 +643,7 @@ static struct file_lock *lease_alloc(struct file *filp, 
long type)
if (fl == NULL)
return ERR_PTR(error);
 
-   error = lease_init(filp, type, fl);
+   error = lease_init(filp, type, flags, fl);
if (error) {
locks_free_lock(fl);
return ERR_PTR(error);
@@ -1588,7 +1594,7 @@ int __break_lease(struct inode *inode, unsigned int mode, 
unsigned int type)
int want_write = (mode & O_ACCMODE) != O_RDONLY;
LIST_HEAD(dispose);

[PATCH RFC 05/10] fs/ext4: Teach ext4 to break layout leases

2019-06-05 Thread ira . weiny
From: Ira Weiny 

ext4 needs to break a layout lease if it is held to inform a user
holding a layout lease that a truncate is about to happen.  This allows
the user knowledge of, and choice in how to handle, some other thread
attempting to modify a file they are actively using.

Split out the logic to determine if a mapping is DAX, export it, and then
break layout leases if a mapping is DAX.

Signed-off-by: Ira Weiny 
---
 fs/dax.c| 23 ---
 fs/ext4/inode.c |  4 
 include/linux/dax.h |  6 ++
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index f74386293632..29ff3b683657 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -552,6 +552,21 @@ static void *grab_mapping_entry(struct xa_state *xas,
return xa_mk_internal(VM_FAULT_FALLBACK);
 }
 
+bool dax_mapping_is_dax(struct address_space *mapping)
+{
+   /*
+* In the 'limited' case get_user_pages() for dax is disabled.
+*/
+   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+   return false;
+
+   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+   return false;
+
+   return true;
+}
+EXPORT_SYMBOL_GPL(dax_mapping_is_dax);
+
 /**
  * dax_layout_busy_page - find first pinned page in @mapping
  * @mapping: address space to scan for a page with ref count > 1
@@ -574,13 +589,7 @@ struct page *dax_layout_busy_page(struct address_space 
*mapping)
unsigned int scanned = 0;
struct page *page = NULL;
 
-   /*
-* In the 'limited' case get_user_pages() for dax is disabled.
-*/
-   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-   return NULL;
-
-   if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+   if (!dax_mapping_is_dax(mapping))
return NULL;
 
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c16071547c9c..c7c99f51961f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4241,6 +4241,10 @@ int ext4_break_layouts(struct inode *inode)
if (WARN_ON_ONCE(!rwsem_is_locked(>i_mmap_sem)))
return -EINVAL;
 
+   /* Break layout leases if active */
+   if (dax_mapping_is_dax(inode->i_mapping))
+   break_layout(inode, true);
+
do {
page = dax_layout_busy_page(inode->i_mapping);
if (!page)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index becaea5f4488..ee6cbd56ddc4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -106,6 +106,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device 
*bdev);
 int dax_writeback_mapping_range(struct address_space *mapping,
struct block_device *bdev, struct writeback_control *wbc);
 
+bool dax_mapping_is_dax(struct address_space *mapping);
 struct page *dax_layout_busy_page(struct address_space *mapping);
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
@@ -137,6 +138,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct 
block_device *bdev)
return NULL;
 }
 
+bool dax_mapping_is_dax(struct address_space *mapping)
+{
+   return false;
+}
+
 static inline struct page *dax_layout_busy_page(struct address_space *mapping)
 {
return NULL;
-- 
2.20.1



[PATCH RFC 07/10] fs/ext4: Fail truncate if pages are GUP pinned

2019-06-05 Thread ira . weiny
From: Ira Weiny 

If pages are actively GUP pinned, fail the truncate operation.

Signed-off-by: Ira Weiny 
---
 fs/ext4/inode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 75f543f384e4..1ded83ec08c0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4250,6 +4250,9 @@ int ext4_break_layouts(struct inode *inode, loff_t 
offset, loff_t len)
if (!page)
return 0;
 
+   if (page_gup_pinned(page))
+   return -ETXTBSY;
+
error = ___wait_var_event(>_refcount,
atomic_read(>_refcount) == 1,
TASK_INTERRUPTIBLE, 0, 0,
-- 
2.20.1



[PATCH RFC 09/10] fs/xfs: Fail truncate if pages are GUP pinned

2019-06-05 Thread ira . weiny
From: Ira Weiny 

If pages are actively GUP pinned, fail the truncate operation.  To
support an application which wishes to remove a pin upon SIGIO reception,
we must change the order of breaking leased layouts with respect to
breaking DAX layouts.

Check for a GUP pin on the page being truncated and return ETXTBSY if it
is GUP pinned.

Change the order of XFS break leased layouts and break DAX layouts.

Select EXPORTFS_BLOCK_OPS for FS_DAX to ensure that
xfs_break_leased_layouts() is defined for FS_DAX as well as pNFS.

Update comment for xfs_break_leased_layouts()

Signed-off-by: Ira Weiny 
---
 fs/Kconfig|  1 +
 fs/xfs/xfs_file.c |  8 ++--
 fs/xfs/xfs_pnfs.c | 14 +++---
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index f1046cf6ad85..c54b0b88abbf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -49,6 +49,7 @@ config FS_DAX
select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
select FS_IOMAP
select DAX
+   select EXPORTFS_BLOCK_OPS
help
  Direct Access (DAX) can be used on memory-backed block devices.
  If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 350eb5546d36..1dc61c98f7cd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -756,6 +756,9 @@ xfs_break_dax_layouts(
if (!page)
return 0;
 
+   if (page_gup_pinned(page))
+   return -ETXTBSY;
+
*retry = true;
return ___wait_var_event(>_refcount,
atomic_read(>_refcount) == 1, TASK_INTERRUPTIBLE,
@@ -779,10 +782,11 @@ xfs_break_layouts(
retry = false;
switch (reason) {
case BREAK_UNMAP:
-   error = xfs_break_dax_layouts(inode, , off, len);
+   error = xfs_break_leased_layouts(inode, iolock, );
if (error || retry)
break;
-   /* fall through */
+   error = xfs_break_dax_layouts(inode, , off, len);
+   break;
case BREAK_WRITE:
error = xfs_break_leased_layouts(inode, iolock, );
break;
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index bde2c9f56a46..e70d24d12cbf 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -21,14 +21,14 @@
 #include "xfs_pnfs.h"
 
 /*
- * Ensure that we do not have any outstanding pNFS layouts that can be used by
- * clients to directly read from or write to this inode.  This must be called
- * before every operation that can remove blocks from the extent map.
- * Additionally we call it during the write operation, where aren't concerned
- * about exposing unallocated blocks but just want to provide basic
+ * Ensure that we do not have any outstanding pNFS or longterm GUP layouts that
+ * can be used by clients to directly read from or write to this inode.  This
+ * must be called before every operation that can remove blocks from the extent
+ * map.  Additionally we call it during the write operation, where aren't
+ * concerned about exposing unallocated blocks but just want to provide basic
  * synchronization between a local writer and pNFS clients.  mmap writes would
- * also benefit from this sort of synchronization, but due to the tricky 
locking
- * rules in the page fault path we don't bother.
+ * also benefit from this sort of synchronization, but due to the tricky
+ * locking rules in the page fault path we don't bother.
  */
 int
 xfs_break_leased_layouts(
-- 
2.20.1



[PATCH RFC 10/10] mm/gup: Remove FOLL_LONGTERM DAX exclusion

2019-06-05 Thread ira . weiny
From: Ira Weiny 

Now that there is a mechanism for users to safely take LONGTERM pins on
FS DAX pages, remove the FS DAX exclusion from GUP with FOLL_LONGTERM.

Special processing remains in effect for CONFIG_CMA

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 78 ++--
 1 file changed, 8 insertions(+), 70 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index d06cc5b14c0b..4f6e5606b81e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1392,26 +1392,6 @@ long get_user_pages_remote(struct task_struct *tsk, 
struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
-#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
-static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
-{
-   long i;
-   struct vm_area_struct *vma_prev = NULL;
-
-   for (i = 0; i < nr_pages; i++) {
-   struct vm_area_struct *vma = vmas[i];
-
-   if (vma == vma_prev)
-   continue;
-
-   vma_prev = vma;
-
-   if (vma_is_fsdax(vma))
-   return true;
-   }
-   return false;
-}
-
 #ifdef CONFIG_CMA
 static struct page *new_non_cma_page(struct page *page, unsigned long private)
 {
@@ -1542,18 +1522,6 @@ static long check_and_migrate_cma_pages(struct 
task_struct *tsk,
 
return nr_pages;
 }
-#else
-static long check_and_migrate_cma_pages(struct task_struct *tsk,
-   struct mm_struct *mm,
-   unsigned long start,
-   unsigned long nr_pages,
-   struct page **pages,
-   struct vm_area_struct **vmas,
-   unsigned int gup_flags)
-{
-   return nr_pages;
-}
-#endif
 
 /*
  * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
@@ -1567,49 +1535,28 @@ static long __gup_longterm_locked(struct task_struct 
*tsk,
  struct vm_area_struct **vmas,
  unsigned int gup_flags)
 {
-   struct vm_area_struct **vmas_tmp = vmas;
unsigned long flags = 0;
-   long rc, i;
+   long rc;
 
-   if (gup_flags & FOLL_LONGTERM) {
-   if (!pages)
-   return -EINVAL;
-
-   if (!vmas_tmp) {
-   vmas_tmp = kcalloc(nr_pages,
-  sizeof(struct vm_area_struct *),
-  GFP_KERNEL);
-   if (!vmas_tmp)
-   return -ENOMEM;
-   }
+   if (flags & FOLL_LONGTERM)
flags = memalloc_nocma_save();
-   }
 
rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
-vmas_tmp, NULL, gup_flags);
+vmas, NULL, gup_flags);
 
if (gup_flags & FOLL_LONGTERM) {
memalloc_nocma_restore(flags);
if (rc < 0)
goto out;
 
-   if (check_dax_vmas(vmas_tmp, rc)) {
-   for (i = 0; i < rc; i++)
-   put_page(pages[i]);
-   rc = -EOPNOTSUPP;
-   goto out;
-   }
-
rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
-vmas_tmp, gup_flags);
+vmas, gup_flags);
}
 
 out:
-   if (vmas_tmp != vmas)
-   kfree(vmas_tmp);
return rc;
 }
-#else /* !CONFIG_FS_DAX && !CONFIG_CMA */
+#else /* !CONFIG_CMA */
 static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
  struct mm_struct *mm,
  unsigned long start,
@@ -1621,7 +1568,7 @@ static __always_inline long __gup_longterm_locked(struct 
task_struct *tsk,
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
   NULL, flags);
 }
-#endif /* CONFIG_FS_DAX || CONFIG_CMA */
+#endif /* CONFIG_CMA */
 
 /*
  * This is the same as get_user_pages_remote(), just with a
@@ -1882,9 +1829,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
goto pte_unmap;
 
if (pte_devmap(pte)) {
-   if (unlikely(flags & FOLL_LONGTERM))
-   goto pte_unmap;
-
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
undo_dev_pagemap(nr, nr_start, pages);
@@ -2057,12 +2001,9 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, 
unsigned long addr,
if (!pmd_access_permitted(orig, flags & FOLL_WRITE))

[PATCH RFC 08/10] fs/xfs: Teach xfs to use new dax_layout_busy_page()

2019-06-05 Thread ira . weiny
From: Ira Weiny 

dax_layout_busy_page() can now operate on a sub-range of the
address_space provided.

Have xfs specify the sub range to dax_layout_busy_page()

Signed-off-by: Ira Weiny 
---
 fs/xfs/xfs_file.c  | 19 +--
 fs/xfs/xfs_inode.h |  5 +++--
 fs/xfs/xfs_ioctl.c | 15 ---
 fs/xfs/xfs_iops.c  | 14 ++
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebddf911644c..350eb5546d36 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -300,7 +300,11 @@ xfs_file_aio_write_checks(
if (error <= 0)
return error;
 
-   error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
+   /*
+* BREAK_WRITE ignores the offset/len tuple; just specify the whole file
+* (0 - ULONG_MAX) to be safe.
+*/
+   error = xfs_break_layouts(inode, iolock, 0, ULONG_MAX, BREAK_WRITE);
if (error)
return error;
 
@@ -740,14 +744,15 @@ xfs_wait_dax_page(
 static int
 xfs_break_dax_layouts(
struct inode*inode,
-   bool*retry)
+   bool*retry,
+   loff_t   off,
+   loff_t   len)
 {
struct page *page;
 
ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
 
-   /* We default to the "whole file" */
-   page = dax_layout_busy_page(inode->i_mapping, 0, ULONG_MAX);
+   page = dax_layout_busy_page(inode->i_mapping, off, len);
if (!page)
return 0;
 
@@ -761,6 +766,8 @@ int
 xfs_break_layouts(
struct inode*inode,
uint*iolock,
+   loff_t   off,
+   loff_t   len,
enum layout_break_reason reason)
 {
boolretry;
@@ -772,7 +779,7 @@ xfs_break_layouts(
retry = false;
switch (reason) {
case BREAK_UNMAP:
-   error = xfs_break_dax_layouts(inode, );
+   error = xfs_break_dax_layouts(inode, , off, len);
if (error || retry)
break;
/* fall through */
@@ -814,7 +821,7 @@ xfs_file_fallocate(
return -EOPNOTSUPP;
 
xfs_ilock(ip, iolock);
-   error = xfs_break_layouts(inode, , BREAK_UNMAP);
+   error = xfs_break_layouts(inode, , offset, len, BREAK_UNMAP);
if (error)
goto out_unlock;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 558173f95a03..1b0948f5267c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -475,8 +475,9 @@ enum xfs_prealloc_flags {
 
 intxfs_update_prealloc_flags(struct xfs_inode *ip,
  enum xfs_prealloc_flags flags);
-intxfs_break_layouts(struct inode *inode, uint *iolock,
-   enum layout_break_reason reason);
+int xfs_break_layouts(struct inode *inode, uint *iolock,
+ loff_t off, loff_t len,
+ enum layout_break_reason reason);
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d7dfc13f30f5..a702e44a63b8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -605,6 +605,7 @@ xfs_ioc_space(
enum xfs_prealloc_flags flags = 0;
uintiolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
int error;
+   loff_t  break_length;
 
if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
return -EPERM;
@@ -625,9 +626,6 @@ xfs_ioc_space(
return error;
 
xfs_ilock(ip, iolock);
-   error = xfs_break_layouts(inode, , BREAK_UNMAP);
-   if (error)
-   goto out_unlock;
 
switch (bf->l_whence) {
case 0: /*SEEK_SET*/
@@ -673,6 +671,17 @@ xfs_ioc_space(
goto out_unlock;
}
 
+   /* break layout for the whole file if len ends up 0 */
+   if (bf->l_len == 0)
+   break_length = ULONG_MAX;
+   else
+   break_length = bf->l_len;
+
+   error = xfs_break_layouts(inode, &iolock, bf->l_start, break_length,
+ BREAK_UNMAP);
+   if (error)
+   goto out_unlock;
+
switch (cmd) {
case XFS_IOC_ZERO_RANGE:
flags |= XFS_PREALLOC_SET;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 74047bd0c1ae..5529bc7a516b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1052,10 +1052,16 @@ xfs_vn_setattr(
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 
-   error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
-   if (error) {
-   xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
-   return error;
+

[PATCH RFC 04/10] mm/gup: Ensure F_LAYOUT lease is held prior to GUP'ing pages

2019-06-05 Thread ira . weiny
From: Ira Weiny 

On FS DAX files users must inform the file system they intend to take
long term GUP pins on the file pages.  Failure to do so should result in
an error.

Ensure that a F_LAYOUT lease exists at the time the GUP call is made.
If not, return EPERM.

Signed-off-by: Ira Weiny 
---
 fs/locks.c | 41 +
 include/linux/mm.h |  2 ++
 mm/gup.c   | 25 +
 mm/huge_memory.c   | 12 
 4 files changed, 80 insertions(+)

diff --git a/fs/locks.c b/fs/locks.c
index de9761c068de..43f5dc97652c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2945,3 +2945,44 @@ static int __init filelock_init(void)
return 0;
 }
 core_initcall(filelock_init);
+
+/**
+ * mapping_inode_has_layout()
+ * @page page we are trying to GUP
+ *
+ * This should only be called on DAX pages.  DAX pages which are mapped through
+ * FS DAX do not use the page cache.  As a result they require the user to take
+ * a LAYOUT lease on them prior to being able to pin them for longterm use.
+ * This allows the user to opt-into the fact that truncation operations will
+ * fail for the duration of the pin.
+ *
+ * @Return true if the page has a LAYOUT lease associated with its file.
+ */
+bool mapping_inode_has_layout(struct page *page)
+{
+   bool ret = false;
+   struct inode *inode;
+   struct file_lock *fl;
+   struct file_lock_context *ctx;
+
+   if (WARN_ON(PageAnon(page)) ||
+   WARN_ON(!page) ||
+   WARN_ON(!page->mapping) ||
+   WARN_ON(!page->mapping->host))
+   return false;
+
+   inode = page->mapping->host;
+
+   ctx = locks_get_lock_context(inode, F_RDLCK);
+   spin_lock(&ctx->flc_lock);
+   list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+   if (fl->fl_flags & FL_LAYOUT) {
+   ret = true;
+   break;
+   }
+   }
+   spin_unlock(&ctx->flc_lock);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(mapping_inode_has_layout);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc373a9b69fc..432b004b920c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1630,6 +1630,8 @@ long get_user_pages_unlocked(unsigned long start, 
unsigned long nr_pages,
 int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
 
+bool mapping_inode_has_layout(struct page *page);
+
 /* Container for pinned pfns / pages */
 struct frame_vector {
unsigned int nr_allocated;  /* Number of frames we have space for */
diff --git a/mm/gup.c b/mm/gup.c
index 26a7a3a3a657..d06cc5b14c0b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -361,6 +361,13 @@ static struct page *follow_page_pte(struct vm_area_struct 
*vma,
page = pte_page(pte);
else
goto no_page;
+
+   if (unlikely(flags & FOLL_LONGTERM) &&
+   (*pgmap)->type == MEMORY_DEVICE_FS_DAX &&
+   !mapping_inode_has_layout(page)) {
+   page = ERR_PTR(-EPERM);
+   goto out;
+   }
} else if (unlikely(!page)) {
if (flags & FOLL_DUMP) {
/* Avoid special (like zero) pages in core dumps */
@@ -1905,6 +1912,16 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
 
VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
+   if (pte_devmap(pte) &&
+   unlikely(flags & FOLL_LONGTERM) &&
+   pgmap->type == MEMORY_DEVICE_FS_DAX &&
+   !mapping_inode_has_layout(head)) {
+   mod_node_page_state(page_pgdat(head),
+   NR_GUP_FAST_PAGE_BACKOFFS, 1);
+   put_user_page(head);
+   goto pte_unmap;
+   }
+
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -1955,6 +1972,14 @@ static int __gup_device_huge(unsigned long pfn, unsigned 
long addr,
}
SetPageReferenced(page);
pages[*nr] = page;
+
+   if (unlikely(flags & FOLL_LONGTERM) &&
+   pgmap->type == MEMORY_DEVICE_FS_DAX &&
+   !mapping_inode_has_layout(page)) {
+   undo_dev_pagemap(nr, nr_start, pages);
+   return 0;
+   }
+
if (try_get_gup_pin_page(page, NR_GUP_FAST_PAGES_REQUESTED)) {
undo_dev_pagemap(nr, nr_start, pages);
return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bb7fd7fa6f77..cdc213e50902 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -950,6 +950,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, 
unsigned long addr,
if (!*pgmap)
return 

[PATCH RFC 03/10] mm/gup: Pass flags down to __gup_device_huge* calls

2019-06-05 Thread ira . weiny
From: Ira Weiny 

In order to support checking for a layout lease on a FS DAX inode these
calls need to know if FOLL_LONGTERM was specified.

Prepare for this with this patch.

Signed-off-by: Ira Weiny 
---
 mm/gup.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index a3fb48605836..26a7a3a3a657 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1939,7 +1939,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, 
unsigned long end,
 
 #if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 static int __gup_device_huge(unsigned long pfn, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
int nr_start = *nr;
struct dev_pagemap *pgmap = NULL;
@@ -1969,30 +1970,33 @@ static int __gup_device_huge(unsigned long pfn, 
unsigned long addr,
 }
 
 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
unsigned long fault_pfn;
int nr_start = *nr;
 
fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
return 0;
 
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
undo_dev_pagemap(nr, nr_start, pages);
return 0;
}
+
return 1;
 }
 
 static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
unsigned long fault_pfn;
int nr_start = *nr;
 
fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
+   if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags))
return 0;
 
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
@@ -2003,14 +2007,16 @@ static int __gup_device_huge_pud(pud_t orig, pud_t 
*pudp, unsigned long addr,
 }
 #else
 static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
BUILD_BUG();
return 0;
 }
 
 static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
-   unsigned long end, struct page **pages, int *nr)
+   unsigned long end, struct page **pages, int *nr,
+   unsigned int flags)
 {
BUILD_BUG();
return 0;
@@ -2029,7 +2035,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned 
long addr,
if (pmd_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
-   return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
+   return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr,
+flags);
}
 
refs = 0;
@@ -2072,7 +2079,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned 
long addr,
if (pud_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
-   return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
+   return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr,
+flags);
}
 
refs = 0;
-- 
2.20.1



[PATCH RFC 01/10] fs/locks: Add trace_leases_conflict

2019-06-05 Thread ira . weiny
From: Ira Weiny 

Signed-off-by: Ira Weiny 
---
 fs/locks.c  | 20 ++-
 include/trace/events/filelock.h | 35 +
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index ec1e4a5df629..0cc2b9f30e22 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1534,11 +1534,21 @@ static void time_out_leases(struct inode *inode, struct 
list_head *dispose)
 
 static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
 {
-   if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT))
-   return false;
-   if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE))
-   return false;
-   return locks_conflict(breaker, lease);
+   bool rc;
+
+   if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
+   rc = false;
+   goto trace;
+   }
+   if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
+   rc = false;
+   goto trace;
+   }
+
+   rc = locks_conflict(breaker, lease);
+trace:
+   trace_leases_conflict(rc, lease, breaker);
+   return rc;
 }
 
 static bool
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index fad7befa612d..4b735923f2ff 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -203,6 +203,41 @@ TRACE_EVENT(generic_add_lease,
show_fl_type(__entry->fl_type))
 );
 
+TRACE_EVENT(leases_conflict,
+   TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock 
*breaker),
+
+   TP_ARGS(conflict, lease, breaker),
+
+   TP_STRUCT__entry(
+   __field(void *, lease)
+   __field(void *, breaker)
+   __field(unsigned int, l_fl_flags)
+   __field(unsigned int, b_fl_flags)
+   __field(unsigned char, l_fl_type)
+   __field(unsigned char, b_fl_type)
+   __field(bool, conflict)
+   ),
+
+   TP_fast_assign(
+   __entry->lease = lease;
+   __entry->l_fl_flags = lease->fl_flags;
+   __entry->l_fl_type = lease->fl_type;
+   __entry->breaker = breaker;
+   __entry->b_fl_flags = breaker->fl_flags;
+   __entry->b_fl_type = breaker->fl_type;
+   __entry->conflict = conflict;
+   ),
+
+   TP_printk("conflict %d: lease=0x%p fl_flags=%s fl_type=%s; breaker=0x%p 
fl_flags=%s fl_type=%s",
+   __entry->conflict,
+   __entry->lease,
+   show_fl_flags(__entry->l_fl_flags),
+   show_fl_type(__entry->l_fl_type),
+   __entry->breaker,
+   show_fl_flags(__entry->b_fl_flags),
+   show_fl_type(__entry->b_fl_type))
+);
+
 #endif /* _TRACE_FILELOCK_H */
 
 /* This part must be outside protection */
-- 
2.20.1



  1   2   3   4   5   6   7   8   9   10   >