date:20181227

[PATCH] media: venus: add debugfs support

2018-12-27 Thread Malathi Gottam

Enable logs in venus through debugfs to print
debug information.

Signed-off-by: Malathi Gottam 
---
 drivers/media/platform/qcom/venus/core.c   |  62 -
 drivers/media/platform/qcom/venus/core.h   |  32 +
 drivers/media/platform/qcom/venus/firmware.c   |   6 +-
 drivers/media/platform/qcom/venus/helpers.c|  51 +--
 drivers/media/platform/qcom/venus/hfi.c|  93 ++---
 drivers/media/platform/qcom/venus/hfi_cmds.c   |  33 +++--
 drivers/media/platform/qcom/venus/hfi_msgs.c   |   5 +
 drivers/media/platform/qcom/venus/vdec.c   | 185 +++--
 drivers/media/platform/qcom/venus/vdec_ctrls.c |   7 +-
 drivers/media/platform/qcom/venus/venc.c   | 139 +++
 drivers/media/platform/qcom/venus/venc_ctrls.c |   9 +-
 11 files changed, 512 insertions(+), 110 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index cb411eb..6531830 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -13,6 +13,7 @@
  *
  */
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +32,50 @@
 #include "venc.h"
 #include "firmware.h"
 
+struct dentry *debugfs_root;
+int venus_debug = ERR;
+EXPORT_SYMBOL_GPL(venus_debug);
+
+static struct dentry *venus_debugfs_init_drv(void)
+{
+   bool ok = false;
+   struct dentry *dir = NULL;
+
+   dir = debugfs_create_dir("venus", NULL);
+   if (IS_ERR_OR_NULL(dir)) {
+   dir = NULL;
+   pr_err("failed to create debug dir");
+   goto failed_create_dir;
+   }
+
+#define __debugfs_create(__type, __fname, __value) ({  
\
+   struct dentry *f = debugfs_create_##__type(__fname, 0644,   \
+   dir, __value);\
+   if (IS_ERR_OR_NULL(f)) {  \
+   dprintk(ERR, "Failed creating debugfs file '%pd/%s'\n",  \
+   dir, __name); \
+   f = NULL; \
+   } \
+   f;\
+})
+
+   ok =
+   __debugfs_create(x32, "debug_level", _debug);
+
+#undef __debugfs_create
+
+   if (!ok)
+   goto failed_create_dir;
+
+   return dir;
+
+failed_create_dir:
+   if (dir)
+   debugfs_remove_recursive(debugfs_root);
+
+   return NULL;
+}
+
 static void venus_event_notify(struct venus_core *core, u32 event)
 {
struct venus_inst *inst;
@@ -137,6 +182,7 @@ static int venus_clks_enable(struct venus_core *core)
 
return 0;
 err:
+   dprintk(ERR, "Failed to enable clk:%d\n", i);
while (i--)
clk_disable_unprepare(core->clks[i]);
 
@@ -236,6 +282,8 @@ static int venus_probe(struct platform_device *pdev)
struct resource *r;
int ret;
 
+   debugfs_root = venus_debugfs_init_drv();
+
core = devm_kzalloc(dev, sizeof(*core), GFP_KERNEL);
if (!core)
return -ENOMEM;
@@ -245,8 +293,10 @@ static int venus_probe(struct platform_device *pdev)
 
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
core->base = devm_ioremap_resource(dev, r);
-   if (IS_ERR(core->base))
+   if (IS_ERR(core->base)) {
+   dprintk(ERR, "Failed to ioremap platform resources");
return PTR_ERR(core->base);
+   }
 
core->irq = platform_get_irq(pdev, 0);
if (core->irq < 0)
@@ -297,8 +347,10 @@ static int venus_probe(struct platform_device *pdev)
goto err_runtime_disable;
 
ret = venus_firmware_init(core);
-   if (ret)
+   if (ret) {
+   dprintk(ERR, "Failed to init video firmware\n");
goto err_runtime_disable;
+   }
 
ret = venus_boot(core);
if (ret)
@@ -321,8 +373,10 @@ static int venus_probe(struct platform_device *pdev)
goto err_venus_shutdown;
 
ret = v4l2_device_register(dev, >v4l2_dev);
-   if (ret)
+   if (ret) {
+   dprintk(ERR, "Failed to register v4l2 device\n");
goto err_core_deinit;
+   }
 
ret = pm_runtime_put_sync(dev);
if (ret)
@@ -366,6 +420,8 @@ static int venus_remove(struct platform_device *pdev)
 
v4l2_device_unregister(>v4l2_dev);
 
+   debugfs_remove_recursive(debugfs_root);
+
return ret;
 }
 
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 6382cea..c31d9e0 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -25,6 +25,38 @@
 
 #define VIDC_CLKS_NUM_MAX  4
 
+extern int venus_debug;

RE: [PATCH][next] KVM: x86: Fix bit shifting in update_intel_pt_cfg

2018-12-27 Thread Kang, Luwei




> -Original Message-
> From: Gustavo A. R. Silva [mailto:gust...@embeddedor.com]
> Sent: Thursday, December 27, 2018 4:41 AM
> To: Kang, Luwei ; Paolo Bonzini ; 
> Radim Krčmář ; Thomas Gleixner
> ; Ingo Molnar ; Borislav Petkov 
> ; H. Peter Anvin ;
> x...@kernel.org
> Cc: k...@vger.kernel.org; linux-kernel@vger.kernel.org; Gustavo A. R. Silva 
> 
> Subject: [PATCH][next] KVM: x86: Fix bit shifting in update_intel_pt_cfg
> 
> ctl_bitmask in pt_desc is of type u64. When an integer like 0xf is being left 
> shifted more than 32 bits, the behavior is undefined.
> 
> Fix this by adding suffix ULL to integer 0xf.
> 
> Addresses-Coverity-ID: 1476095 ("Bad bit shift operation")
> Fixes: 6c0f0bba85a0 ("KVM: x86: Introduce a function to initialize the PT 
> configuration")
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  arch/x86/kvm/vmx/vmx.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 
> cbd55e7aeae5..251c68a74bbe 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7012,7 +7012,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
> 
>   /* unmask address range configure area */
>   for (i = 0; i < vmx->pt_desc.addr_range; i++)
> - vmx->pt_desc.ctl_bitmask &= ~(0xf << (32 + i * 4));
> + vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
>  }

Looks good to me, thanks.

Reviewed-by: Luwei Kang 

> 
>  static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> --
> 2.20.1

Re: [PATCH v1 0/2] Virtio: fix some vq allocation issues

2018-12-27 Thread Christian Borntraeger




On 28.12.2018 03:26, Wei Wang wrote:
> Some vqs don't need to be allocated when the related feature bits are
> disabled. Callers notice the vq allocation layer by setting the related
> names[i] to be NULL.
> 
> This patch series fixes the find_vqs implementations to handle this case.

So the random crashes during boot are gone.
What still does not work is actually using the balloon.

So in the qemu monitor, using let's say "balloon 1000" will hang the guest.
Seems to be a deadlock in the virtio-ccw code.  We seem to call the
config code in the interrupt handler.

crash> bt
PID: 0  TASK: d9a400CPU: 0   COMMAND: "swapper/0"
 LOWCORE INFO:
  -psw  : 0x0404c0018000 0x00116472
  -function : smp_yield_cpu at 116472
  -prefix   : 0x7fffc000
  -cpu timer: 0x7fcc8c0af5be
  -clock cmp: 0x720a4e4002831000
  -general registers:
 00 00
 0x009c 0x00fac2b0
 0x0015 0xffe2
 0x03e00010 0x0001
 00 0x0001
 0x03e8 0x0f85c020
 00 0x0001
 0x00116464 0x03e00035fad0
  -access registers:
 00 00 00 00
 00 00 00 00
 00 00 00 00
 00 00 00 00
  -control registers:
 0x008014866a10 0x00fbc007
 0x00100140 00
 0x 0x00100140
 0x3100 0x0f9281c3
 00 00
 00 00
 00 0x00fbc007
 0xdb00 0x00100280
  -floating point registers:
 00 0x02aa374b0298
 0x0001 0x0010
 0x01ae 0x000f
 0x02aa46056010 0x02aa460681c0
 0x03ffd867d590 0x03ffdca7c818
 0x03ffd867d58f 0x03fff6ffdc60
 0x03ffd867dad8 0x03ffdca7c5e8
 0x03ffd867dadc 0x03ffdca7c818

 #0 [3e00035faf8] arch_spin_lock_wait at a7bd52
 #1 [3e00035fb50] ccw_io_helper at 9130ea
 #2 [3e00035fbd0] virtio_ccw_get_config at 914a28
 #3 [3e00035fc30] virtballoon_changed at 76e776
 #4 [3e00035fc70] virtio_config_changed at 76aabc
 #5 [3e00035fca8] virtio_ccw_int_handler at 914ede
 #6 [3e00035fd18] ccw_device_irq at 8941d4
 #7 [3e00035fd48] do_cio_interrupt at 885906
 #8 [3e00035fd80] __handle_irq_event_percpu at 1b3c22
 #9 [3e00035fdf0] handle_irq_event_percpu at 1b3e1e
#10 [3e00035fe28] handle_percpu_irq at 1b87d8
#11 [3e00035fe58] generic_handle_irq at 1b2ce6
#12 [3e00035fe70] do_IRQ at 10c3b2
#13 [3e00035fea8] io_int_handler at a86b3c
 PSW:  0404c0018000 001034f6 (enabled_wait+70)
 GPRS:   7ff70200 0706c0018000 
   000c 01bf6f331c58   
      0001 
   7ff70200 00a8b2f0 001034f6 03e000317e00 
 #0 [3e000317e28] arch_cpu_idle at 103842
 #1 [3e000317e48] do_idle at 17ad18
 #2 [3e000317e80] cpu_startup_entry at 17af16
 #3 [3e000317ea8] arch_call_rest_init at eac934


> 
> Wei Wang (2):
>   virtio_pci: use queue idx instead of array idx to set up the vq
>   virtio: don't allocate vqs when names[i] = NULL
> 
>  drivers/misc/mic/vop/vop_main.c|  9 +++--
>  drivers/remoteproc/remoteproc_virtio.c |  9 +++--
>  drivers/s390/virtio/virtio_ccw.c   | 12 +---
>  drivers/virtio/virtio_mmio.c   |  9 +++--
>  drivers/virtio/virtio_pci_common.c |  8 
>  5 files changed, 34 insertions(+), 13 deletions(-)
>

[RFC PATCH V2 2/3] vhost: fine grain userspace memory accessors

2018-12-27 Thread Jason Wang

This is used to hide the metadata address from virtqueue helpers. This
will allow implementing vmap-based fast access to metadata.

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 94 +++
 1 file changed, 77 insertions(+), 17 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f179b5ee14c4..337ce6f5a098 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -868,6 +868,34 @@ static inline void __user *__vhost_get_user(struct 
vhost_virtqueue *vq,
ret; \
 })
 
+static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
+{
+   return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
+ vhost_avail_event(vq));
+}
+
+static inline int vhost_put_used(struct vhost_virtqueue *vq,
+struct vring_used_elem *head, int idx,
+int count)
+{
+   return vhost_copy_to_user(vq, vq->used->ring + idx, head,
+ count * sizeof(*head));
+}
+
+static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
+
+{
+   return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
+ >used->flags);
+}
+
+static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
+
+{
+   return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
+ >used->idx);
+}
+
 #define vhost_get_user(vq, x, ptr, type)   \
 ({ \
int ret; \
@@ -906,6 +934,43 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
mutex_unlock(>vqs[i]->mutex);
 }
 
+static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
+ __virtio16 *idx)
+{
+   return vhost_get_avail(vq, *idx, >avail->idx);
+}
+
+static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
+  __virtio16 *head, int idx)
+{
+   return vhost_get_avail(vq, *head,
+  >avail->ring[idx & (vq->num - 1)]);
+}
+
+static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
+   __virtio16 *flags)
+{
+   return vhost_get_avail(vq, *flags, >avail->flags);
+}
+
+static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
+  __virtio16 *event)
+{
+   return vhost_get_avail(vq, *event, vhost_used_event(vq));
+}
+
+static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
+__virtio16 *idx)
+{
+   return vhost_get_used(vq, *idx, >used->idx);
+}
+
+static inline int vhost_get_desc(struct vhost_virtqueue *vq,
+struct vring_desc *desc, int idx)
+{
+   return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
+}
+
 static int vhost_new_umem_range(struct vhost_umem *umem,
u64 start, u64 size, u64 end,
u64 userspace_addr, int perm)
@@ -1761,8 +1826,7 @@ EXPORT_SYMBOL_GPL(vhost_log_write);
 static int vhost_update_used_flags(struct vhost_virtqueue *vq)
 {
void __user *used;
-   if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
-  >used->flags) < 0)
+   if (vhost_put_used_flags(vq))
return -EFAULT;
if (unlikely(vq->log_used)) {
/* Make sure the flag is seen before log. */
@@ -1780,8 +1844,7 @@ static int vhost_update_used_flags(struct vhost_virtqueue 
*vq)
 
 static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 
avail_event)
 {
-   if (vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
-  vhost_avail_event(vq)))
+   if (vhost_put_avail_event(vq))
return -EFAULT;
if (unlikely(vq->log_used)) {
void __user *used;
@@ -1818,7 +1881,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
r = -EFAULT;
goto err;
}
-   r = vhost_get_used(vq, last_used_idx, >used->idx);
+   r = vhost_get_used_idx(vq, _used_idx);
if (r) {
vq_err(vq, "Can't access used idx at %p\n",
   >used->idx);
@@ -2017,7 +2080,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
last_avail_idx = vq->last_avail_idx;
 
if (vq->avail_idx == vq->last_avail_idx) {
-   if (unlikely(vhost_get_avail(vq, avail_idx, >avail->idx))) {
+   if (unlikely(vhost_get_avail_idx(vq, _idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
>avail->idx);
return -EFAULT;
@@ -2044,8 +2107,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 
/* Grab the next descriptor number they're advertising, and increment
 * the index we've seen. */
-   if

[RFC PATCH V2 3/3] vhost: access vq metadata through kernel virtual address

2018-12-27 Thread Jason Wang

It was noticed that the copy_user() friends that were used to access
virtqueue metadata tend to be very expensive for dataplane
implementations like vhost since they involve lots of software checks,
speculation barriers, and hardware feature toggling (e.g. SMAP). The
extra cost is more obvious when transferring small packets since
the time spent on metadata access becomes significant.

This patch tries to eliminate that overhead by accessing the metadata
through kernel virtual addresses set up by vmap(). So that the pages can
still be migrated, instead of pinning them through GUP, we use mmu
notifiers to invalidate vmaps and re-establish them during each round of
metadata prefetching if necessary. For devices that don't use metadata
prefetch, the memory accessors fall back to the normal copy_user()
implementation gracefully.

Note that this was only done when device IOTLB is not enabled. We
could use similar method to optimize it in the future.

Tests show a ~24% improvement on TX PPS when using virtio-user +
vhost_net + xdp1 on TAP:

Before: ~5.0Mpps
After:  ~6.1Mpps

Signed-off-by: Jason Wang 
---
 drivers/vhost/net.c   |   4 +-
 drivers/vhost/vhost.c | 259 +-
 drivers/vhost/vhost.h |  15 ++-
 3 files changed, 271 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 36f3d0f49e60..0b4b3deab5aa 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -971,7 +971,7 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;
 
-   if (!vq_iotlb_prefetch(vq))
+   if (!vq_meta_prefetch(vq))
goto out;
 
vhost_disable_notify(>dev, vq);
@@ -1140,7 +1140,7 @@ static void handle_rx(struct vhost_net *net)
if (!sock)
goto out;
 
-   if (!vq_iotlb_prefetch(vq))
+   if (!vq_meta_prefetch(vq))
goto out;
 
vhost_disable_notify(>dev, vq);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 337ce6f5a098..46a889b61a4d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -440,6 +440,9 @@ void vhost_dev_init(struct vhost_dev *dev,
vq->indirect = NULL;
vq->heads = NULL;
vq->dev = dev;
+   memset(>avail_ring, 0, sizeof(vq->avail_ring));
+   memset(>used_ring, 0, sizeof(vq->used_ring));
+   memset(>desc_ring, 0, sizeof(vq->desc_ring));
mutex_init(>mutex);
vhost_vq_reset(dev, vq);
if (vq->handle_kick)
@@ -489,6 +492,61 @@ bool vhost_dev_has_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
 
+static int vhost_invalidate_vmap(struct vhost_virtqueue *vq,
+struct vhost_vmap *map,
+unsigned long uaddr,
+unsigned long start,
+unsigned long end,
+bool blockable)
+{
+   if (start < uaddr && end >= uaddr) {
+   if (!blockable)
+   return -EAGAIN;
+   mutex_lock(>mutex);
+   if (map->addr)
+   vunmap(map->unmap_addr);
+   map->addr = NULL;
+   map->unmap_addr = NULL;
+   mutex_unlock(>mutex);
+   }
+
+   return 0;
+}
+
+static int vhost_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+struct mm_struct *mm,
+unsigned long start,
+unsigned long end,
+bool blockable)
+{
+   struct vhost_dev *dev = container_of(mn, struct vhost_dev,
+mmu_notifier);
+   int i;
+
+   for (i = 0; i < dev->nvqs; i++) {
+   struct vhost_virtqueue *vq = dev->vqs[i];
+
+   if (vhost_invalidate_vmap(vq, >avail_ring,
+ (unsigned long)vq->avail,
+ start, end, blockable))
+   return -EAGAIN;
+   if (vhost_invalidate_vmap(vq, >desc_ring,
+ (unsigned long)vq->desc,
+ start, end, blockable))
+   return -EAGAIN;
+   if (vhost_invalidate_vmap(vq, >used_ring,
+ (unsigned long)vq->used,
+ start, end, blockable))
+   return -EAGAIN;
+   }
+
+   return 0;
+}
+
+static const struct mmu_notifier_ops vhost_mmu_notifier_ops = {
+   .invalidate_range_start = vhost_mmu_notifier_invalidate_range_start,
+};
+
 /* Caller should have device mutex */
 long vhost_dev_set_owner(struct vhost_dev *dev)
 {
@@ -520,7 +578,14 @@ long

[RFC PATCH V2 1/3] vhost: generalize adding used elem

2018-12-27 Thread Jason Wang

Use one generic vhost_copy_to_user() instead of two dedicated
accessors. This will simplify the conversion to fine-grained
accessors. About a 2% improvement in PPS was seen during the virtio-user
txonly test.

Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 55e5aa662ad5..f179b5ee14c4 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2174,16 +2174,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 
start = vq->last_used_idx & (vq->num - 1);
used = vq->used->ring + start;
-   if (count == 1) {
-   if (vhost_put_user(vq, heads[0].id, >id)) {
-   vq_err(vq, "Failed to write used id");
-   return -EFAULT;
-   }
-   if (vhost_put_user(vq, heads[0].len, >len)) {
-   vq_err(vq, "Failed to write used len");
-   return -EFAULT;
-   }
-   } else if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) {
+   if (vhost_copy_to_user(vq, used, heads, count * sizeof *used)) {
vq_err(vq, "Failed to write used");
return -EFAULT;
}
-- 
2.17.1

[RFC PATCH V2 0/3] vhost: accelerate metadata access through vmap()

2018-12-27 Thread Jason Wang

Hi:

This series tries to access virtqueue metadata through kernel virtual
addresses instead of copy_user() friends, since the latter have too much
overhead: checks, speculation barriers, or even hardware feature
toggling.

Test shows about 24% improvement on TX PPS. It should benefit other
cases as well.

Changes from V1:
- instead of pinning pages, use an MMU notifier to invalidate vmaps and
  remap during metadata prefetch
- fix build warning on MIPS

Please review

Jason Wang (3):
  vhost: generalize adding used elem
  vhost: fine grain userspace memory accessors
  vhost: access vq metadata through kernel virtual address

 drivers/vhost/net.c   |   4 +-
 drivers/vhost/vhost.c | 362 ++
 drivers/vhost/vhost.h |  15 +-
 3 files changed, 348 insertions(+), 33 deletions(-)

-- 
2.17.1

Re: [PATCH v6 3/6] Bluetooth: hci_qca: Fix frame reassembly errors for wcn3990

2018-12-27 Thread Balakrishna Godavarthi


Hi Matthias,

On 2018-12-28 01:55, Matthias Kaehlcke wrote:

On Thu, Dec 27, 2018 at 01:01:33PM +0530, Balakrishna Godavarthi wrote:

During initialization of wcn3990, we observed that the UART is reading
some stray bytes on the Rx line. This logs Frame reassembly errors
on the serial console. This could be because the Tx line of wcn3990
is tristated during boot up.


My testing suggests that this change is not needed if the Rx line of
the SoC/AP is configured with a pull-up. We'd probably all prefer not
to have this change if there's a neater way to address the garbage
data. Could you test with adding the pull-up and dropping this patch
on your side?

Thanks

Matthias


Thanks a lot, it worked for me. I will drop this patch.

--
Regards
Balakrishna.

Re: d_off field in struct dirent and 32-on-64 emulation

2018-12-27 Thread Florian Weimer

* Dmitry V. Levin:

> On Thu, Dec 27, 2018 at 06:18:19PM +0100, Florian Weimer wrote:
>> We have a bit of an interesting problem with respect to the d_off
>> field in struct dirent.
>> 
>> When running a 64-bit kernel on certain file systems, notably ext4,
>> this field uses the full 63 bits even for small directories (strace -v
>> output, wrapped here for readability):
>> 
>> getdents(3, [
>>   {d_ino=1494304, d_off=3901177228673045825, d_reclen=40,
>> d_name="authorized_keys", d_type=DT_REG},
>>   {d_ino=1494277, d_off=7491915799041650922, d_reclen=24,
>> d_name=".", d_type=DT_DIR},
>>   {d_ino=1314655, d_off=9223372036854775807, d_reclen=24,
>> d_name="..", d_type=DT_DIR}
>> ], 32768) = 88
>> 
>> When running in 32-bit compat mode, this value is somehow truncated to
>> 31 bits, for both the getdents and the getdents64 (!) system call (at
>> least on i386).
>
> Why getdents64 system call is affected by this truncation,
> isn't it a kernel bug that has to be fixed in the kernel instead?

It's required because POSIX specifies that telldir and seekdir use
long int (and not off_t) as the seek offset.  If the kernel does not
truncate while keeping a useful value, these functions would turn
unusable.

Re: [PATCH 2/2] lib/genalloc.c: export symbol addr_in_gen_pool

2018-12-27 Thread Huang Shijie

On Thu, Dec 27, 2018 at 09:49:29PM -0800, Andrew Morton wrote:
> On Mon, 24 Dec 2018 15:06:22 +0800 Huang Shijie  wrote:
> 
> > We may use the addr_in_gen_pool() in the driver module.
> > So export the addr_in_gen_pool for the compiling.
> > 
> > ...
> >
> > --- a/lib/genalloc.c
> > +++ b/lib/genalloc.c
> > @@ -450,6 +450,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned 
> > long start,
> > rcu_read_unlock();
> > return found;
> >  }
> > +EXPORT_SYMBOL(addr_in_gen_pool);
> >  
> >  /**
> >   * gen_pool_avail - get available free space of the pool
> 
> OK, but...  The name is poor.
> 
> q:/usr/src/25> grep EXPORT_SYMBOL lib/genalloc.c
> EXPORT_SYMBOL(gen_pool_create);
> EXPORT_SYMBOL(gen_pool_add_virt);
> EXPORT_SYMBOL(gen_pool_virt_to_phys);
> EXPORT_SYMBOL(gen_pool_destroy);
> EXPORT_SYMBOL(gen_pool_alloc);
> EXPORT_SYMBOL(gen_pool_alloc_algo);
> EXPORT_SYMBOL(gen_pool_dma_alloc);
> EXPORT_SYMBOL(gen_pool_free);
> EXPORT_SYMBOL(gen_pool_for_each_chunk);
> EXPORT_SYMBOL_GPL(gen_pool_avail);
> EXPORT_SYMBOL_GPL(gen_pool_size);
> EXPORT_SYMBOL(gen_pool_set_algo);
> EXPORT_SYMBOL(gen_pool_first_fit);
> EXPORT_SYMBOL(gen_pool_first_fit_align);
> EXPORT_SYMBOL(gen_pool_fixed_alloc);
> EXPORT_SYMBOL(gen_pool_first_fit_order_align);
> EXPORT_SYMBOL(gen_pool_best_fit);
> EXPORT_SYMBOL_GPL(gen_pool_get);
> EXPORT_SYMBOL(devm_gen_pool_create);
> EXPORT_SYMBOL_GPL(of_gen_pool_get);
> 
> See?  Almost everything is called gen_pool_foo.  Which is correct as
> per kernel conventions.  We should globally rename this to
> gen_pool_has_addr or similar.
okay, I will do it right now..

Thanks
Huang Shijie

[PATCH] arm64/lib: add accelerated do_csum for arm64

2018-12-27 Thread huhai

do_csum() in lib/checksum.c is too slow in ARM64,
and we can use assembly and algorithm to accelerate it.

Signed-off-by: huhai 
---
 arch/arm64/include/asm/checksum.h |   3 +
 arch/arm64/lib/Makefile   |   2 +-
 arch/arm64/lib/checksum.c | 144 ++
 3 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/lib/checksum.c

diff --git a/arch/arm64/include/asm/checksum.h 
b/arch/arm64/include/asm/checksum.h
index 0b6f5a7d4027..0d7b80fb300e 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -26,6 +26,9 @@ static inline __sum16 csum_fold(__wsum csum)
 }
 #define csum_fold csum_fold
 
+#define do_csum do_csum
+unsigned int do_csum(const unsigned char *buff, unsigned int len);
+
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
__uint128_t tmp;
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 69ff9887f724..4134730a121b 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-lib-y  := clear_user.o delay.o copy_from_user.o\
+lib-y  := checksum.o clear_user.o delay.o copy_from_user.o \
   copy_to_user.o copy_in_user.o copy_page.o\
   clear_page.o memchr.o memcpy.o memmove.o memset.o\
   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o   \
diff --git a/arch/arm64/lib/checksum.c b/arch/arm64/lib/checksum.c
new file mode 100644
index ..6931ef13ef87
--- /dev/null
+++ b/arch/arm64/lib/checksum.c
@@ -0,0 +1,144 @@
+/*
+ * arch/arm64/lib/checksum.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed.
+ *
+ * Acknowledgements:
+ * This file is based on arch/x86/lib/csum-partial_64.c and
+ * arch/alpha/lib/checksum.c, which was written by Thomas Gleixner
+ * and Rick Gorton respectively.
+ */
+
+#include 
+#include 
+#include 
+
+static inline unsigned short from64to16(unsigned long x)
+{
+   /* Using extract instructions is a bit more efficient
+* than the original shift/bitmask version.
+*/
+
+   union {
+   unsigned long   ul;
+   unsigned intui[2];
+   unsigned short  us[4];
+   } in_v, tmp_v, out_v;
+
+   in_v.ul = x;
+   tmp_v.ul = (unsigned long) in_v.ui[0] + (unsigned long) in_v.ui[1];
+
+   /* Since the bits of tmp_v.sh[3] are going to always be zero,
+* we don't have to bother to add that in.
+*/
+   out_v.ul = (unsigned long) tmp_v.us[0] + (unsigned long) tmp_v.us[1]
+   + (unsigned long) tmp_v.us[2];
+
+   /* Similarly, out_v.us[2] is always zero for the final add.  */
+   return out_v.us[0] + out_v.us[1];
+}
+
+/*
+ * Do a 64-bit checksum on an arbitrary memory area.
+ * Returns a 16bit checksum.
+ */
+unsigned int do_csum(const unsigned char *buff, unsigned len)
+{
+   unsigned odd, count;
+   unsigned long result = 0;
+
+   if (unlikely(len == 0))
+   return result;
+   odd = 1 & (unsigned long) buff;
+   if (odd) {
+   result = *buff << 8;
+   len--;
+   buff++;
+   }
+   count = len >> 1;   /* nr of 16-bit words.. */
+   if (count) {
+   if (2 & (unsigned long) buff) {
+   result += *(unsigned short *)buff;
+   count--;
+   len -= 2;
+   buff += 2;
+   }
+   count >>= 1;/* nr of 32-bit words.. */
+   if (count) {
+   unsigned long zero;
+   unsigned long tmp1;
+   unsigned count64;
+
+   if (4 & (unsigned long) buff) {
+   result += *(unsigned int *) buff;
+   count--;
+   len -= 4;
+   buff += 4;
+   }
+   count >>= 1;/* nr of 64-bit words.. */
+
+   /* main loop using 64byte blocks */
+   zero = 0;
+   count64 = count >> 3;
+   while (count64) {
+   __asm__ __volatile__(
+   "ldr %x3, [%x1, #0]\n"
+   "adds %x0, %x0, %x3\n"
+   "ldr %x3, [%x1, #8]\n"
+   "adcs %x0, %x0, %x3\n"
+   "ldr %x3, [%x1, #16]\n"
+   "adcs %x0, %x0, %x3\n"
+   "ldr %x3, [%x1, #24]\n"
+   "adcs %x0, %x0, %x3\n"
+   "ldr

[PATCH -next] HSI: omap_ssi_port: fix debugfs_simple_attr.cocci warnings

2018-12-27 Thread YueHaibing

Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE
for debugfs files.

Semantic patch information:
Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file()
imposes some significant overhead as compared to
DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().

Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci

Signed-off-by: YueHaibing 
---
 drivers/hsi/controllers/omap_ssi_port.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hsi/controllers/omap_ssi_port.c 
b/drivers/hsi/controllers/omap_ssi_port.c
index b2b3989c..afbf134 100644
--- a/drivers/hsi/controllers/omap_ssi_port.c
+++ b/drivers/hsi/controllers/omap_ssi_port.c
@@ -162,7 +162,7 @@ static int ssi_div_set(void *data, u64 val)
return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(ssi_sst_div_fops, ssi_div_get, ssi_div_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(ssi_sst_div_fops, ssi_div_get, ssi_div_set, "%llu\n");
 
 static int ssi_debug_add_port(struct omap_ssi_port *omap_port,
 struct dentry *dir)
@@ -177,8 +177,8 @@ static int ssi_debug_add_port(struct omap_ssi_port 
*omap_port,
dir = debugfs_create_dir("sst", dir);
if (!dir)
return -ENOMEM;
-   debugfs_create_file("divisor", S_IRUGO | S_IWUSR, dir, port,
-   _sst_div_fops);
+   debugfs_create_file_unsafe("divisor", 0644, dir, port,
+  _sst_div_fops);
 
return 0;
 }

[PATCH -next] x86/mce: Fix debugfs_simple_attr.cocci warnings

2018-12-27 Thread YueHaibing

Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE
for debugfs files.

Semantic patch information:
Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file()
imposes some significant overhead as compared to
DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().

Generated by: scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci

Signed-off-by: YueHaibing 
---
 arch/x86/kernel/cpu/mce/core.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 672c722..d5f067a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2454,8 +2454,8 @@ static int fake_panic_set(void *data, u64 val)
return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
-   fake_panic_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
+"%llu\n");
 
 static int __init mcheck_debugfs_init(void)
 {
@@ -2464,8 +2464,8 @@ static int __init mcheck_debugfs_init(void)
dmce = mce_get_debugfs_dir();
if (!dmce)
return -ENOMEM;
-   ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
- _panic_fops);
+   ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
+NULL, _panic_fops);
if (!ffake_panic)
return -ENOMEM;

Re: [PATCH] drm/intel_dsi_vbt: Remove duplicate header

2018-12-27 Thread Jani Nikula

On Tue, 25 Dec 2018, Brajeswar Ghosh  wrote:
> Remove video/mipi_display.h which is included more than once
>
> Signed-off-by: Brajeswar Ghosh 

Pushed to drm-intel-next-queued, thanks for the patch.

BR,
Jani.

> ---
>  drivers/gpu/drm/i915/intel_dsi_vbt.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c 
> b/drivers/gpu/drm/i915/intel_dsi_vbt.c
> index ac83d6b89ae0..40a5efa33c3d 100644
> --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
> +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
> @@ -32,7 +32,6 @@
>  #include 
>  #include 
>  #include 
> -#include 
>  #include "i915_drv.h"
>  #include "intel_drv.h"
>  #include "intel_dsi.h"

-- 
Jani Nikula, Intel Open Source Graphics Center

Re: [PATCH v6 1/6] Bluetooth: hci_qca: use wait_until_sent() for power pulses

2018-12-27 Thread Balakrishna Godavarthi


HI Matthias,

On 2018-12-28 01:48, Matthias Kaehlcke wrote:

On Thu, Dec 27, 2018 at 01:01:31PM +0530, Balakrishna Godavarthi wrote:

wcn3990 requires a power pulse to turn ON/OFF along with
regulators. Sometimes we observe that the power pulses are sent
out with some delay, because these commands are queued. This
causes synchronization issues with the chip, which in turn delay the
chip setup or may end up causing communication issues.

Signed-off-by: Balakrishna Godavarthi 
---
Changes in v6:
 * added serdev_device_write_flush() in qca_send_power_pulse
   instead during the power off pulse.

Changes in v5:
 * added serdev_device_write_flush() in qca_power_off().

---
 drivers/bluetooth/hci_qca.c | 38 
++---

 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index f036c8f98ea3..507a2355c758 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1013,11 +1013,9 @@ static inline void host_set_baudrate(struct 
hci_uart *hu, unsigned int speed)

hci_uart_set_baudrate(hu, speed);
 }

-static int qca_send_power_pulse(struct hci_dev *hdev, u8 cmd)
+static int qca_send_power_pulse(struct hci_uart *hu, u8 cmd)
 {
-   struct hci_uart *hu = hci_get_drvdata(hdev);
-   struct qca_data *qca = hu->priv;
-   struct sk_buff *skb;
+   int ret;

/* These power pulses are single byte command which are sent
 * at required baudrate to wcn3990. On wcn3990, we have an external
@@ -1029,19 +1027,17 @@ static int qca_send_power_pulse(struct hci_dev 
*hdev, u8 cmd)

 * save power. Disabling hardware flow control is mandatory while
 * sending power pulses to SoC.
 */
-   bt_dev_dbg(hdev, "sending power pulse %02x to SoC", cmd);
-
-   skb = bt_skb_alloc(sizeof(cmd), GFP_KERNEL);
-   if (!skb)
-   return -ENOMEM;
-
+   serdev_device_write_flush(hu->serdev);
+   bt_dev_dbg(hu->hdev, "sending power pulse %02x to SoC", cmd);


nit: why clutter the code flow by putting the log statement in the
middle of code that is actually doing something with the serial
interface?

In case you respin anyway I suggest to structure it like this:

  bt_dev_dbg(hu->hdev, "sending power pulse %02x to SoC", cmd);

  hci_uart_set_flow_control(hu, true);
  serdev_device_write_flush(hu->serdev);
  ret = serdev_device_write_buf(hu->serdev, &cmd, sizeof(cmd));



hci_uart_set_flow_control(hu, true);
+   ret = serdev_device_write_buf(hu->serdev, &cmd, sizeof(cmd));
+   if (ret < 0) {
+   bt_dev_err(hu->hdev, "failed to send power pulse %02x to SoC",


nit: especially on 'embedded' devices 'SoC' is typically associated
with the CPU running Linux, you might want to change it to
'controller'.



[Bala]: will update.


+  cmd);
+   return ret;
+   }

-   skb_put_u8(skb, cmd);
-   hci_skb_pkt_type(skb) = HCI_COMMAND_PKT;
-
-   skb_queue_tail(&qca->txq, skb);
-   hci_uart_tx_wakeup(hu);
+   serdev_device_wait_until_sent(hu->serdev, 0);

/* Wait for 100 uS for SoC to settle down */
usleep_range(100, 200);


I said earlier the delay here should be enough to ensure that the byte
gets transferred from a hardware buffer/FIFO to the controller,
however that didn't take into account that the power pulses are sent
with a baudrate of 2400. That translates to ~240 bytes/s, hence a
delay of 5 ms is needed to be on the safe side.



[Bala]: sure will update the delay of 5 ms.


In case you change the delay please also update the comment to make
clear this is not only time for the BT controller to settle, but also
to guarantee that the command was actually sent to the controller.



[Bala]: will update the comment.


So far it seems no problems have been observed, though this could be
thanks to the 100 ms delay in qca_wcn3990_init().

Cheers

Matthias


--
Regards
Balakrishna.

Re: [PATCH v3 7/9] drm/komeda: Attach komeda_dev to DRM-KMS

2018-12-27 Thread james qian wang (Arm Technology China)

On Thu, Dec 27, 2018 at 10:31:52PM +0800, Liviu Dudau wrote:
> On Thu, Dec 27, 2018 at 07:09:07AM +, james qian wang (Arm Technology 
> China) wrote:
> > On Mon, Dec 24, 2018 at 08:32:14PM +0800, Liviu Dudau wrote:
> > > On Fri, Dec 21, 2018 at 10:00:33AM +, james qian wang (Arm Technology 
> > > China) wrote:
> > > > > Add komeda_kms abstraction to attach komeda_dev to DRM-KMS
> > > >   CRTC: according to the komeda_pipeline
> > > >   PLANE: according to komeda_layer (layer input pipeline)
> > > >   PRIVATE_OBJS: komeda_pipeline/component all will be treat as 
> > > > private_objs
> > > > 
> > > > komeda_kms is for connecting DRM-KMS and komeda_dev, like reporting the
> > > > kms object properties according to the komeda_dev, and pass/convert 
> > > > KMS's
> > > > requirement to komeda_dev.
> > > > 
> > > > Changes in v3:
> > > > - Fixed style problem found by checkpatch.pl --strict.
> > > > 
> > > > Changes in v2:
> > > > - Unified abbreviation of "pipeline" to "pipe".
> > > > 
> > > > Signed-off-by: James (Qian) Wang 
> > > > ---
> > > >  drivers/gpu/drm/arm/display/komeda/Makefile   |   6 +-
> > > >  .../gpu/drm/arm/display/komeda/komeda_crtc.c  | 106 +++
> > > >  .../gpu/drm/arm/display/komeda/komeda_drv.c   |  19 +-
> > > >  .../gpu/drm/arm/display/komeda/komeda_kms.c   | 169 ++
> > > >  .../gpu/drm/arm/display/komeda/komeda_kms.h   | 113 
> > > >  .../drm/arm/display/komeda/komeda_pipeline.h  |   3 +
> > > >  .../gpu/drm/arm/display/komeda/komeda_plane.c | 109 +++
> > > >  .../arm/display/komeda/komeda_private_obj.c   |  88 +
> > > >  8 files changed, 608 insertions(+), 5 deletions(-)
> > > >  create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
> > > >  create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_kms.c
> > > >  create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_kms.h
> > > >  create mode 100644 drivers/gpu/drm/arm/display/komeda/komeda_plane.c
> > > >  create mode 100644 
> > > > drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
> > > > 
> > > > diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile 
> > > > b/drivers/gpu/drm/arm/display/komeda/Makefile
> > > > index 25beae900ed2..1b875e5dc0f6 100644
> > > > --- a/drivers/gpu/drm/arm/display/komeda/Makefile
> > > > +++ b/drivers/gpu/drm/arm/display/komeda/Makefile
> > > > @@ -9,7 +9,11 @@ komeda-y := \
> > > > komeda_dev.o \
> > > > komeda_format_caps.o \
> > > > komeda_pipeline.o \
> > > > -   komeda_framebuffer.o
> > > > +   komeda_framebuffer.o \
> > > > +   komeda_kms.o \
> > > > +   komeda_crtc.o \
> > > > +   komeda_plane.o \
> > > > +   komeda_private_obj.o
> > > >  
> > > >  komeda-y += \
> > > > d71/d71_dev.o
> > > > diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c 
> > > > b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
> > > > new file mode 100644
> > > > index ..5bb5a55f6b31
> > > > --- /dev/null
> > > > +++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
> > > > @@ -0,0 +1,106 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/*
> > > > + * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
> > > > + * Author: James.Qian.Wang 
> > > > + *
> > > > + */
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include 
> > > > +#include "komeda_dev.h"
> > > > +#include "komeda_kms.h"
> > > > +
> > > > +struct drm_crtc_helper_funcs komeda_crtc_helper_funcs = {
> > > > +};
> > > > +
> > > > +static const struct drm_crtc_funcs komeda_crtc_funcs = {
> > > > +};
> > > > +
> > > > +int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms,
> > > > +  struct komeda_dev *mdev)
> > > > +{
> > > > +   struct komeda_crtc *crtc;
> > > > +   struct komeda_pipeline *master;
> > > > +   char str[16];
> > > > +   int i;
> > > > +
> > > > +   kms->n_crtcs = 0;
> > > > +
> > > > +   for (i = 0; i < mdev->n_pipelines; i++) {
> > > > +   crtc = >crtcs[kms->n_crtcs];
> > > > +   master = mdev->pipelines[i];
> > > > +
> > > > +   crtc->master = master;
> > > > +   crtc->slave  = NULL;
> > > > +
> > > > +   if (crtc->slave)
> > > > +   sprintf(str, "pipe-%d", crtc->slave->id);
> > > > +   else
> > > > +   sprintf(str, "None");
> > > > +
> > > > +   DRM_INFO("crtc%d: master(pipe-%d) slave(%s) output: 
> > > > %s.\n",
> > > > +kms->n_crtcs, master->id, str,
> > > > +master->of_output_dev ?
> > > > +master->of_output_dev->full_name : "None");
> > > > +
> > > > +   kms->n_crtcs++;
> > > > +   }
> > > > +
> > > > +   return 0;
> > > > +}
> > > > +
> > > > +static struct drm_plane *
> > > > +get_crtc_primary(struct komeda_kms_dev *kms, struct

[PATCH -next] crypto: chelsio - check set_msg_len overflow in generate_b0

2018-12-27 Thread YueHaibing

set_msg_len may fail with -EOVERFLOW; the error should be propagated
to the caller.

Fixes: 2debd3325e55 ("crypto: chcr - Add AEAD algos.")
Signed-off-by: YueHaibing 
---
 drivers/crypto/chelsio/chcr_algo.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c 
b/drivers/crypto/chelsio/chcr_algo.c
index bcef765..bdbdce9 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2762,7 +2762,7 @@ static int set_msg_len(u8 *block, unsigned int msglen, 
int csize)
return 0;
 }
 
-static void generate_b0(struct aead_request *req, u8 *ivptr,
+static int generate_b0(struct aead_request *req, u8 *ivptr,
unsigned short op_type)
 {
unsigned int l, lp, m;
@@ -2787,6 +2787,8 @@ static void generate_b0(struct aead_request *req, u8 
*ivptr,
rc = set_msg_len(b0 + 16 - l,
 (op_type == CHCR_DECRYPT_OP) ?
 req->cryptlen - m : req->cryptlen, l);
+
+   return rc;
 }
 
 static inline int crypto_ccm_check_iv(const u8 *iv)
@@ -2821,7 +2823,7 @@ static int ccm_format_packet(struct aead_request *req,
*((unsigned short *)(reqctx->scratch_pad + 16)) =
htons(assoclen);
 
-   generate_b0(req, ivptr, op_type);
+   rc = generate_b0(req, ivptr, op_type);
/* zero the ctr value */
memset(ivptr + 15 - ivptr[0], 0, ivptr[0] + 1);
return rc;
-- 
2.7.0

Re: Fix 80d20d35af1e ("nohz: Fix local_timer_softirq_pending()") may have revealed another problem

2018-12-27 Thread Heiner Kallweit

On 28.12.2018 07:34, Heiner Kallweit wrote:
> On 28.12.2018 02:31, Frederic Weisbecker wrote:
>> On Fri, Dec 28, 2018 at 12:11:12AM +0100, Heiner Kallweit wrote:
>>>
> [...]
>>
>> Interesting, the softirq is raised from hardirq but it's not handled in the 
>> end of
>> the IRQ. Are you running threaded IRQS by any chance? If so I would expect 
>> ksoftirqd
>> to handle the pending work before we go idle. However I can imagine a small 
>> window
>> where such an expectation may not be met: if the softirq is raised after the 
>> ksoftirqd
>> thread is parked (CPUHP_AP_SMPBOOT_THREADS), which is right before we 
>> disable the CPU
>> (CPUHP_TEARDOWN_CPU).
>>
> I have a network driver (r8169) using NAPI which runs in softirq context 
> AFAIK.
> For testing purposes I sometimes trigger system suspend via network, so there 
> is
> network adapter activity when system suspends. Apart from that nothing really
> exciting:
> CPU0   CPU1   CPU2   CPU3
>0: 43  0  0  0   IO-APIC2-edge  
> timer
>1:  4  0  0  0   IO-APIC1-edge  
> i8042
>8:  0  1  0  0   IO-APIC8-fasteoi   
> rtc0
>9:  0  0  0  0   IO-APIC9-fasteoi   
> acpi
>   12:  0  0  0  5   IO-APIC   12-edge  
> i8042
>  120:  0  0  0  0   PCI-MSI 311296-edge  
> PCIe PME
>  121:  0  0  0  0   PCI-MSI 315392-edge  
> PCIe PME
>  122:  0  0  0  0   PCI-MSI 327680-edge  
> PCIe PME
>  123:  0  0   3328  0   PCI-MSI 294912-edge  
> ahci[:00:12.0]
>  124:  0133  0  0   PCI-MSI 344064-edge  
> xhci_hcd
>  125:  0  0 32  0   PCI-MSI 245760-edge  
> mei_me
>  127:381  0  0  0   PCI-MSI 1572864-edge  
> enp3s0
>  128:  0  0  0236   PCI-MSI 32768-edge  
> i915
>  129:  0374  0  0   PCI-MSI 229376-edge  
> snd_hda_intel:card0
> 
>> I don't know if we can afford to ignore a softirq even at this late stage. 
>> We should
>> probably avoid leaking any. So here is a possible fix, if you don't mind 
>> trying:
>>
> I tested your patch and at least in the first minutes of testing couldn't 
> reproduce
> the issue any longer. I tested manual system suspend and the following script 
> you
> sent when we started to analyze the issue.
> 

Also after some more time the issue didn't occur again. So it seems your 
analysis
was right and also the approach to fix it. Thanks!
Will let you know in case the issue should pop up again under special
circumstances.


> Heiner
> 
> --
> 
> #!/bin/bash
> 
> do_hotplug()
> {
>   for i in $(seq 1 $2)
>   do
>   echo $1 > /sys/devices/system/cpu/cpu$i/online
>   done
> }
> 
> LAST_CPU=$(($(nproc)-1))
> 
> while true
> do
>   do_hotplug 0 $LAST_CPU
>   do_hotplug 1 $LAST_CPU
> done
>

[PATCH 3/3] arm64: dts: hi3660: Add hisi asp dma device

2018-12-27 Thread h00249924

From: Youlin Wang 

Signed-off-by: John Stultz 
Signed-off-by: Youlin Wang 
Signed-off-by: Tanglei Han 
Cc: Wei Xu 
Cc: Rob Herring 
Cc: Mark Rutland 
---
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index f432b0a..5223e36 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -1122,5 +1122,19 @@
};
};
};
+
+   asp_dmac: asp_dmac@E804B000 {
+   compatible = "hisilicon,hisi-pcm-asp-dma-1.0";
+   reg = <0x0 0xe804b000 0x0 0x1000>;
+   #dma-cells = <1>;
+   dma-channels = <16>;
+   dma-requests = <32>;
+   dma-min-chan = <0>;
+   dma-used-chans = <0xFFFE>;
+   dma-share;
+   interrupts = <0 216 4>;
+   interrupt-names = "asp_dma_irq";
+   status = "ok";
+   };
};
 };
-- 
1.9.1

[PATCH 1/3] k3dma: Upgrade k3dma driver to support hisi_asp_dma hardware

2018-12-27 Thread h00249924

From: Youlin Wang 

There is a new "hisi-pcm-asp-dma-1.0" device added in
"arch/arm64/boot/dts/hisilicon/hi3660.dtsi".
So we have to add a matching id in the driver file:
"{ .compatible = "hisilicon,hisi-pcm-asp-dma-1.0", }"

And also hisi-pcm-asp dma device needs no setting to the clock.
So we skip this by "if" sentence on id string matching:
"if (strcasecmp((of_id->compatible), (k3_pdma_dt_ids[0].compatible)) == 0)"

After above this driver will support both k3 and hisi_asp dma hardware.

Signed-off-by: Youlin Wang 
Signed-off-by: Tanglei Han 
Cc: Dan Williams 
Cc: Vinod Koul 
---
 drivers/dma/k3dma.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index fdec2b6..10eecc2 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -792,6 +792,7 @@ static int k3_dma_transfer_resume(struct dma_chan *chan)
 
 static const struct of_device_id k3_pdma_dt_ids[] = {
{ .compatible = "hisilicon,k3-dma-1.0", },
+   { .compatible = "hisilicon,hisi-pcm-asp-dma-1.0", },
{}
 };
 MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
@@ -835,10 +836,12 @@ static int k3_dma_probe(struct platform_device *op)
"dma-requests", >dma_requests);
}
 
-   d->clk = devm_clk_get(>dev, NULL);
-   if (IS_ERR(d->clk)) {
-   dev_err(>dev, "no dma clk\n");
-   return PTR_ERR(d->clk);
+   if (strcasecmp((of_id->compatible), (k3_pdma_dt_ids[0].compatible)) == 
0) {
+   d->clk = devm_clk_get(>dev, NULL);
+   if (IS_ERR(d->clk)) {
+   dev_err(>dev, "no dma clk\n");
+   return PTR_ERR(d->clk);
+   }
}
 
irq = platform_get_irq(op, 0);
-- 
1.9.1

[PATCH 2/3] dmaengine: Extend the k3dma driver binding

2018-12-27 Thread h00249924

From: Youlin Wang 

Extend the k3dma driver binding to support hisi-asp hardware variants.

Signed-off-by: Youlin Wang 
Signed-off-by: Tanglei Han 
Cc: Vinod Koul 
Cc: Rob Herring 
Cc: Mark Rutland 
---
 Documentation/devicetree/bindings/dma/k3dma.txt | 33 -
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/dma/k3dma.txt 
b/Documentation/devicetree/bindings/dma/k3dma.txt
index 4945aea..cd21b82 100644
--- a/Documentation/devicetree/bindings/dma/k3dma.txt
+++ b/Documentation/devicetree/bindings/dma/k3dma.txt
@@ -3,7 +3,9 @@
 See dma.txt first
 
 Required properties:
-- compatible: Should be "hisilicon,k3-dma-1.0"
+- compatible: Must be one of
+-  "hisilicon,k3-dma-1.0"
+-  "hisilicon,hisi-pcm-asp-dma-1.0"
 - reg: Should contain DMA registers location and length.
 - interrupts: Should contain one interrupt shared by all channel
 - #dma-cells: see dma.txt, should be 1, para number
@@ -43,3 +45,32 @@ For example, i2c0 read channel request line is 18, while 
write channel use 19
dma-names = "rx", "tx";
};
 
+
+
+
+Controller:
+   asp_dmac: asp_dmac@E804B000 {
+   compatible = "hisilicon,hisi-pcm-asp-dma-1.0";
+   reg = <0x0 0xe804b000 0x0 0x1000>;
+   #dma-cells = <1>;
+   dma-channels = <16>;
+   dma-requests = <32>;
+   dma-min-chan = <0>;
+   dma-used-chans = <0xFFFE>;
+   dma-share;
+   interrupts = <0 216 4>;
+   interrupt-names = "asp_dma_irq";
+   status = "ok";
+   };
+
+Client:
+   i2s2: hisi_i2s {
+   compatible = "hisilicon,hisi-i2s";
+   reg = <0x0 0xe804f800 0x0 0x400>,
+   <0x0 0xe804e000 0x0 0x400>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_pmx_func _cfg_func>;
+   dmas = <_dmac 18 _dmac 19>;
+   dma-names = "rx", "tx";
+   #sound-dai-cells = <0>;
+   };
-- 
1.9.1

Re: [PATCH 07/14] clock: milbeaut: Add Milbeaut M10V clock control

2018-12-27 Thread Sugaya, Taichi


Hi

On 2018/12/28 9:39, Stephen Boyd wrote:

Quoting Sugaya, Taichi (2018-12-25 17:35:27)

Hi

On 2018/11/30 17:31, Stephen Boyd wrote:

+   init.num_parents = parents;
+   init.parent_names = parent_names;
+
+   mcm->cname = clk_name;
+   mcm->parent = 0;
+   mcm->hw.init = 
+
+   clk = clk_register(NULL, >hw);
+   if (IS_ERR(clk))
+   goto err_clk;
+
+   of_clk_add_provider(node, of_clk_src_simple_get, clk);
+   return;
+
+err_clk:
+   kfree(mcm);
+err_mcm:
+   kfree(parent_names);
+}
+CLK_OF_DECLARE(m10v_clk_mux, "socionext,milbeaut-m10v-clk-mux",
+   m10v_clk_mux_setup);


Any chance you can use a platform driver?



Excuse me to re-ask you.
Why do you recommend to use a platform driver? Is that current fad?


Not exactly a fad. We've been doing it for some time now. From an older
email on the list:

Reasons (in no particular order):

   1. We get a dev pointer to use with clk_hw_register()

   2. We can handle probe defer if some resource is not available

   3. Using device model gets us a hook into power management frameworks
  like runtime PM and system PM for things like suspend and hibernate

   4. It encourages a single DT node clk controller style binding
  instead of a single node per clk style binding

   5. We can use non-DT specific functions like devm_ioremap_resource() to map
  registers and acquire other resources, leading to more portable and
  generic code

   6. We may be able to make the device driver a module, which will
  make distros happy if we don't have to compile in all
  these clk drivers to the resulting vmlinux



Great thanks for answering. I strongly understand.
#It takes a bit of time to send v2.

Best Regards,
Sugaya Taichi

Re: [PATCH v37 0/3] Virtio-balloon: support free page reporting

2018-12-27 Thread Wei Wang


On 12/27/2018 08:17 PM, Christian Borntraeger wrote:


On 27.12.2018 12:59, Christian Borntraeger wrote:

On 27.12.2018 12:31, Christian Borntraeger wrote:

This patch triggers random crashes in the guest kernel on s390 early during 
boot.
No migration and no setting of the balloon is involved.


Adding Conny and Halil,

As the QEMU provides no PAGE_HINT feature yet, this quick hack makes the
guest boot fine again:


diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 728ecd1eea305..aa2e1864c5736 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -492,7 +492,7 @@ static int init_vqs(struct virtio_balloon *vb)
 callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
 }
  
-   err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,

+   err = vb->vdev->config->find_vqs(vb->vdev, 3, //VIRTIO_BALLOON_VQ_MAX,
  vqs, callbacks, names, NULL, NULL);
 if (err)
 return err;


To me it looks like virtio_ccw_find_vqs will abort if any of the virtqueues
that it has been asked for does not exist (including the earlier ones).


This "hack" makes the random crashes go away, but the balloon interface itself
does not work. (setting the value to anything will hang the guest).
As patch 1 also modifies the main path, there seem to be additional issues, 
maybe
endianness

Looking at things like

+   vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+   vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);
+   vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+ VIRTIO_BALLOON_CMD_ID_STOP);


Why is cmd_id_received not using cpu_to_virtio32?



That conversion is only needed when we need to send the value to the device.
cmd_id_received doesn't need to be sent to the device.

Best,
Wei

Re: Fix 80d20d35af1e ("nohz: Fix local_timer_softirq_pending()") may have revealed another problem

2018-12-27 Thread Heiner Kallweit

On 28.12.2018 02:31, Frederic Weisbecker wrote:
> On Fri, Dec 28, 2018 at 12:11:12AM +0100, Heiner Kallweit wrote:
>>
[...]
> 
> Interesting, the softirq is raised from hardirq but it's not handled in the 
> end of
> the IRQ. Are you running threaded IRQS by any chance? If so I would expect 
> ksoftirqd
> to handle the pending work before we go idle. However I can imagine a small 
> window
> where such an expectation may not be met: if the softirq is raised after the 
> ksoftirqd
> thread is parked (CPUHP_AP_SMPBOOT_THREADS), which is right before we disable 
> the CPU
> (CPUHP_TEARDOWN_CPU).
> 
I have a network driver (r8169) using NAPI which runs in softirq context AFAIK.
For testing purposes I sometimes trigger system suspend via network, so there is
network adapter activity when system suspends. Apart from that nothing really
exciting:
CPU0   CPU1   CPU2   CPU3
   0: 43  0  0  0   IO-APIC2-edge  timer
   1:  4  0  0  0   IO-APIC1-edge  i8042
   8:  0  1  0  0   IO-APIC8-fasteoi   rtc0
   9:  0  0  0  0   IO-APIC9-fasteoi   acpi
  12:  0  0  0  5   IO-APIC   12-edge  i8042
 120:  0  0  0  0   PCI-MSI 311296-edge  
PCIe PME
 121:  0  0  0  0   PCI-MSI 315392-edge  
PCIe PME
 122:  0  0  0  0   PCI-MSI 327680-edge  
PCIe PME
 123:  0  0   3328  0   PCI-MSI 294912-edge  
ahci[:00:12.0]
 124:  0133  0  0   PCI-MSI 344064-edge  
xhci_hcd
 125:  0  0 32  0   PCI-MSI 245760-edge  
mei_me
 127:381  0  0  0   PCI-MSI 1572864-edge  
enp3s0
 128:  0  0  0236   PCI-MSI 32768-edge  i915
 129:  0374  0  0   PCI-MSI 229376-edge  
snd_hda_intel:card0

> I don't know if we can afford to ignore a softirq even at this late stage. We 
> should
> probably avoid leaking any. So here is a possible fix, if you don't mind 
> trying:
> 
I tested your patch and at least in the first minutes of testing couldn't 
reproduce
the issue any longer. I tested manual system suspend and the following script 
you
sent when we started to analyze the issue.

Heiner

--

#!/bin/bash

do_hotplug()
{
for i in $(seq 1 $2)
do
echo $1 > /sys/devices/system/cpu/cpu$i/online
done
}

LAST_CPU=$(($(nproc)-1))

while true
do
do_hotplug 0 $LAST_CPU
do_hotplug 1 $LAST_CPU
done

[PATCH 2/3] arm64: dts: Using standard CCF interface to set vcodec clk

2018-12-27 Thread Yunfei Dong

Using standard CCF interface to set vdec/venc parent clk
and clk rate.

Signed-off-by: Yunfei Dong 
Signed-off-by: Qianqian Yan 
---
 arch/arm64/boot/dts/mediatek/mt8173.dtsi | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index abd2f15a544b..bbc282aae412 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -1295,6 +1295,15 @@
  "vencpll",
  "venc_lt_sel",
  "vdec_bus_clk_src";
+   assigned-clocks = < CLK_TOP_VENC_LT_SEL>,
+ < CLK_TOP_CCI400_SEL>,
+ < CLK_TOP_VDEC_SEL>,
+ < CLK_APMIXED_VCODECPLL>,
+ < CLK_APMIXED_VENCPLL>;
+   assigned-clock-parents = < 
CLK_TOP_VCODECPLL_370P5>,
+< CLK_TOP_UNIVPLL_D2>,
+< CLK_TOP_VCODECPLL>;
+   assigned-clock-rates = <0>, <0>, <0>, <148200>, 
<8>;
};
 
larb1: larb@1601 {
@@ -1360,6 +1369,10 @@
  "venc_sel",
  "venc_lt_sel_src",
  "venc_lt_sel";
+   assigned-clocks = < CLK_TOP_VENC_SEL>,
+ < CLK_TOP_VENC_LT_SEL>;
+   assigned-clock-parents = < CLK_TOP_VENCPLL_D2>,
+< 
CLK_TOP_UNIVPLL1_D2>;
};
 
vencltsys: clock-controller@1900 {
-- 
2.19.1

[PATCH 1/3] media: dt-bindings: media: Fix MTK document for vcodec

2018-12-27 Thread Yunfei Dong

Fix MTK binding document for MT8173 dtsi changed in order
to use standard CCF interface.
MT8173 SoC from Mediatek.

Signed-off-by: Yunfei Dong 
Signed-off-by: Qianqian Yan 
---
 .../devicetree/bindings/media/mediatek-vcodec.txt   | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt 
b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
index 2a615d84a682..b6b5dde6abd8 100644
--- a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
+++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
@@ -66,6 +66,15 @@ vcodec_dec: vcodec@1600 {
   "vencpll",
   "venc_lt_sel",
   "vdec_bus_clk_src";
+assigned-clocks = < CLK_TOP_VENC_LT_SEL>,
+  < CLK_TOP_CCI400_SEL>,
+  < CLK_TOP_VDEC_SEL>,
+  < CLK_APMIXED_VCODECPLL>,
+  < CLK_APMIXED_VENCPLL>;
+assigned-clock-parents = < CLK_TOP_VCODECPLL_370P5>,
+ < CLK_TOP_UNIVPLL_D2>,
+ < CLK_TOP_VCODECPLL>;
+assigned-clock-rates = <0>, <0>, <0>, <148200>, <8>;
   };
 
   vcodec_enc: vcodec@18002000 {
@@ -105,4 +114,8 @@ vcodec_dec: vcodec@1600 {
   "venc_sel",
   "venc_lt_sel_src",
   "venc_lt_sel";
+assigned-clocks = < CLK_TOP_VENC_SEL>,
+  < CLK_TOP_VENC_LT_SEL>;
+assigned-clock-parents = < CLK_TOP_VENCPLL_D2>,
+ < CLK_TOP_UNIVPLL1_D2>;
   };
-- 
2.19.1

[PATCH 2/2] sound: Add hisi i2s audio driver

2018-12-27 Thread h00249924

From: Youlin Wang 

Add i2s driver for hisi3660.

Original patch from 13dcb3aeefe431010689de314d7543db86ebf93c by Guangke Ji.

Reviewed-by: Feng Chen 
Signed-off-by: Kaihua Zhong 
Signed-off-by: Jun Chen 
Signed-off-by: Guangke Ji 
Signed-off-by: John Stultz 
Signed-off-by: Youlin Wang 
Signed-off-by: Tanglei Han 
Cc: Liam Girdwood 
Cc: Mark Brown 
Cc: Jaroslav Kysela 
Cc: Takashi Iwai 
---
 sound/soc/hisilicon/Kconfig  |   8 +-
 sound/soc/hisilicon/Makefile |   1 +
 sound/soc/hisilicon/hi3660-i2s.c | 423 +++
 sound/soc/hisilicon/hi3660-i2s.h |  95 +
 4 files changed, 526 insertions(+), 1 deletion(-)
 create mode 100644 sound/soc/hisilicon/hi3660-i2s.c
 create mode 100644 sound/soc/hisilicon/hi3660-i2s.h

diff --git a/sound/soc/hisilicon/Kconfig b/sound/soc/hisilicon/Kconfig
index 4356d5a..b023ef9 100644
--- a/sound/soc/hisilicon/Kconfig
+++ b/sound/soc/hisilicon/Kconfig
@@ -1,5 +1,11 @@
 config SND_I2S_HI6210_I2S
-   tristate "Hisilicon I2S controller"
+   tristate "Hisilicon Hi6210 I2S controller"
+   select SND_SOC_GENERIC_DMAENGINE_PCM
+   help
+ Hisilicon I2S
+
+config SND_I2S_HI3660_I2S
+   tristate "Hisilicon 960 I2S controller"
select SND_SOC_GENERIC_DMAENGINE_PCM
help
  Hisilicon I2S
diff --git a/sound/soc/hisilicon/Makefile b/sound/soc/hisilicon/Makefile
index e8095e2..6800516 100644
--- a/sound/soc/hisilicon/Makefile
+++ b/sound/soc/hisilicon/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_SND_I2S_HI6210_I2S) += hi6210-i2s.o
+obj-$(CONFIG_SND_I2S_HI3660_I2S) += hi3660-i2s.o
diff --git a/sound/soc/hisilicon/hi3660-i2s.c b/sound/soc/hisilicon/hi3660-i2s.c
new file mode 100644
index 000..d709043
--- /dev/null
+++ b/sound/soc/hisilicon/hi3660-i2s.c
@@ -0,0 +1,423 @@
+/*
+ * linux/sound/soc/hisilicon/hisi_i2s.c - I2S IP driver
+ *
+ * Copyright (c) 2001-2021, Huawei Tech. Co., Ltd.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "hi3660-i2s.h"
+
+struct hisi_i2s {
+   struct device *dev;
+   struct reset_control *rc;
+   int clocks;
+   struct regulator *regu_asp;
+   struct pinctrl *pctrl;
+   struct pinctrl_state *pin_default;
+   struct pinctrl_state *pin_idle;
+   struct clk *asp_subsys_clk;
+   struct snd_soc_dai_driver dai;
+   void __iomem *base;
+   void __iomem *base_syscon;
+   phys_addr_t base_phys;
+   struct snd_dmaengine_dai_dma_data dma_data[2];
+   spinlock_t lock;
+   int rate;
+   int format;
+   int bits;
+   int channels;
+   u32 master;
+   u32 status;
+};
+
+static void hisi_bits(struct hisi_i2s *i2s, u32 ofs, u32 reset, u32 set)
+{
+   u32 val = readl(i2s->base + ofs) & ~reset;
+
+   writel(val | set, i2s->base + ofs);
+}
+
+static void hisi_syscon_bits(struct hisi_i2s *i2s, u32 ofs, u32 reset, u32 set)
+{
+   u32 val = readl(i2s->base_syscon + ofs) & ~reset;
+
+   writel(val | set, i2s->base_syscon + ofs);
+}
+
+static int _hisi_i2s_set_fmt(struct hisi_i2s *i2s,
+  struct snd_pcm_substream *substream)
+{
+   switch (i2s->format & SND_SOC_DAIFMT_MASTER_MASK) {
+   case SND_SOC_DAIFMT_CBM_CFM:
+   i2s->master = false;
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK_SEL_REG, 0, 
HI_ASP_CFG_R_CLK_SEL_EN);
+   break;
+   case SND_SOC_DAIFMT_CBS_CFS:
+   i2s->master = true;
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK_SEL_REG, 
HI_ASP_CFG_R_CLK_SEL_EN,0);
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+int hisi_i2s_startup(struct snd_pcm_substream *substream,
+struct snd_soc_dai *cpu_dai)
+{
+   struct hisi_i2s *i2s = dev_get_drvdata(cpu_dai->dev);
+
+   /* deassert reset on sio_bt*/
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_RST_CTRLDIS_REG, 0, 
BIT(2)|BIT(6)|BIT(8)|BIT(16));
+
+   /* enable clk before frequency division */
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_GATE_EN_REG, 0, BIT(5)|BIT(6));
+
+   /* enable frequency division */
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_GATE_CLKDIV_EN_REG, 0, 
BIT(2)|BIT(5));
+
+   /* select clk */
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK_SEL_REG, HI_ASP_MASK, 
HI_ASP_CFG_R_CLK_SEL);
+
+   /* select clk_div */
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK1_DIV_REG, HI_ASP_MASK, 
HI_ASP_CFG_R_CLK1_DIV_SEL);
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK4_DIV_REG, HI_ASP_MASK, 
HI_ASP_CFG_R_CLK4_DIV_SEL);
+   hisi_syscon_bits(i2s, HI_ASP_CFG_R_CLK6_DIV_REG, HI_ASP_MASK, 
HI_ASP_CFG_R_CLK6_DIV_SEL);
+
+   /* sio config */
+   hisi_bits(i2s, HI_ASP_SIO_MODE_REG, HI_ASP_MASK, 0x0);
+   hisi_bits(i2s, HI_ASP_SIO_DATA_WIDTH_SET_REG,

[PATCH 3/3] media: mtk-vcodec: Using common interface to manage vdec/venc clock

2018-12-27 Thread Yunfei Dong

VDec: Using standard CCF interface to set parent clock and
clock rate in dtsi and using common interface to open/close
video decoder clock.
VEnc: Using standard CCF interface to set parent clock/larb
in dtsi and using common interface to open/close
video encoder clock/larb.

Signed-off-by: Yunfei Dong 
Signed-off-by: Qianqian Yan 
---
 .../platform/mtk-vcodec/mtk_vcodec_dec_pm.c   | 163 ++
 .../platform/mtk-vcodec/mtk_vcodec_drv.h  |  31 ++--
 .../platform/mtk-vcodec/mtk_vcodec_enc_pm.c   | 106 +++-
 3 files changed, 132 insertions(+), 168 deletions(-)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c 
b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
index 79ca03ac449c..7884465afcd2 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
@@ -27,11 +27,14 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev)
struct device_node *node;
struct platform_device *pdev;
struct mtk_vcodec_pm *pm;
-   int ret = 0;
+   struct mtk_vcodec_clk *dec_clk;
+   struct mtk_vcodec_clk_info *clk_info;
+   int i = 0, ret = 0;
 
pdev = mtkdev->plat_dev;
pm = >pm;
pm->mtkdev = mtkdev;
+   dec_clk = >vdec_clk;
node = of_parse_phandle(pdev->dev.of_node, "mediatek,larb", 0);
if (!node) {
mtk_v4l2_err("of_parse_phandle mediatek,larb fail!");
@@ -47,52 +50,34 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev)
pdev = mtkdev->plat_dev;
pm->dev = >dev;
 
-   pm->vcodecpll = devm_clk_get(>dev, "vcodecpll");
-   if (IS_ERR(pm->vcodecpll)) {
-   mtk_v4l2_err("devm_clk_get vcodecpll fail");
-   ret = PTR_ERR(pm->vcodecpll);
+   dec_clk->clk_num =
+   of_property_count_strings(pdev->dev.of_node, "clock-names");
+   if (dec_clk->clk_num > 0) {
+   dec_clk->clk_info = devm_kcalloc(>dev,
+   dec_clk->clk_num, sizeof(*clk_info),
+   GFP_KERNEL);
+   if (!dec_clk->clk_info)
+   return -ENOMEM;
+   } else {
+   mtk_v4l2_err("Failed to get vdec clock count");
+   return -EINVAL;
}
 
-   pm->univpll_d2 = devm_clk_get(>dev, "univpll_d2");
-   if (IS_ERR(pm->univpll_d2)) {
-   mtk_v4l2_err("devm_clk_get univpll_d2 fail");
-   ret = PTR_ERR(pm->univpll_d2);
-   }
-
-   pm->clk_cci400_sel = devm_clk_get(>dev, "clk_cci400_sel");
-   if (IS_ERR(pm->clk_cci400_sel)) {
-   mtk_v4l2_err("devm_clk_get clk_cci400_sel fail");
-   ret = PTR_ERR(pm->clk_cci400_sel);
-   }
-
-   pm->vdec_sel = devm_clk_get(>dev, "vdec_sel");
-   if (IS_ERR(pm->vdec_sel)) {
-   mtk_v4l2_err("devm_clk_get vdec_sel fail");
-   ret = PTR_ERR(pm->vdec_sel);
-   }
-
-   pm->vdecpll = devm_clk_get(>dev, "vdecpll");
-   if (IS_ERR(pm->vdecpll)) {
-   mtk_v4l2_err("devm_clk_get vdecpll fail");
-   ret = PTR_ERR(pm->vdecpll);
-   }
-
-   pm->vencpll = devm_clk_get(>dev, "vencpll");
-   if (IS_ERR(pm->vencpll)) {
-   mtk_v4l2_err("devm_clk_get vencpll fail");
-   ret = PTR_ERR(pm->vencpll);
-   }
-
-   pm->venc_lt_sel = devm_clk_get(>dev, "venc_lt_sel");
-   if (IS_ERR(pm->venc_lt_sel)) {
-   mtk_v4l2_err("devm_clk_get venc_lt_sel fail");
-   ret = PTR_ERR(pm->venc_lt_sel);
-   }
-
-   pm->vdec_bus_clk_src = devm_clk_get(>dev, "vdec_bus_clk_src");
-   if (IS_ERR(pm->vdec_bus_clk_src)) {
-   mtk_v4l2_err("devm_clk_get vdec_bus_clk_src");
-   ret = PTR_ERR(pm->vdec_bus_clk_src);
+   for (i = 0; i < dec_clk->clk_num; i++) {
+   clk_info = _clk->clk_info[i];
+   ret = of_property_read_string_index(pdev->dev.of_node,
+   "clock-names", i, _info->clk_name);
+   if (ret) {
+   mtk_v4l2_err("Failed to get clock name id = %d", i);
+   return ret;
+   }
+   clk_info->vcodec_clk = devm_clk_get(>dev,
+   clk_info->clk_name);
+   if (IS_ERR(clk_info->vcodec_clk)) {
+   mtk_v4l2_err("devm_clk_get (%d)%s fail", i,
+   clk_info->clk_name);
+   return PTR_ERR(clk_info->vcodec_clk);
+   }
}
 
pm_runtime_enable(>dev);
@@ -125,78 +110,36 @@ void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm)
 
 void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm)
 {
-   int ret;
-
-   ret = clk_set_rate(pm->vcodecpll, 1482 * 100);
-   if (ret)
-   mtk_v4l2_err("clk_set_rate vcodecpll fail %d", ret);
-
-   ret = clk_set_rate(pm->vencpll, 800 * 100);
-   if (ret)
-

[PATCH 1/2] arm64: dts: hi3660: Add i2s & sound device

2018-12-27 Thread h00249924

From: Youlin Wang 

Signed-off-by: John Stultz 
Signed-off-by: Youlin Wang 
Signed-off-by: Tanglei Han 
Cc: Wei Xu 
Cc: Rob Herring 
Cc: Mark Rutland 
---
 arch/arm64/boot/dts/hisilicon/hi3660.dtsi | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi 
b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
index 5223e36..ae535da 100644
--- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi
@@ -1136,5 +1136,33 @@
interrupt-names = "asp_dma_irq";
status = "ok";
};
+
+   i2s2: hisi_i2s {
+   compatible = "hisilicon,hisi-i2s";
+   reg = <0x0 0xe804f800 0x0 0x400>,
+ <0x0 0xe804e000 0x0 0x400>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_pmx_func _cfg_func>;
+   dmas = <_dmac 18 _dmac 19>;
+   dma-names = "rx", "tx";
+   #sound-dai-cells = <0>;
+   };
+
+   sound {
+   compatible = "simple-audio-card";
+   simple-audio-card,name = "hikey-hdmi";
+   simple-audio-card,format = "i2s";
+
+   simple-audio-card,bitclock-master = <_master>;
+   simple-audio-card,frame-master = <_master>;
+
+   sound_master: simple-audio-card,cpu {
+   sound-dai = <>;
+   };
+
+   simple-audio-card,codec {
+   sound-dai = <>;
+   };
+   };
};
 };
-- 
1.9.1

APIC timer checked before it is set up, boot fails on Connex L1430

2018-12-27 Thread Daniel Drake

Hi,

On the Connex L1430 laptop based on Intel Apollo Lake N3350, Linux
doesn't boot. It hangs early on a blank screen. Reproduced with Linus
git, 4.18 and 4.19 (there is no previous known working kernel
version). EFI earlyprintk shows:

APIC: switch to symmetric I/O mode setup
x2apic: IRQ remapping doesn't support X2APIC mode
..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
..MP-BIOS bug: 8254 timer not connected to IO-APIC
...trying to set up timer (IRQ0) through the 8259A ...
. (found apic 0 pin 2) ...
... failed.
...trying to set up timer as Virtual Wire IRQ...
. failed.
...trying to set up timer as ExtINT IRQ...
do_IRQ: 0.55 No irq handler for vector
. failed :(.
Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with
apic=debug and send a report.

Looking closer, check_timer() is observing that the IOAPIC timer
doesn't tick, so it then tries some other approaches but doesn't
manage to get them working either.

The strange thing is, I booted with the no_timer_check parameter and
the system works fine! With this parameter it assumes the IOAPIC timer
is ticking and just continues the boot sequence anyway. Here is the
boot log with apic=debug no_timer_check:
https://gist.github.com/dsd/6f40d8ecc7102dd5dcb90c5dedc69214#file-dmesg-txt

/proc/interrupts shows that APIC Local timer interrupts are working
fine on both CPUs:
https://gist.github.com/dsd/6f40d8ecc7102dd5dcb90c5dedc69214#file-interrupts-txt

So, check_timer() is incorrectly deducing that the IOAPIC timer isn't
working. The way it checks this is to do a delay loop and then check
if jiffies has advanced. I presume the expectation here is that during
this delay, the hardware IRQ will invoke local_apic_timer_interrupt()
which will then increment jiffies. Indeed, during check_timer()
execution this interrupt does not fire, however by using
no_timer_check and adding a log message I can see that it fires for
the first time quite some time after check_timer() is done:

 ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
 clocksource: tsc-early: mask: 0x max_cycles:
0xfc66f4fc7c, max_idle_ns: 440795224246 ns
 Calibrating delay loop (skipped), value calculated using timer
frequency.. 2188.80 BogoMIPS (lpj=1094400)
 pid_max: default: 32768 minimum: 301
 LSM: Security Framework initializing
 SELinux:  Initializing.
 Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
 Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
 Mount-cache hash table entries: 4096 (order: 3, 32768 bytes)
 Mountpoint-cache hash table entries: 4096 (order: 3, 32768 bytes)
 mce: CPU supports 7 MCE banks
 mce: CPU0: Thermal monitoring enabled (TM1)
 Last level iTLB entries: 4KB 48, 2MB 0, 4MB 0
 Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0
 Spectre V2 : Spectre mitigation: kernel not compiled with retpoline;
no mitigation available!
 Freeing SMP alternatives memory: 44K
 TSC deadline timer enabled
 smpboot: CPU0: Intel(R) Celeron(R) CPU N3350 @ 1.10GHz (family: 0x6,
model: 0x5c, stepping: 0x9)
 Performance Events: PEBS fmt3+, Goldmont events, 32-deep LBR,
full-width counters, Intel PMU driver.
 ... version:4
 ... bit width:  48
 ... generic registers:  4
 ... value mask: 
 ... max period: 7fff
 ... fixed-purpose events:   3
 ... event mask: 0007000f
 rcu: Hierarchical SRCU implementation.
 smp: Bringing up secondary CPUs ...
 !!! local_apic_timer_interrupt for the first time cpu0 !!!

Experimenting further, I used the same approach of adding delays and
checking for the interrupt during the delay to figure out at which
precise point during the boot sequence the timer interrupt starts
working. It's here:

static void setup_APIC_timer(void)
{
[...]
if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
[...]
clockevents_config_and_register(levt,
tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
}
}

We reach clockevents_register_device() which does:
 1. Take a spinlock and disable IRQs
 2. lapic_set_oneshot() which leads to "TSC deadline timer enabled" message
 3. lapic_next_deadline()
 4. Spin unlock & re-enable IRQs

At the exact point where IRQs are re-enabled above, which is at the
time of return from clockevents_config_and_register(), timer
interrupts start working.


The overall ordering here seems surprising. check_timer() probes
whether the APIC timer works long before setup_APIC_timer() has been
called. Shouldn't the timer be checked only after it has been set up?

Or is Linux assuming that the BIOS will boot with the APIC timer
already running?

Any other debugging suggestions much appreciated.

Thanks
Daniel

Re: [PATCH 2/2] lib/genalloc.c: export symbol addr_in_gen_pool

2018-12-27 Thread Andrew Morton

On Mon, 24 Dec 2018 15:06:22 +0800 Huang Shijie  wrote:

> We may use the addr_in_gen_pool() in the driver module.
> So export the addr_in_gen_pool for the compiling.
> 
> ...
>
> --- a/lib/genalloc.c
> +++ b/lib/genalloc.c
> @@ -450,6 +450,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned 
> long start,
>   rcu_read_unlock();
>   return found;
>  }
> +EXPORT_SYMBOL(addr_in_gen_pool);
>  
>  /**
>   * gen_pool_avail - get available free space of the pool

OK, but...  The name is poor.

q:/usr/src/25> grep EXPORT_SYMBOL lib/genalloc.c
EXPORT_SYMBOL(gen_pool_create);
EXPORT_SYMBOL(gen_pool_add_virt);
EXPORT_SYMBOL(gen_pool_virt_to_phys);
EXPORT_SYMBOL(gen_pool_destroy);
EXPORT_SYMBOL(gen_pool_alloc);
EXPORT_SYMBOL(gen_pool_alloc_algo);
EXPORT_SYMBOL(gen_pool_dma_alloc);
EXPORT_SYMBOL(gen_pool_free);
EXPORT_SYMBOL(gen_pool_for_each_chunk);
EXPORT_SYMBOL_GPL(gen_pool_avail);
EXPORT_SYMBOL_GPL(gen_pool_size);
EXPORT_SYMBOL(gen_pool_set_algo);
EXPORT_SYMBOL(gen_pool_first_fit);
EXPORT_SYMBOL(gen_pool_first_fit_align);
EXPORT_SYMBOL(gen_pool_fixed_alloc);
EXPORT_SYMBOL(gen_pool_first_fit_order_align);
EXPORT_SYMBOL(gen_pool_best_fit);
EXPORT_SYMBOL_GPL(gen_pool_get);
EXPORT_SYMBOL(devm_gen_pool_create);
EXPORT_SYMBOL_GPL(of_gen_pool_get);

See?  Almost everything is called gen_pool_foo.  Which is correct as
per kernel conventions.  We should globally rename this to
gen_pool_has_addr or similar.

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Sargun Dhillon

On Thu, Dec 27, 2018 at 9:02 PM Tejun Heo  wrote:
>
> On Thu, Dec 27, 2018 at 05:53:52PM -0800, Tejun Heo wrote:
> > Vincent knows that part way better than me but I think the safest way
> > would be doing the optimization removal iff tmp_alone_branch is
> > already pointing to leaf_cfs_rq_list.  IIUC, it's pointing to
> > something else only while a branch is being built and deferring
> > optimization removal by an avg update cycle isn't gonna make any
> > difference anyway.
>
> So, something like the following.  Xie, can you see whether the
> following patch resolves the problem?
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d1907506318a..88b9118b5191 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7698,7 +7698,8 @@ static void update_blocked_averages(int cpu)
>  * There can be a lot of idle CPU cgroups.  Don't let fully
>  * decayed cfs_rqs linger on the list.
>  */
> -   if (cfs_rq_is_decayed(cfs_rq))
> +   if (cfs_rq_is_decayed(cfs_rq) &&
> +   rq->tmp_alone_branch == >leaf_cfs_rq_list)
> list_del_leaf_cfs_rq(cfs_rq);
>
> /* Don't need periodic decay once load/util_avg are null */
Tested-by: Sargun Dhillon 
We've deployed this patch to our test workload. We haven't seen a crash yet.

Re: bug report: hugetlbfs: use i_mmap_rwsem for more pmd sharing, synchronization

2018-12-27 Thread Mike Kravetz

On 12/27/18 6:45 PM, Andrew Morton wrote:
> On Thu, 27 Dec 2018 11:24:31 -0800 Mike Kravetz  
> wrote:
>> It would be better to make an explicit check for mapping != null before
>> calling i_mmap_lock_write/try_to_unmap.  In this way, unrelated changes to
>> code above will not potentially lead to the possibility of mapping == null.
>>
>> I'm not sure what is the best way to handle this.  Below is an updated 
>> version
>> of the patch sent to Andrew.  I can also provide a simple patch to the patch
>> if that is easier.
>>
> 
> Below is the delta.  Please check it.  It seems to do more than the
> above implies.
> 
> Also, I have notes here that 
> 
> hugetlbfs-use-i_mmap_rwsem-for-more-pmd-sharing-synchronization.patch
> and
> hugetlbfs-use-i_mmap_rwsem-to-fix-page-fault-truncate-race.patch
> 
> have additional updates pending.  Due to emails such as
> 
> http://lkml.kernel.org/r/849f5202-2200-265f-7769-8363053e8...@oracle.com
> http://lkml.kernel.org/r/732c0b7d-5a4e-97a8-9677-30f352089...@oracle.com
> http://lkml.kernel.org/r/6b91dd42-b903-1f6c-729a-bd9f51273...@oracle.com
> 
> What's the status, please?
> 

There was a V3 of the patches which was Acked-by Kirill.   See,
http://lkml.kernel.org/r/20181224101349.jjjmk2hzwah6g64h@kshutemo-mobl1

The two V3 patches are:
http://lkml.kernel.org/r/2018123013.22193-2-mike.krav...@oracle.com
http://lkml.kernel.org/r/2018123013.22193-3-mike.krav...@oracle.com

The patch I sent in this thread was an update to the V3.  The delta you
created was based on V2.  So, the delta contains V2 -> V3 changes as well
as the changes mentioned in this thread.  My apologies for not noticing
and clarifying.

Let me know what you would like me to do to help.  I hate to send any
more patches right now as they might cause more confusion.
-- 
Mike Kravetz

Re: [PATCH] zram: idle writeback fixes and cleanup

2018-12-27 Thread Minchan Kim

Hi Sergey,

On Thu, Dec 27, 2018 at 11:26:24AM +0900, Sergey Senozhatsky wrote:
> On (12/24/18 12:35), Minchan Kim wrote:
> [..]
> > @@ -645,10 +680,13 @@ static ssize_t writeback_store(struct device *dev,
> > bvec.bv_len = PAGE_SIZE;
> > bvec.bv_offset = 0;
> >  
> > -   if (zram->stop_writeback) {
> > +   spin_lock(>wb_limit_lock);
> > +   if (zram->wb_limit_enable && !zram->bd_wb_limit) {
> > +   spin_unlock(>wb_limit_lock);
> > ret = -EIO;
> > break;
> > }
> > +   spin_unlock(>wb_limit_lock);
> [..]
> > @@ -732,11 +771,10 @@ static ssize_t writeback_store(struct device *dev,
> > zram_set_element(zram, index, blk_idx);
> > blk_idx = 0;
> > atomic64_inc(>stats.pages_stored);
> > -   if (atomic64_add_unless(>stats.bd_wb_limit,
> > -   -1 << (PAGE_SHIFT - 12), 0)) {
> > -   if (atomic64_read(>stats.bd_wb_limit) == 0)
> > -   zram->stop_writeback = true;
> > -   }
> > +   spin_lock(>wb_limit_lock);
> > +   if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
> > +   zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
> > +   spin_unlock(>wb_limit_lock);
> 
> Do we really need ->wb_limit_lock spinlock? We kinda punch it twice
> in this loop. If someone clears ->wb_limit_enable somewhere in between
> then the worst thing to happen is that we will just write extra page
> to the backing device; not a very big deal to me. Am I missing
> something?

Without the lock, bd_wb_limit store/read would be racy.

CPU A   CPU B
if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
zram->bd_wb_limit = 0
zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12) 

That makes the limit feature void.

> 
>   -ss

Re: [RFC][PATCH v2 00/21] PMEM NUMA node and hotness accounting/migration

2018-12-27 Thread Fengguang Wu


On Thu, Dec 27, 2018 at 09:31:58PM +0100, Michal Hocko wrote:

On Wed 26-12-18 21:14:46, Wu Fengguang wrote:

This is an attempt to use NVDIMM/PMEM as volatile NUMA memory that's
transparent to normal applications and virtual machines.

The code is still in active development. It's provided for early design review.


So can we get a high level description of the design and expected
usecases please?


Good question.

Use cases
=========

The general use case is to use PMEM as slower but cheaper "DRAM".
The suitable ones can be

- workloads that care more about memory size than bandwidth/latency
- workloads with a set of warm/cold pages that don't change rapidly over time
- low cost VM/containers

Foundation: create PMEM NUMA nodes
==================================

To create PMEM nodes in native kernel, Dave Hansen and Dan Williams
have working patches for kernel and ndctl. According to Ying, it'll
work like this

   ndctl destroy-namespace -f namespace0.0
   ndctl destroy-namespace -f namespace1.0
   ipmctl create -goal MemoryMode=100
   reboot

To create PMEM nodes in QEMU VMs, current Debian/Fedora etc. distros
already support this

qemu-system-x86_64
-machine pc,nvdimm
   -enable-kvm
   -smp 64
   -m 256G
   # DRAM node 0
   -object 
memory-backend-file,size=128G,share=on,mem-path=/dev/shm/qemu_node0,id=tmpfs-node0
-numa node,cpus=0-31,nodeid=0,memdev=tmpfs-node0
   # PMEM node 1
   -object 
memory-backend-file,size=128G,share=on,mem-path=/dev/dax1.0,align=128M,id=dax-node1
   -numa node,cpus=32-63,nodeid=1,memdev=dax-node1

Optimization: do hot/cold page tracking and migration
=====================================================

Since PMEM is slower than DRAM, we need to make sure hot pages go to
DRAM and cold pages stay in PMEM, to get the best out of PMEM and DRAM.

- DRAM=>PMEM cold page migration

It can be done in kernel page reclaim path, near the anonymous page
swap out point. Instead of swapping out, we now have the option to
migrate cold pages to PMEM NUMA nodes.

User space may also do it; however, it cannot act on demand when there
is memory pressure in DRAM nodes.

- PMEM=>DRAM hot page migration

While LRU can be good enough for identifying cold pages, frequency
based accounting can be more suitable for identifying hot pages.

Our design choice is to create a flexible user space daemon to drive
the accounting and migration, with necessary kernel supports by this
patchset.

Linux kernel already offers move_pages(2) for user space to migrate
pages to specified NUMA nodes. The major gap lies in hotness accounting.

User space driven hotness accounting


One way to find out hot/cold pages is to scan page table multiple
times and collect the "accessed" bits.

We created the kvm-ept-idle kernel module to provide the "accessed"
bits via interface /proc/PID/idle_pages. User space can open it and
read the "accessed" bits for a range of virtual address.

Internally, the kernel module implements two independent sets of page
table scan code that seamlessly provide the same interface:

- for QEMU, scan HVA range of the VM's EPT(Extended Page Table)
- for others, scan VA range of the process page table 


With /proc/PID/idle_pages and move_pages(2), the user space daemon
can work like this

One round of scan+migration:

   loop N=(3-10) times:
   sleep 0.01-10s (typical values)
   scan page tables and read/accumulate accessed bits into arrays
   treat pages with accessed_count == N as hot  pages
   treat pages with accessed_count == 0 as cold pages
   migrate hot  pages to DRAM nodes
   migrate cold pages to PMEM nodes (optional, may do it once on multi scan 
rounds, to make sure they are really cold)

That just describes the bare minimal working model. A real world
daemon should consider lots more to be useful and robust. The notable
one is to avoid thrashing.

Hotness accounting can be rough and workload can be unstable. We need
to avoid promoting a warm page to DRAM and then demoting it soon.

The basic scheme is to auto control scan interval and count, so that
each round of scan will get hot pages < 1/2 DRAM size.

The daemon may also do multiple rounds of scans before migration, to
filter out unstable/bursty accesses.

In the long run, most of the accounted hot pages will already be in DRAM,
so only the new ones need to be migrated to DRAM. When doing so, the daemon
should consider QoS and rate limiting to reduce the impact on user workloads.

When user space drives hot page migration, the DRAM nodes may well be
pressured, which will in turn trigger in-kernel cold page migration.
The above 1/2 DRAM size hot pages target can help kernel easily find
cold pages on LRU scan.

To avoid thrashing, it's also important to maintain a persistent and
consistent view of hot/cold pages between the kernel and user space,
since they will do migrations in two different directions.

- the regular page table scans will clear PMD/PTE young
- user

[PATCH 2/2] locking/lockdep: Provide enum lock_usage_bit mask names

2018-12-27 Thread Frederic Weisbecker

It makes the code more self-explanatory and spells out, throughout the
code, what each magic number refers to:

* state (Hardirq/Softirq)
* direction (used in or enabled above state)
* read or write

We can even remove some comments that were compensating for the lack of
those constant names.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
---
 kernel/locking/lockdep.c   | 33 +++--
 kernel/locking/lockdep_internals.h |  4 
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 118a554..115d43e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1624,29 +1624,18 @@ static const char *state_rnames[] = {
 
 static inline const char *state_name(enum lock_usage_bit bit)
 {
-   return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2];
+   return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : 
state_names[bit >> 2];
 }
 
 static int exclusive_bit(int new_bit)
 {
-   /*
-* USED_IN
-* USED_IN_READ
-* ENABLED
-* ENABLED_READ
-*
-* bit 0 - write/read
-* bit 1 - used_in/enabled
-* bit 2+  state
-*/
-
-   int state = new_bit & ~3;
-   int dir = new_bit & 2;
+   int state = new_bit & LOCK_USAGE_STATE_MASK;
+   int dir = new_bit & LOCK_USAGE_DIR_MASK;
 
/*
 * keep state, bit flip the direction and strip read.
 */
-   return state | (dir ^ 2);
+   return state | (dir ^ LOCK_USAGE_DIR_MASK);
 }
 
 static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
@@ -2662,8 +2651,8 @@ mark_lock_irq(struct task_struct *curr, struct held_lock 
*this,
enum lock_usage_bit new_bit)
 {
int excl_bit = exclusive_bit(new_bit);
-   int read = new_bit & 1;
-   int dir = new_bit & 2;
+   int read = new_bit & LOCK_USAGE_READ_MASK;
+   int dir = new_bit & LOCK_USAGE_DIR_MASK;
 
/*
 * mark USED_IN has to look forwards -- to ensure no dependency
@@ -2687,19 +2676,19 @@ mark_lock_irq(struct task_struct *curr, struct 
held_lock *this,
 * states.
 */
if ((!read || !dir || STRICT_READ_CHECKS) &&
-   !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
+   !usage(curr, this, excl_bit, state_name(new_bit & 
~LOCK_USAGE_READ_MASK)))
return 0;
 
/*
 * Check for read in write conflicts
 */
if (!read) {
-   if (!valid_state(curr, this, new_bit, excl_bit + 1))
+   if (!valid_state(curr, this, new_bit, excl_bit + 
LOCK_USAGE_READ_MASK))
return 0;
 
if (STRICT_READ_CHECKS &&
-   !usage(curr, this, excl_bit + 1,
-   state_name(new_bit + 1)))
+   !usage(curr, this, excl_bit + LOCK_USAGE_READ_MASK,
+   state_name(new_bit + LOCK_USAGE_READ_MASK)))
return 0;
}
 
@@ -2723,7 +2712,7 @@ mark_held_locks(struct task_struct *curr, enum 
lock_usage_bit base_bit)
hlock = curr->held_locks + i;
 
if (hlock->read)
-   hlock_bit += 1; /* READ */
+   hlock_bit += LOCK_USAGE_READ_MASK;
 
BUG_ON(hlock_bit >= LOCK_USAGE_STATES);
 
diff --git a/kernel/locking/lockdep_internals.h 
b/kernel/locking/lockdep_internals.h
index 88c847a..2ebb9d0 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -22,6 +22,10 @@ enum lock_usage_bit {
LOCK_USAGE_STATES
 };
 
+#define LOCK_USAGE_READ_MASK 1
+#define LOCK_USAGE_DIR_MASK  2
+#define LOCK_USAGE_STATE_MASK (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK))
+
 /*
  * Usage-state bitmasks:
  */
-- 
2.7.4

[PATCH 0/2] locking/lockdep: A few cleanups

2018-12-27 Thread Frederic Weisbecker

Just a few simplifications and code cleanups.

Frederic Weisbecker (2):
  locking/lockdep: Simplify mark_held_locks()
  locking/lockdep: Provide enum lock_usage_bit mask names

 kernel/locking/lockdep.c   | 54 +-
 kernel/locking/lockdep_internals.h |  4 +++
 2 files changed, 22 insertions(+), 36 deletions(-)

-- 
2.7.4

[PATCH 1/2] locking/lockdep: Simplify mark_held_locks()

2018-12-27 Thread Frederic Weisbecker

The enum mark_type appears a bit artificial here. We can directly pass
the base enum lock_usage_bit value to mark_held_locks(). All we need
then is to add the read index for each lock if necessary. It makes the
code clearer.

Signed-off-by: Frederic Weisbecker 
Cc: Peter Zijlstra 
Cc: Ingo Molnar 
---
 kernel/locking/lockdep.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 5c837a5..118a554 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2709,35 +2709,28 @@ mark_lock_irq(struct task_struct *curr, struct 
held_lock *this,
return 1;
 }
 
-enum mark_type {
-#define LOCKDEP_STATE(__STATE) __STATE,
-#include "lockdep_states.h"
-#undef LOCKDEP_STATE
-};
-
 /*
  * Mark all held locks with a usage bit:
  */
 static int
-mark_held_locks(struct task_struct *curr, enum mark_type mark)
+mark_held_locks(struct task_struct *curr, enum lock_usage_bit base_bit)
 {
-   enum lock_usage_bit usage_bit;
struct held_lock *hlock;
int i;
 
for (i = 0; i < curr->lockdep_depth; i++) {
+   enum lock_usage_bit hlock_bit = base_bit;
hlock = curr->held_locks + i;
 
-   usage_bit = 2 + (mark << 2); /* ENABLED */
if (hlock->read)
-   usage_bit += 1; /* READ */
+   hlock_bit += 1; /* READ */
 
-   BUG_ON(usage_bit >= LOCK_USAGE_STATES);
+   BUG_ON(hlock_bit >= LOCK_USAGE_STATES);
 
if (!hlock->check)
continue;
 
-   if (!mark_lock(curr, hlock, usage_bit))
+   if (!mark_lock(curr, hlock, hlock_bit))
return 0;
}
 
@@ -2758,7 +2751,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 * We are going to turn hardirqs on, so set the
 * usage bit for all held locks:
 */
-   if (!mark_held_locks(curr, HARDIRQ))
+   if (!mark_held_locks(curr, LOCK_ENABLED_HARDIRQ))
return;
/*
 * If we have softirqs enabled, then set the usage
@@ -2766,7 +2759,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 * this bit from being set before)
 */
if (curr->softirqs_enabled)
-   if (!mark_held_locks(curr, SOFTIRQ))
+   if (!mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ))
return;
 
curr->hardirq_enable_ip = ip;
@@ -2880,7 +2873,7 @@ void trace_softirqs_on(unsigned long ip)
 * enabled too:
 */
if (curr->hardirqs_enabled)
-   mark_held_locks(curr, SOFTIRQ);
+   mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
current->lockdep_recursion = 0;
 }
 
-- 
2.7.4

New RED Racing Parts: 18% / 20% off and free shipping

2018-12-27 Thread RED Racing Parts (Info)



Dear Rider,

>From the new RED Racing Parts you find spare parts and accessories for road / 
>offroad motorbike and
scooter.
Stickers, ergal / titanium bolts, brake / clutch levers, brake pads, carbon 
fiber parts, racing
coolers, clutches and more...
Excellent quality!

Visit our website on 
https://www.redracingparts.com .

Free shipping and 18% OFF (20% OFF paying with Bitcoin) for today only.


RED Racing Parts Staff




If you found this email useful, please forward it on to your friends.

To unsubscribe our newsletters click here
https://www.redracingparts.com/news/u.php?l=e=cvaidqzxnfcihwm8b9u2linux-ker...@vger.kernel.org

Re: [PATCH v5 6/6] net: lorawan: List LORAWAN in menuconfig

2018-12-27 Thread Andreas Färber

Hi Alexander and Xue Liu,

Am 24.12.18 um 16:32 schrieb Alexander Aring:
> On Tue, Dec 18, 2018 at 02:50:58PM +0100, Xue Liu wrote:
>> On Mon, 17 Dec 2018 at 15:19, Andreas Färber  wrote:
>>> Am 17.12.18 um 09:50 schrieb Xue Liu:
>>>> I have a question about the architecture of your module. AFAIK LoRaWAN
>>>> is already the MAC Layer above the LoRa technology. Why do you want to
>>>> make a new layer called "maclorawan" ?
>>>
>>> I had asked Jian-Hong to separate between his soft-MAC implementation
>>> and the common bits needed to drive hard-MAC implementations found on
>>> several of the hardware modules made available to me.
>>>
>> As a reference Linux 802.11 uses cfg80211 to talk with hard-MAC devices.
>> We may also use the name “cfglora” for hard-MAC implementation.
> 
> There exists also a cfg802154. :-)
> 
> Note that cfg80211 also provides backwards compatibility with the
> wireless ioctl() interface.
> 
> In theory it's simple:
> 
> netlink API -> SoftMAC (macFOOBAR layer) -> cfgFOOBAR implementation -> 
> driver layer
> \-> HardMAC (driver layer) -> cfgFOOBAR implementation

So how does cfgFOOBAR relate to nlFOOBAR now? Given that we were told to
use netlink and pointed to some nl802whatever, I am confused about two
people now calling for cfg. We have an nllora stubbed in linux-lora.git,
and I was expecting to see an nllorawan¹ either in this series or on
top. If you're suggesting to rename them technology-neutral, then please
say so clearly - otherwise it sounds to me like you didn't actually look
at the staged code yet or didn't read our previous discussions and lead
our contributors to reinvent things we already have...

We really need to complete the layers from the ground up before we get
lost in more nice-to-have upper layers: For LoRaWAN that means we need
to have TX and RX working for LoRa _and_ FSK. sx1276 still has lots of
hardcoded stuff from my own testing that needs to hook into nllora, and
FSK exists only as ETH_P_FSK constant so far, with no concept for
switching modes yet (which as mentioned in my presentation¹ needs to go
via sleep mode, losing most register settings) nor any netlink support.
Not all drivers need to be at the same implementation level, of course,
but we need at least one that's far enough to validate such patches.

And seeing that I just found a major bug in sx1276 driver's TX path,
apparently no one apart from me is testing that driver - sx128x and
sx1301 were not yet complete enough to transmit, and due to the open
socket address/protocol discussions none can receive yet, so as Jiri
hinted, this LoRaWAN soft-MAC patch series can't have been
runtime-tested against any staged driver at all!  => [RFC lora-next v5 6/6]

Therefore I thought in our case some hard-MAC may be easier to validate
LoRaWAN sockets (patch 1/6), to avoid a dependency on completing the MAC
implementation first. For example, iM880, RF1276TS and 32001353 are pure
LoRaWAN modules without raw LoRa support. (Whereas many others support
both and I'm still looking for input on how to best deal with that -
currently exposing them as LoRa devices for maximal flexibility.)

Regards,
Andreas

¹
https://events.linuxfoundation.org/wp-content/uploads/2017/12/ELCE2018_LoRa_final_Andreas-Farber.pdf
https://www.youtube.com/watch?v=Jjel65sZO9M

-- 
SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

[PATCH v2 2/2] remoteproc: qcom: Add support for parsing fw dt bindings

2018-12-27 Thread Sibi Sankar

Add support for parsing the "firmware-name" dt binding, which specifies
the relative paths of the mba/modem/pas images as strings. Fall back to
the default paths for the mba/modem/pas images on -EINVAL.

Signed-off-by: Sibi Sankar 
---
 drivers/remoteproc/qcom_q6v5_mss.c | 46 +++---
 drivers/remoteproc/qcom_q6v5_pas.c | 11 ++-
 2 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/drivers/remoteproc/qcom_q6v5_mss.c 
b/drivers/remoteproc/qcom_q6v5_mss.c
index 01be7314e176..c75179006e24 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -188,6 +188,7 @@ struct q6v5 {
bool has_alt_reset;
int mpss_perm;
int mba_perm;
+   const char *hexagon_mdt_image;
int version;
 };
 
@@ -860,17 +861,27 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
phys_addr_t min_addr = PHYS_ADDR_MAX;
phys_addr_t max_addr = 0;
bool relocate = false;
-   char seg_name[10];
+   char *fw_name;
+   size_t fw_name_len;
ssize_t offset;
size_t size = 0;
void *ptr;
int ret;
int i;
 
-   ret = request_firmware(, "modem.mdt", qproc->dev);
+   fw_name_len = strlen(qproc->hexagon_mdt_image);
+   if (fw_name_len <= 4)
+   return -EINVAL;
+
+   fw_name = kstrdup(qproc->hexagon_mdt_image, GFP_KERNEL);
+   if (!fw_name)
+   return -ENOMEM;
+
+   ret = request_firmware(, qproc->hexagon_mdt_image, qproc->dev);
if (ret < 0) {
-   dev_err(qproc->dev, "unable to load modem.mdt\n");
-   return ret;
+   dev_err(qproc->dev, "unable to load %s\n",
+   qproc->hexagon_mdt_image);
+   goto out;
}
 
/* Initialize the RMB validator */
@@ -918,10 +929,12 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
ptr = qproc->mpss_region + offset;
 
if (phdr->p_filesz) {
-   snprintf(seg_name, sizeof(seg_name), "modem.b%02d", i);
-   ret = request_firmware(_fw, seg_name, qproc->dev);
+   snprintf(fw_name + fw_name_len - 3, fw_name_len,
+"b%02d", i);
+   ret = request_firmware(_fw, fw_name, qproc->dev);
if (ret) {
-   dev_err(qproc->dev, "failed to load %s\n", 
seg_name);
+   dev_err(qproc->dev, "failed to load %s\n",
+   fw_name);
goto release_firmware;
}
 
@@ -960,6 +973,8 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
 
 release_firmware:
release_firmware(fw);
+out:
+   kfree(fw_name);
 
return ret < 0 ? ret : 0;
 }
@@ -1075,9 +1090,10 @@ static int qcom_q6v5_register_dump_segments(struct rproc 
*rproc,
unsigned long i;
int ret;
 
-   ret = request_firmware(, "modem.mdt", qproc->dev);
+   ret = request_firmware(, qproc->hexagon_mdt_image, qproc->dev);
if (ret < 0) {
-   dev_err(qproc->dev, "unable to load modem.mdt\n");
+   dev_err(qproc->dev, "unable to load %s\n",
+   qproc->hexagon_mdt_image);
return ret;
}
 
@@ -1253,6 +1269,8 @@ static int q6v5_probe(struct platform_device *pdev)
const struct rproc_hexagon_res *desc;
struct q6v5 *qproc;
struct rproc *rproc;
+   const char *mba_image;
+   const char *fw_name[2];
int ret;
 
desc = of_device_get_match_data(>dev);
@@ -1262,8 +1280,15 @@ static int q6v5_probe(struct platform_device *pdev)
if (desc->need_mem_protection && !qcom_scm_is_available())
return -EPROBE_DEFER;
 
+   ret = of_property_read_string_array(pdev->dev.of_node, "firmware-name",
+   fw_name, 2);
+   if (ret != -EINVAL && ret != 2)
+   return ret > 0 ? -EINVAL : ret;
+
+   mba_image = (ret != 2) ? desc->hexagon_mba_image : fw_name[0];
+
rproc = rproc_alloc(>dev, pdev->name, _ops,
-   desc->hexagon_mba_image, sizeof(*qproc));
+   mba_image, sizeof(*qproc));
if (!rproc) {
dev_err(>dev, "failed to allocate rproc\n");
return -ENOMEM;
@@ -1272,6 +1297,7 @@ static int q6v5_probe(struct platform_device *pdev)
qproc = (struct q6v5 *)rproc->priv;
qproc->dev = >dev;
qproc->rproc = rproc;
+   qproc->hexagon_mdt_image = (ret != 2) ? "modem.mdt" : fw_name[1];
platform_set_drvdata(pdev, qproc);
 
ret = q6v5_init_mem(qproc, pdev);
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index b1e63fcd5fdf..141c7da29e9a 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -258,6 +258,8 @@ static int

[PATCH v2 1/2] dt-bindings: remoteproc: qcom: Add firmware bindings for Q6V5

2018-12-27 Thread Sibi Sankar

Add optional "firmware-name" bindings for Q6V5 MSS and PAS based
remoteprocs. For Q6V5 MSS/PAS the two/one relative firmware
paths/path are to be listed respectively. Fallback to the default
images for mba/modem for Q6V5 MSS or the default Hexagon image
for Q6V5 PAS if the "firmware-name" binding is not present.

Signed-off-by: Sibi Sankar 
---
 Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt | 6 ++
 Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt | 7 +++
 2 files changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt 
b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
index 9c0cff3a5ed8..60ee0f73071a 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
@@ -27,6 +27,12 @@ on the Qualcomm ADSP Hexagon core.
Value type: 
Definition: must be "wdog", "fatal", "ready", "handover", "stop-ack"
 
+- firmware-name:
+   Usage: optional
+   Value type: 
+   Definition: must list the relative firmware image path for the
+   Hexagon Core.
+
 - clocks:
Usage: required
Value type: 
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt 
b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
index 9ff5b0309417..3a99e7379d8c 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
@@ -36,6 +36,13 @@ on the Qualcomm Hexagon core.
Value type: 
Definition: must be "wdog", "fatal", "ready", "handover", "stop-ack"
 
+- firmware-name:
+   Usage: optional
+   Value type: 
+   Definition: must list the relative firmware image paths for mba and
+   modem. They are used for booting and authenticating the
+   Hexagon core.
+
 - clocks:
Usage: required
Value type: 
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

[PATCH v2 0/2] Add firmware bindings for Q6V5 MSS/PAS

2018-12-27 Thread Sibi Sankar

Q6V5 MSS on certain SoCs like SDM845 is capable of operating under
completely different configurations (like the Non-Modem WLAN configuration)
depending on the firmware loaded without any change in boot sequence
of the Hexagon core. The patch series is ultimately aimed to avoid
multiple compatibles per SoC to just specify different upstreamed firmware
locations. This is achieved by using "firmware-name" binding to store
the relative path of mba/modem/pas firmware images.

remoteproc@408 { 
...
firmware-name = "qcom/sdm845/mss/mba.mbn",
"qcom/sdm845/mss/modem.mdt";
...
}

remoteproc@1730 {
...
firmware-name = "qcom/sdm845/lpass/adsp.mdt";
...
}

Suggested-by: Bjorn Andersson 

v2:
  * Replace "qcom,firmware" with "firmware-name" as suggested
by Rob
  * Include dt-bindings/parsing logic for PAS based remoteprocs 

Sibi Sankar (2):
  dt-bindings: remoteproc: qcom: Add firmware bindings for Q6V5
  remoteproc: qcom: Add support for parsing fw dt bindings

 .../bindings/remoteproc/qcom,adsp.txt |  6 +++
 .../bindings/remoteproc/qcom,q6v5.txt |  7 +++
 drivers/remoteproc/qcom_q6v5_mss.c| 46 +++
 drivers/remoteproc/qcom_q6v5_pas.c| 11 -
 4 files changed, 59 insertions(+), 11 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

Re: [PATCH 1/2] dt-bindings: remoteproc: qcom: Add firmware bindings for Q6V5 MSS

2018-12-27 Thread Sibi Sankar


Hi Rob,
Thanks for the review :)

On 2018-12-21 01:40, Rob Herring wrote:

On Wed, Dec 12, 2018 at 06:14:55PM +0530, Sibi Sankar wrote:

Add optional firmware bindings for Q6V5 MSS. It lists the two relative
firmware paths which are used for booting and authenticating the 
Hexagon

core.

Signed-off-by: Sibi Sankar 
---
 Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt | 7 
+++

 1 file changed, 7 insertions(+)

diff --git 
a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt 
b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt

index 9ff5b0309417..1f6988a60636 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
@@ -36,6 +36,13 @@ on the Qualcomm Hexagon core.
Value type: 
Definition: must be "wdog", "fatal", "ready", "handover", "stop-ack"

+- qcom,firmware:


We already have a standard name 'firmware-name'. Use that. I'm fine 
with

allowing it to be more than 1 string.


sure will replace "qcom,firmware" with "firmware-name" in the next 
re-spin





+   Usage: optional
+   Value type: 
+   Definition: must list the 2 relative firmware paths (mba and modem
+   metadata respectively) which are used for booting and
+   authenticating the Hexagon core.
+
 - clocks:
Usage: required
Value type: 
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora 
Forum,

a Linux Foundation Collaborative Project



--
-- Sibi Sankar --
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project.

Re: [PATCH v3 2/8] dt-bindings: remoteproc: qcom: Add missing clocks for SDM845

2018-12-27 Thread Sibi Sankar


Hi Rob,
Thanks for the review!

On 2018-12-28 02:51, Rob Herring wrote:

On Wed, Dec 26, 2018 at 06:22:23PM +0530, Sibi Sankar wrote:

Add missing clock bindings for Q6V5 MSS on SDM845 SoCs.

Fixes: fb22022ff63d ("dt-bindings: remoteproc: Add Q6v5 Modem PIL
binding for SDM845")

Signed-off-by: Sibi Sankar 
---

v3:
  * Fixup dt-binding documentation as suggested by Doug

 .../devicetree/bindings/remoteproc/qcom,q6v5.txt   | 14 
+++---

 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git 
a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt 
b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt

index 9ff5b0309417..20dd19f9ed99 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,q6v5.txt
@@ -39,13 +39,21 @@ on the Qualcomm Hexagon core.
 - clocks:
Usage: required
Value type: 
-   Definition: reference to the iface, bus and mem clocks to be held on
-   behalf of the booting of the Hexagon core
+   Definition: reference to the clocks that match clock-names

 - clock-names:
Usage: required
Value type: 
-   Definition: must be "iface", "bus", "mem"
+   Definition: The clocks needed depend on the compatible string:
+   qcom,ipq8074-wcss-pil:
+   no clock names required
+   qcom,q6v5-pil:
+   qcom,msm8916-mss-pil:
+   qcom,msm8974-mss-pil:
+   must be "iface", "bus", "mem", "xo"
+   qcom,sdm845-mss-pil:
+   must be "xo", "prng", "iface", "bus", "mem", "gpll0_mss",
+   "snoc_axi", "mnoc_axi"


Please keep the same order for the 4 clocks which are the same.


Will re-order them in the next re-spin.



Rob


--
-- Sibi Sankar --
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project.

Re: [PATCH] h8300: pci: Remove local declaration of pcibios_penalize_isa_irq

2018-12-27 Thread Sinan Kaya

On Thu, Dec 27, 2018 at 7:46 PM Guenter Roeck  wrote:
>
> h8300 builds fail with:
>
> In file included from drivers/of/address.c:11:
> include/linux/pci.h:1966:20: error: redefinition of 'pcibios_penalize_isa_irq'
>
> This is because CONFIG_PCI is not enabled, and pcibios_penalize_isa_irq()
> is now declared as inline static function in generic code if this is the
> case. Since h8300 does not support PCI to start with, fix the problem by
> removing the architecture specific pci.h.
>
> Fixes: 5d32a66541c46 ("PCI/ACPI: Allow ACPI to be built without CONFIG_PCI 
> set")
> Cc: Sinan Kaya 
> Cc: Bjorn Helgaas 
> Signed-off-by: Guenter Roeck 
> ---
>  arch/h8300/include/asm/Kbuild |  1 +
>  arch/h8300/include/asm/pci.h  | 18 --
>  2 files changed, 1 insertion(+), 18 deletions(-)
>  delete mode 100644 arch/h8300/include/asm/pci.h
>

Thanks, makes sense.

Reviewed-by: Sinan Kaya


> diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
> index a5d0b2991f47..cd400d353d18 100644
> --- a/arch/h8300/include/asm/Kbuild
> +++ b/arch/h8300/include/asm/Kbuild
> @@ -33,6 +33,7 @@ generic-y += mmu.h
>  generic-y += mmu_context.h
>  generic-y += module.h
>  generic-y += parport.h
> +generic-y += pci.h
>  generic-y += percpu.h
>  generic-y += pgalloc.h
>  generic-y += preempt.h
> diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
> deleted file mode 100644
> index d4d345a52092..
> --- a/arch/h8300/include/asm/pci.h
> +++ /dev/null
> @@ -1,18 +0,0 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _ASM_H8300_PCI_H
> -#define _ASM_H8300_PCI_H
> -
> -/*
> - * asm-h8300/pci.h - H8/300 specific PCI declarations.
> - *
> - * Yoshinori Sato 
> - */
> -
> -#define pcibios_assign_all_busses()0
> -
> -static inline void pcibios_penalize_isa_irq(int irq, int active)
> -{
> -   /* We don't do dynamic PCI IRQ allocation */
> -}
> -
> -#endif /* _ASM_H8300_PCI_H */
> --
> 2.7.4
>

[PATCH] net: tsn: add an netlink interface between kernel and application layer

2018-12-27 Thread PO LIU

This patch provides a netlink method to configure TSN protocol hardware.
TSN guarantees packet transport with bounded low latency, low packet delay
variation, and low packet loss by hardware and software methods.

The three basic components of TSN are:

1. Time synchronization: This is implemented by 802.1AS, which is based on
   the IEEE 1588 Precision Time Protocol. It is configured by other means
   in the kernel.
   802.1AS is not included in this patch.

2. Scheduling and traffic shaping and per-stream filter policing:
   This patch supports Qbv/Qci.

3. Selection of communication paths:
   This patch does not support pure software-only TSN protocols (like Qcc),
   only hardware-related configuration.

TSN protocols supported by this patch: Qbv/Qci/Qbu/Credit-based Shaper (Qav).
This patch was verified on the NXP ls1028ardb board.

Will add more protocols in the future.

Signed-off-by: Po Liu 
---
 MAINTAINERS  |6 +
 include/net/tsn.h|   76 ++
 include/uapi/linux/tsn.h |  958 +
 net/Kconfig  |1 +
 net/Makefile |3 +
 net/tsn/Kconfig  |   15 +
 net/tsn/Makefile |1 +
 net/tsn/genl_tsn.c   | 2626 ++
 8 files changed, 3686 insertions(+)
 create mode 100644 include/net/tsn.h
 create mode 100644 include/uapi/linux/tsn.h
 create mode 100644 net/tsn/Kconfig
 create mode 100644 net/tsn/Makefile
 create mode 100644 net/tsn/genl_tsn.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 7a9804a..bc037e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15385,6 +15385,12 @@ F: sound/soc/codecs/tscs*.c
 F: sound/soc/codecs/tscs*.h
 F: Documentation/devicetree/bindings/sound/tscs*.txt
 
+TSN NETLINK INTERFACE DRIVER
+M: Po Liu 
+F: net/tsn/genl_tsn.c
+F: include/net/tsn.h
+F: include/uapi/linux/tsn.h
+
 TTY LAYER
 M: Greg Kroah-Hartman 
 M: Jiri Slaby 
diff --git a/include/net/tsn.h b/include/net/tsn.h
new file mode 100644
index 000..008360c
--- /dev/null
+++ b/include/net/tsn.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2017-2018 NXP */
+
+#ifndef __TSN_H__
+#define __TSN_H__
+
+#include 
+
+struct tsn_ops {
+   void (*device_init)(struct net_device *ndev);
+   void (*device_deinit)(struct net_device *ndev);
+   u32 (*get_capability)(struct net_device *ndev);
+   /* Qbv standard */
+   int (*qbv_set)(struct net_device *ndev, struct tsn_qbv_conf *qbvconf);
+   int (*qbv_get)(struct net_device *ndev, struct tsn_qbv_conf *qbvconf);
+   int (*qbv_get_status)(struct net_device *ndev,
+   struct tsn_qbv_status *qbvstat);
+   int (*cb_streamid_set)(struct net_device *ndev, u32 index,
+   bool enable, struct tsn_cb_streamid *sid);
+   int (*cb_streamid_get)(struct net_device *ndev, u32 index,
+   struct tsn_cb_streamid *sid);
+   int (*cb_streamid_counters_get)(struct net_device *ndev, u32 index,
+   struct tsn_cb_streamid_counters *sidcounter);
+   int (*qci_get_maxcap)(struct net_device *ndev,
+   struct tsn_qci_psfp_stream_param *qcicapa);
+   int (*qci_sfi_set)(struct net_device *ndev, u32 index, bool enable,
+   struct tsn_qci_psfp_sfi_conf *sficonf);
+   /* return: 0 stream filter instance not valid
+* 1 stream filter instance valid
+* -1 error happened
+*/
+   int (*qci_sfi_get)(struct net_device *ndev, u32 index,
+   struct tsn_qci_psfp_sfi_conf *sficonf);
+   int (*qci_sfi_counters_get)(struct net_device *ndev, u32 index,
+   struct tsn_qci_psfp_sfi_counters *sficounter);
+   int (*qci_sgi_set)(struct net_device *ndev, u32 index,
+   struct tsn_qci_psfp_sgi_conf *sgiconf);
+   int (*qci_sgi_get)(struct net_device *ndev, u32 index,
+   struct tsn_qci_psfp_sgi_conf *sgiconf);
+   int (*qci_sgi_status_get)(struct net_device *ndev, u16 index,
+   struct tsn_psfp_sgi_status *sgistat);
+   int (*qci_fmi_set)(struct net_device *ndev, u32 index, bool enable,
+   struct tsn_qci_psfp_fmi *fmi);
+   int (*qci_fmi_get)(struct net_device *ndev, u32 index,
+   struct tsn_qci_psfp_fmi *fmi,
+   struct tsn_qci_psfp_fmi_counters *counters);
+   int (*cbs_set)(struct net_device *ndev, u8 tc, u8 bw);
+   int (*cbs_get)(struct net_device *ndev, u8 tc);
+   /* To set a 8 bits vector shows 8 traffic classes
+* preemtable(1) or express(0)
+*/
+   int (*qbu_set)(struct net_device *ndev, u8 ptvector);
+   /* To get port preemtion status */
+   int (*qbu_get)(struct net_device *ndev,
+   struct

Re: [PATCH v4 10/10] KVM/x86/lbr: lazy save the guest lbr stack

2018-12-27 Thread Wei Wang


On 12/28/2018 04:51 AM, Andi Kleen wrote:

Thanks. This looks a lot better than the earlier versions.

Some more comments.

On Wed, Dec 26, 2018 at 05:25:38PM +0800, Wei Wang wrote:

When the vCPU is scheduled in:
- if the lbr feature was used in the last vCPU time slice, set the lbr
   stack to be interceptible, so that the host can capture whether the
   lbr feature will be used in this time slice;
- if the lbr feature wasn't used in the last vCPU time slice, disable
   the vCPU support of the guest lbr switching.

time slice is the time from exit to exit?


It's the vCPU thread time slice (e.g. 100ms).




This might be rather short in some cases if the workload does a lot of exits
(which I would expect PMU workloads to do) Would be better to use some
explicit time check, or at least N exits.


Did you mean further increasing the lazy time to multiple host thread
scheduling time slices?
What would be a good value for "N"?



Upon the first access to one of the lbr related MSRs (since the vCPU was
scheduled in):
- record that the guest has used the lbr;
- create a host perf event to help save/restore the guest lbr stack if
   the guest uses the user callstack mode lbr stack;

This is a bit risky. It would be safer (but also more expensive)
to always save for any guest LBR use, independent of callstack.

Otherwise we might get into a situation where
a vCPU context switch inside the guest PMI will clear the LBRs
before they can be read in the PMI, so some LBR samples will be fully
or partially cleared. This would be user visible.

In theory could try to detect if the guest is inside a PMI and
save/restore then, but that would likely be complicated. I would
save/restore for all cases.


Yes, it is easier to save for all the cases. But I'm curious: in the
non-callstack mode, it's just point sampling of functions (kind of
speculative to some degree).

Would rarely losing a few recordings be important in that case?





+static void
+__always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr,
+ int type, bool value);

__always_inline should only be used if it's needed for functionality,
or in a header.


Thanks, will fix it.

Best,
Wei

Re: [PATCHv3 0/2] mm/memblock: reuse memblock bottom-up allocation style

2018-12-27 Thread Baoquan He

On 12/28/18 at 11:00am, Pingfan Liu wrote:
> The bottom-up allocation style is introduced to cope with movable_node,
> where the limit inferior of allocation starts from kernel's end, due to
> lack of knowledge of memory hotplug info at this early time.
> Beside this original aim, 'kexec -c' prefers to reuse this style to alloc mem

Wondering what is 'kexec -c'.

> at lower address, since if the reserved region is beyond 4G, then it requires
> extra mem (default is 16M) for swiotlb. But at this time hotplug info has been

The default is 256M, not sure if we are talking about the same thing.

low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);

> got, the limit inferior can be extended to 0, which is done by this series
> 
> Cc: Tang Chen 
> Cc: "Rafael J. Wysocki" 
> Cc: Len Brown 
> Cc: Andrew Morton 
> Cc: Mike Rapoport 
> Cc: Michal Hocko 
> Cc: Jonathan Corbet 
> Cc: Yaowei Bai 
> Cc: Pavel Tatashin 
> Cc: Nicholas Piggin 
> Cc: Naoya Horiguchi 
> Cc: Daniel Vacek 
> Cc: Mathieu Malaterre 
> Cc: Stefan Agner 
> Cc: Dave Young 
> Cc: Baoquan He 
> Cc: ying...@kernel.org,
> Cc: vgo...@redhat.com
> Cc: linux-kernel@vger.kernel.org
> 
> Pingfan Liu (2):
>   mm/memblock: extend the limit inferior of bottom-up after parsing
> hotplug attr
>   x86/kdump: bugfix, make the behavior of crashkernel=X consistent with
> kaslr
> 
>  arch/x86/kernel/setup.c  |  9 +---
>  drivers/acpi/numa.c  |  4 
>  include/linux/memblock.h |  1 +
>  mm/memblock.c| 58 
> +---
>  4 files changed, 46 insertions(+), 26 deletions(-)
> 
> -- 
> 2.7.4
>

Re: [PATCH v1 01/12] of: Add bindings of thermtrip for Tegra soctherm

2018-12-27 Thread Wei Ni




On 28/12/2018 7:06 AM, Rob Herring wrote:
> On Tue, Dec 18, 2018 at 03:34:33PM +0800, Wei Ni wrote:
>> Add optional property "nvidia,thermtrips".
>> If present, these trips will be used as HW shutdown trips,
>> and critical trips will be used as SW shutdown trips.
>>
>> Signed-off-by: Wei Ni 
>> ---
>>  .../bindings/thermal/nvidia,tegra124-soctherm.txt| 20 
>> +---
>>  1 file changed, 17 insertions(+), 3 deletions(-)
>>
>> diff --git 
>> a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt 
>> b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
>> index b6c0ae53d4dc..ab66d6feab4b 100644
>> --- a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
>> +++ b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
>> @@ -55,10 +55,21 @@ Required properties :
>>- #cooling-cells: Should be 1. This cooling device only support 
>> on/off state.
>>  See ./thermal.txt for a description of this property.
>>  
>> +Optional properties:
>> +- nvidia,thermtrips : When present, this property specifies the temperature 
>> at
>> +  which the soctherm hardware will assert the thermal trigger signal to the
>> +  Power Management IC, which can be configured to reset or shutdown the 
>> device.
>> +  It is an array of pairs where each pair represents a tsensor id followed 
>> by a
>> +  temperature in milli Celsius. In the absence of this property the critical
>> +  trip point will be used for thermtrip temperature.
>> +
>>  Note:
>> -- the "critical" type trip points will be set to SOC_THERM hardware as the
>> -shut down temperature. Once the temperature of this thermal zone is higher
>> -than it, the system will be shutdown or reset by hardware.
>> +- the "critical" type trip points will be used to set the temperature at 
>> which
>> +the SOC_THERM hardware will assert a thermal trigger if the 
>> "nvidia,thermtrips"
>> +property is missing. When the thermtrips property is present, the breach of 
>> a
>> +critical trip point is reported back to the thermal framework to implement
>> +software shutdown.
> 
> This hardly seems like a NVidia specific concept. A h/w shutdown 
> temperature... Come up with something common.

Hi Rob,
The current thermal framework doesn't support setting h/w shutdown
trips; the "critical" trip is used for s/w shutdown. I noticed that the
rockchip-thermal driver also uses "rockchip,hw-tshut-temp" to set the
h/w shutdown temperature.

> 
> Also, we already have a temperature table. Why do we need temperatures 
> in 2 places.

Sorry, what do you mean by temperature table? Do you mean the trip nodes?

Thanks.
Wei.

> 
>> +
>>  - the "hot" type trip points will be set to SOC_THERM hardware as the 
>> throttle
>>  temperature. Once the the temperature of this thermal zone is higher
>>  than it, it will trigger the HW throttle event.
>> @@ -79,6 +90,9 @@ Example :
>>  
>>  #thermal-sensor-cells = <1>;
>>  
>> +nvidia,thermtrips = > + TEGRA124_SOCTHERM_SENSOR_GPU 103000>;
>> +
>>  throttle-cfgs {
>>  /*
>>   * When the "heavy" cooling device triggered,
>> -- 
>> 2.7.4
>>

Re: [RFC][PATCH v2 01/21] e820: cheat PMEM as DRAM

2018-12-27 Thread Fengguang Wu


On Thu, Dec 27, 2018 at 11:32:06AM -0800, Yang Shi wrote:

On Wed, Dec 26, 2018 at 9:13 PM Dan Williams  wrote:


On Wed, Dec 26, 2018 at 8:11 PM Fengguang Wu  wrote:
>
> On Wed, Dec 26, 2018 at 07:41:41PM -0800, Matthew Wilcox wrote:
> >On Wed, Dec 26, 2018 at 09:14:47PM +0800, Fengguang Wu wrote:
> >> From: Fan Du 
> >>
> >> This is a hack to enumerate PMEM as NUMA nodes.
> >> It's necessary for current BIOS that don't yet fill ACPI HMAT table.
> >>
> >> WARNING: take care to back up. It is mutually exclusive with the libnvdimm
> >> subsystem and can destroy ndctl managed namespaces.
> >
> >Why depend on firmware to present this "correctly"?  It seems to me like
> >less effort all around to have ndctl label some namespaces as being for
> >this kind of use.
>
> Dave Hansen may be more suitable to answer your question. He posted
> patches to make PMEM NUMA node coexist with libnvdimm and ndctl:
>
> [PATCH 0/9] Allow persistent memory to be used like normal RAM
> https://lkml.org/lkml/2018/10/23/9
>
> That depends on future BIOS. So we did this quick hack to test out
> PMEM NUMA node for the existing BIOS.

No, it does not depend on a future BIOS.


It is correct. We already have Dave's patches + Dan's patch (added
target_node field) work on our machine which has SRAT.


Thanks for the correction. It looks like my perception was out of date.
So we can follow Dave+Dan's patches to create the PMEM NUMA nodes.

Thanks,
Fengguang



Willy, have a look here [1], here [2], and here [3] for the
work-in-progress ndctl takeover approach (actually 'daxctl' in this
case).

[1]: https://lkml.org/lkml/2018/10/23/9
[2]: https://lkml.org/lkml/2018/10/31/243
[3]: https://lists.01.org/pipermail/linux-nvdimm/2018-November/018677.html

Re: [PATCH -mmotm] arm64: fix build for MAX_USER_VA_BITS

2018-12-27 Thread Andrew Morton

On Mon, 24 Dec 2018 16:03:12 -0500 Qian Cai  wrote:

> Some code in 9b31cf493ff was lost during merging into the -mmotm tree
> for some reason:
> 
> In file included from ./arch/arm64/include/asm/processor.h:46,
>  from ./include/linux/rcupdate.h:43,
>  from ./include/linux/rculist.h:11,
>  from ./include/linux/pid.h:5,
>  from ./include/linux/sched.h:14,
>from arch/arm64/kernel/asm-offsets.c:22:
> ./arch/arm64/include/asm/pgtable-hwdef.h:83:30: error:
> 'MAX_USER_VA_BITS' undeclared here (not in a function); did you mean
> 'MAX_USER_PRIO'?
>  #define PTRS_PER_PGD  (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
>   ^~~~
> ./arch/arm64/include/asm/pgtable.h:442:26: note: in expansion of macro
> 'PTRS_PER_PGD'
>  extern pgd_t init_pg_dir[PTRS_PER_PGD];
>
> ...
>
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -67,6 +67,12 @@
>  #define KERNEL_START  _text
>  #define KERNEL_END_end
>  
> +#ifdef CONFIG_ARM64_USER_VA_BITS_52
> +#define MAX_USER_VA_BITS 52
> +#else
> +#define MAX_USER_VA_BITS VA_BITS
> +#endif
> +
>  /*
>   * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
>   * address space for the shadow region respectively. They can bloat the stack

hm, that was presumably me getting lost in a maze of rejects.  It seems
OK now.

RE: iMX6 FEC driver Linux-fslc 4.17 - IPV6 Multicast not working when unplugging/plugging ethernet cable

2018-12-27 Thread Andy Duan

From: Stefano Cappa 
> Hi everyone,
> I already posted this in NXP forum as a comment
> (https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fco
> mmunity.nxp.com%2Fthread%2F359397data=02%7C01%7Cfugang.dua
> n%40nxp.com%7C189d5cad534e470a162508d66c068de2%7C686ea1d3bc2b
> 4c6fa92cd99c5c301635%7C0%7C0%7C636815172743636032sdata=fb
> H1wYnfrMI4437OIeAEDG1W23Ry6S3fscNqhHD8Kas%3Dreserved=0),
> in yocto mailing list
> (https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists
> .yoctoproject.org%2Fpipermail%2Fyocto%2F2018-December%2F043664.html
> data=02%7C01%7Cfugang.duan%40nxp.com%7C189d5cad534e470a1
> 62508d66c068de2%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C
> 636815172743636032sdata=Tomd6ITYFIQ5M8NWzSMJG3N2llTCJqQU
> Qg7oSY5zs4s%3Dreserved=0)
> and in meta-freescale mailing list
> (https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists
> .yoctoproject.org%2Fpipermail%2Fmeta-freescale%2F2018-December%2F02
> 3625.htmldata=02%7C01%7Cfugang.duan%40nxp.com%7C189d5cad5
> 34e470a162508d66c068de2%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0
> %7C0%7C636815172743636032sdata=WPGDb3B3P3nBGdkKbrgeW3ls
> Po0jVrQmxk9UlfFIkCw%3Dreserved=0)
> A user in meta-freescale's mailing list suggested to resend this message to 
> the
> emails obtained running "./scripts/get_maintainer.pl -F
> drivers/net/ethernet/freescale/fec_main.c".
> 
> 
> The problem is:
> 
> If I boot my iMX6 device with ethernet cable attached and I execute "ping6
> ff02::fb" to ping the multicast address I get this response:
> ~# ping6 ff02::fb
> PING ff02::fb (ff02::fb): 56 data bytes
> 64 bytes from fe80::c2f:eff:fe11:2d71: seq=0 ttl=64 time=2.057 ms
> 64 bytes from fe80::809:1bfb:8d4c:ae54: seq=0 ttl=64 time=73.101 ms
> (DUP!)
> 64 bytes from fe80::3e28:6dff:feed:5b97: seq=0 ttl=64 time=150.772 ms
> (DUP!)
> 
> 
> Otherwise, If I unplug and plug again ethernet cable, I cannot ping the
> multicast ipv6 address anymore.
> The result is:
> ~# ping6 ff02::fb
> PING ff02::fb (ff02::fb): 56 data bytes
> ping6: sendto: Network is unreachable
> 
> 
> The original NXP discussion was about older version of Linux, however this
> issue is happening with both Linux 4.9.88 and Linux 4.17.
> Probably also with the latest version, but I didn't try.
> 
I just tested it on the L4.14 kernel; it works, as shown in the log below.
If you unplug and plug the ethernet cable, you should see the log print out:
Unplug:  IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready
Plug:IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready

Please check the link status on your local setup.

Log:
root@imx8mqevk:~# ping6 ff02::fb
PING ff02::fb (ff02::fb): 56 data bytes
64 bytes from fe80::219:19ff:fe81:9149: seq=0 ttl=64 time=0.208 ms
64 bytes from fe80::be30:5bff:feeb:81f1: seq=0 ttl=64 time=1.586 ms (DUP!)
64 bytes from fe80::baac:6fff:fe37:e4d9: seq=0 ttl=64 time=1.611 ms (DUP!)
64 bytes from fe80::85da:f9d1:6bbc:f86d: seq=0 ttl=64 time=1.633 ms (DUP!)
64 bytes from fe80::38b2:b24:bc4a:1475: seq=0 ttl=64 time=1.653 ms (DUP!)
64 bytes from fe80::204:9fff:fe02:e33d: seq=0 ttl=64 time=1.673 ms (DUP!)
64 bytes from fe80::baac:6fff:fe37:e62f: seq=0 ttl=64 time=1.692 ms (DUP!)
64 bytes from fe80::cf53:3c0e:3d04:87c6: seq=0 ttl=64 time=1.712 ms (DUP!)
64 bytes from fe80::204:9fff:fe02:7786: seq=0 ttl=64 time=1.732 ms (DUP!)
64 bytes from fe80::f781:3862:65df:dadd: seq=0 ttl=64 time=1.752 ms (DUP!)
64 bytes from fe80::f69b:5750:754b:4583: seq=0 ttl=64 time=1.771 ms (DUP!)
64 bytes from fe80::204:9fff:fe02:c5eb: seq=0 ttl=64 time=1.791 ms (DUP!)
64 bytes from fe80::204:9fff:fe02:556a: seq=0 ttl=64 time=1.811 ms (DUP!)
64 bytes from fe80::20e:c6ff:fea6:6880: seq=0 ttl=64 time=1.830 ms (DUP!)
64 bytes from fe80::82dc:2912:4288:c88f: seq=0 ttl=64 time=1.850 ms (DUP!)
64 bytes from fe80::1479:ad62:9b34:1e3e: seq=0 ttl=64 time=1.869 ms (DUP!)
64 bytes from fe80::972f:1cc8:846f:85e2: seq=0 ttl=64 time=1.889 ms (DUP!)
64 bytes from fe80::62ae:d015:196e:5d76: seq=0 ttl=64 time=1.908 ms (DUP!)
64 bytes from fe80::204:9fff:fe03:c37e: seq=0 ttl=64 time=2.439 ms (DUP!)
64 bytes from fe80::204:9fff:fe05:cf0f: seq=0 ttl=64 time=2.462 ms (DUP!)
64 bytes from fe80::204:9fff:fe05:cf11: seq=0 ttl=64 time=3.550 ms (DUP!)
^C
--- ff02::fb ping statistics ---
1 packets transmitted, 1 packets received, 20 duplicates, 0% packet loss
round-trip min/avg/max = 0.208/1.829/3.550 ms
root@imx8mqevk:~# [ 2338.265902] fec 30be.ethernet eth0: Link is Down
[ 2338.298193] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready

root@imx8mqevk:~# [ 2344.410482] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link 
becomes ready
[ 2344.418129] fec 30be.ethernet eth0: Link is Up - 1Gbps/Full - flow 
control rx/tx

root@imx8mqevk:~# ping6 ff02::fb
PING ff02::fb (ff02::fb): 56 data bytes
64 bytes from fe80::219:19ff:fe81:9149: seq=0 ttl=64 time=0.158 ms
64 bytes from fe80::be30:5bff:feeb:81f1: seq=0 ttl=64 time=1.853 ms (DUP!)
64 bytes from fe80::38b2:b24:bc4a:1475: seq=0 ttl=64 time=1.879 ms (DUP!)
64 bytes from

Re: [PATCH v37 1/3] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2018-12-27 Thread Wei Wang


On 12/27/2018 08:03 PM, Christian Borntraeger wrote:

On 27.08.2018 03:32, Wei Wang wrote:

  static int init_vqs(struct virtio_balloon *vb)
  {
-   struct virtqueue *vqs[3];
-   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request 
};
-   static const char * const names[] = { "inflate", "deflate", "stats" };
-   int err, nvqs;
+   struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+   vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+   const char *names[VIRTIO_BALLOON_VQ_MAX];
+   int err;

/*
-* We expect two virtqueues: inflate and deflate, and
-* optionally stat.
+* Inflateq and deflateq are used unconditionally. The names[]
+* will be NULL if the related feature is not enabled, which will
+* cause no allocation for the corresponding virtqueue in find_vqs.
 */

This might be true for virtio-pci, but it is not for virtio-ccw.


Hi Christian,


Please try the fix patches: https://lkml.org/lkml/2018/12/27/336

Best,
Wei

Re: [PATCH -mmotm] efi: drop kmemleak_ignore() for page allocator

2018-12-27 Thread Andrew Morton

On Wed, 26 Dec 2018 16:31:59 +0100 Ard Biesheuvel  
wrote:

> Please stop sending EFI patches if you can't be bothered to
> test/reproduce against the EFI tree.

um, sorry, but that's a bit strong.  Finding (let alone fixing) a bug
in EFI is a great contribution (thanks!) and the EFI maintainers are
perfectly capable of reviewing and testing the proposed fix.  Or of
fixing the bug by other means.

Let's not beat people up for helping us in a less-than-perfect way, no?

[PATCH v1 2/2] virtio: don't allocate vqs when names[i] = NULL

2018-12-27 Thread Wei Wang

Some vqs may not need to be allocated when their related feature bits
are disabled. So callers may pass in such vqs with "names[i] = NULL".
We then skip the allocation of those vqs.

Signed-off-by: Wei Wang 
---
 drivers/misc/mic/vop/vop_main.c|  9 +++--
 drivers/remoteproc/remoteproc_virtio.c |  9 +++--
 drivers/s390/virtio/virtio_ccw.c   | 12 +---
 drivers/virtio/virtio_mmio.c   |  9 +++--
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 6b212c8..2bfa3a9 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -394,16 +394,21 @@ static int vop_find_vqs(struct virtio_device *dev, 
unsigned nvqs,
struct _vop_vdev *vdev = to_vopvdev(dev);
struct vop_device *vpdev = vdev->vpdev;
struct mic_device_ctrl __iomem *dc = vdev->dc;
-   int i, err, retry;
+   int i, err, retry, queue_idx = 0;
 
/* We must have this many virtqueues. */
if (nvqs > ioread8(>desc->num_vq))
return -ENOENT;
 
for (i = 0; i < nvqs; ++i) {
+   if (!names[i]) {
+   vqs[i] = NULL;
+   continue;
+   }
+
dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
__func__, i, names[i]);
-   vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+   vqs[i] = vop_find_vq(dev, queue_idx++, callbacks[i], names[i],
 ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
diff --git a/drivers/remoteproc/remoteproc_virtio.c 
b/drivers/remoteproc/remoteproc_virtio.c
index 183fc42..2d7cd344 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -153,10 +153,15 @@ static int rproc_virtio_find_vqs(struct virtio_device 
*vdev, unsigned int nvqs,
 const bool * ctx,
 struct irq_affinity *desc)
 {
-   int i, ret;
+   int i, ret, queue_idx = 0;
 
for (i = 0; i < nvqs; ++i) {
-   vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i],
+   if (!names[i]) {
+   vqs[i] = NULL;
+   continue;
+   }
+
+   vqs[i] = rp_find_vq(vdev, queue_idx++, callbacks[i], names[i],
ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
ret = PTR_ERR(vqs[i]);
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index fc9dbad..ae1d56d 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -635,7 +635,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, 
unsigned nvqs,
 {
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
unsigned long *indicatorp = NULL;
-   int ret, i;
+   int ret, i, queue_idx = 0;
struct ccw1 *ccw;
 
ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
@@ -643,8 +643,14 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, 
unsigned nvqs,
return -ENOMEM;
 
for (i = 0; i < nvqs; ++i) {
-   vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i],
-ctx ? ctx[i] : false, ccw);
+   if (!names[i]) {
+   vqs[i] = NULL;
+   continue;
+   }
+
+   vqs[i] = virtio_ccw_setup_vq(vdev, queue_idx++, callbacks[i],
+names[i], ctx ? ctx[i] : false,
+ccw);
if (IS_ERR(vqs[i])) {
ret = PTR_ERR(vqs[i]);
vqs[i] = NULL;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 4cd9ea5..d9dd0f78 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -468,7 +468,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned 
nvqs,
 {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0);
-   int i, err;
+   int i, err, queue_idx = 0;
 
err = request_irq(irq, vm_interrupt, IRQF_SHARED,
dev_name(>dev), vm_dev);
@@ -476,7 +476,12 @@ static int vm_find_vqs(struct virtio_device *vdev, 
unsigned nvqs,
return err;
 
for (i = 0; i < nvqs; ++i) {
-   vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i],
+   if (!names[i]) {
+   vqs[i] = NULL;
+   continue;
+   }
+
+   vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 ctx ? ctx[i] : false);
if

[PATCH v1 0/2] Virtio: fix some vq allocation issues

2018-12-27 Thread Wei Wang

Some vqs don't need to be allocated when the related feature bits are
disabled. Callers notify the vq allocation layer of this by setting the
related names[i] to NULL.

This patch series fixes the find_vqs implementations to handle this case.

Wei Wang (2):
  virtio_pci: use queue idx instead of array idx to set up the vq
  virtio: don't allocate vqs when names[i] = NULL

 drivers/misc/mic/vop/vop_main.c|  9 +++--
 drivers/remoteproc/remoteproc_virtio.c |  9 +++--
 drivers/s390/virtio/virtio_ccw.c   | 12 +---
 drivers/virtio/virtio_mmio.c   |  9 +++--
 drivers/virtio/virtio_pci_common.c |  8 
 5 files changed, 34 insertions(+), 13 deletions(-)

-- 
2.7.4

[PATCHv3 2/2] x86/kdump: bugfix, make the behavior of crashkernel=X consistent with kaslr

2018-12-27 Thread Pingfan Liu

A customer reported a bug on a high-end server with many PCIe devices, where
the kernel boots with crashkernel=384M and KASLR enabled. Even
though there is still plenty of memory under 896 MB, the search still fails
intermittently. This is because currently we can only search for a region
under 896 MB if ',high' is not specified. KASLR then breaks the 896 MB range
into several parts randomly, and the crashkernel reservation needs to be
aligned to 128 MB; that is why the failure occurs. It confuses the end user
that crashkernel=X sometimes works and sometimes fails.
To make it succeed, the customer can change the kernel option to
"crashkernel=384M,high". But this gives "crashkernel=xx@yy" a very
limited space to behave even though its grammar looks more generic.
And we can't answer questions raised from customer that confidently:
1) why it doesn't succeed to reserve 896 MB;
2) what's wrong with memory region under 4G;
3) why I have to add ',high', I only require 384 MB, not 3840 MB.

This patch simplifies the method suggested in the mail [1]. It just goes
bottom-up to find a candidate region for crashkernel. The bottom-up may be
better compatible with the old reservation style, i.e. still want to get
memory region from 896 MB firstly, then [896 MB, 4G], finally above 4G.

There is one trivial thing about the compatibility with old kexec-tools:
if the reserved region is above 896M, then the old tool will fail to load
bzImage. But without this patch, the old tool also fails, since no
memory below 896M can be reserved for crashkernel.

[1]: http://lists.infradead.org/pipermail/kexec/2017-October/019571.html
Signed-off-by: Pingfan Liu 
Cc: Tang Chen 
Cc: "Rafael J. Wysocki" 
Cc: Len Brown 
Cc: Andrew Morton 
Cc: Mike Rapoport 
Cc: Michal Hocko 
Cc: Jonathan Corbet 
Cc: Yaowei Bai 
Cc: Pavel Tatashin 
Cc: Nicholas Piggin 
Cc: Naoya Horiguchi 
Cc: Daniel Vacek 
Cc: Mathieu Malaterre 
Cc: Stefan Agner 
Cc: Dave Young 
Cc: Baoquan He 
Cc: ying...@kernel.org,
Cc: vgo...@redhat.com
Cc: linux-kernel@vger.kernel.org
---
 arch/x86/kernel/setup.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d494b9b..165f9c3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -541,15 +541,18 @@ static void __init reserve_crashkernel(void)
 
/* 0 means: find the address automatically */
if (crash_base <= 0) {
+   bool bottom_up = memblock_bottom_up();
+
+   memblock_set_bottom_up(true);
/*
 * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
 * as old kexec-tools loads bzImage below that, unless
 * "crashkernel=size[KMG],high" is specified.
 */
crash_base = memblock_find_in_range(CRASH_ALIGN,
-   high ? CRASH_ADDR_HIGH_MAX
-: CRASH_ADDR_LOW_MAX,
-   crash_size, CRASH_ALIGN);
+   (max_pfn * PAGE_SIZE), crash_size, CRASH_ALIGN);
+   memblock_set_bottom_up(bottom_up);
+
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable 
area found.\n");
return;
-- 
2.7.4

[PATCH v1 1/2] virtio_pci: use queue idx instead of array idx to set up the vq

2018-12-27 Thread Wei Wang

In find_vqs, there will be no vq[i] allocation if its corresponding
names[i] is NULL. For example, the caller may pass in names[i] (i=4)
with names[2] being NULL because the related feature bit is turned off,
so technically there are 3 queues on the device, and name[4] should
correspond to the 3rd queue on the device.

So we use queue_idx as the queue index, which is increased only when the
queue exists.

Signed-off-by: Wei Wang 
---
 drivers/virtio/virtio_pci_common.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index 465a6f5..d0584c0 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -285,7 +285,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, 
unsigned nvqs,
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u16 msix_vec;
-   int i, err, nvectors, allocated_vectors;
+   int i, err, nvectors, allocated_vectors, queue_idx = 0;
 
vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
if (!vp_dev->vqs)
@@ -321,7 +321,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, 
unsigned nvqs,
msix_vec = allocated_vectors++;
else
msix_vec = VP_MSIX_VQ_VECTOR;
-   vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+   vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 ctx ? ctx[i] : false,
 msix_vec);
if (IS_ERR(vqs[i])) {
@@ -356,7 +356,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, 
unsigned nvqs,
const char * const names[], const bool *ctx)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-   int i, err;
+   int i, err, queue_idx = 0;
 
vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
if (!vp_dev->vqs)
@@ -374,7 +374,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, 
unsigned nvqs,
vqs[i] = NULL;
continue;
}
-   vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+   vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
 ctx ? ctx[i] : false,
 VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
-- 
2.7.4

[PATCHv3 0/2] mm/memblock: reuse memblock bottom-up allocation style

2018-12-27 Thread Pingfan Liu

The bottom-up allocation style is introduced to cope with movable_node,
where the limit inferior of allocation starts from kernel's end, due to
lack of knowledge of memory hotplug info at this early time.
Besides this original aim, 'kexec -c' prefers to reuse this style to allocate
memory at a lower address, since if the reserved region is beyond 4G, it
requires extra memory (16M by default) for swiotlb. But by this time the
hotplug info has been obtained, so the limit inferior can be extended to 0,
which is done by this series.

Cc: Tang Chen 
Cc: "Rafael J. Wysocki" 
Cc: Len Brown 
Cc: Andrew Morton 
Cc: Mike Rapoport 
Cc: Michal Hocko 
Cc: Jonathan Corbet 
Cc: Yaowei Bai 
Cc: Pavel Tatashin 
Cc: Nicholas Piggin 
Cc: Naoya Horiguchi 
Cc: Daniel Vacek 
Cc: Mathieu Malaterre 
Cc: Stefan Agner 
Cc: Dave Young 
Cc: Baoquan He 
Cc: ying...@kernel.org,
Cc: vgo...@redhat.com
Cc: linux-kernel@vger.kernel.org

Pingfan Liu (2):
  mm/memblock: extend the limit inferior of bottom-up after parsing
hotplug attr
  x86/kdump: bugfix, make the behavior of crashkernel=X consistent with
kaslr

 arch/x86/kernel/setup.c  |  9 +---
 drivers/acpi/numa.c  |  4 
 include/linux/memblock.h |  1 +
 mm/memblock.c| 58 +---
 4 files changed, 46 insertions(+), 26 deletions(-)

-- 
2.7.4

[PATCHv3 1/2] mm/memblock: extend the limit inferior of bottom-up after parsing hotplug attr

2018-12-27 Thread Pingfan Liu

The bottom-up allocation style is introduced to cope with movable_node,
where the limit inferior of allocation starts from kernel's end, due to
lack of knowledge of memory hotplug info at this early time. But later, once
the hotplug info has been obtained, the limit inferior can be extended to 0.
'kexec -c' prefers to reuse this style to alloc mem at lower address,
since if the reserved region is beyond 4G, then it requires extra mem
(default is 16M) for swiotlb.

Signed-off-by: Pingfan Liu 
Cc: Tang Chen 
Cc: "Rafael J. Wysocki" 
Cc: Len Brown 
Cc: Andrew Morton 
Cc: Mike Rapoport 
Cc: Michal Hocko 
Cc: Jonathan Corbet 
Cc: Yaowei Bai 
Cc: Pavel Tatashin 
Cc: Nicholas Piggin 
Cc: Naoya Horiguchi 
Cc: Daniel Vacek 
Cc: Mathieu Malaterre 
Cc: Stefan Agner 
Cc: Dave Young 
Cc: Baoquan He 
Cc: ying...@kernel.org,
Cc: vgo...@redhat.com
Cc: linux-kernel@vger.kernel.org
---
 drivers/acpi/numa.c  |  4 
 include/linux/memblock.h |  1 +
 mm/memblock.c| 58 +---
 3 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 2746994..3eea4e4 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -462,6 +462,10 @@ int __init acpi_numa_init(void)
 
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity, 0);
+
+#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
+   mark_mem_hotplug_parsed();
+#endif
}
 
/* SLIT: System Locality Information Table */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index aee299a..d89ed9e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -125,6 +125,7 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 bool memblock_overlaps_region(struct memblock_type *type,
  phys_addr_t base, phys_addr_t size);
+void mark_mem_hotplug_parsed(void);
 int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index 81ae63c..a3f5e46 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -231,6 +231,12 @@ __memblock_find_range_top_down(phys_addr_t start, 
phys_addr_t end,
return 0;
 }
 
+static bool mem_hotmovable_parsed __initdata_memblock;
+void __init_memblock mark_mem_hotplug_parsed(void)
+{
+   mem_hotmovable_parsed = true;
+}
+
 /**
  * memblock_find_in_range_node - find free area in given range and node
  * @size: size of free area to find
@@ -259,7 +265,7 @@ phys_addr_t __init_memblock 
memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid,
enum memblock_flags flags)
 {
-   phys_addr_t kernel_end, ret;
+   phys_addr_t kernel_end, ret = 0;
 
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
@@ -270,34 +276,40 @@ phys_addr_t __init_memblock 
memblock_find_in_range_node(phys_addr_t size,
end = max(start, end);
kernel_end = __pa_symbol(_end);
 
-   /*
-* try bottom-up allocation only when bottom-up mode
-* is set and @end is above the kernel image.
-*/
-   if (memblock_bottom_up() && end > kernel_end) {
-   phys_addr_t bottom_up_start;
+   if (memblock_bottom_up()) {
+   phys_addr_t bottom_up_start = start;
 
-   /* make sure we will allocate above the kernel */
-   bottom_up_start = max(start, kernel_end);
-
-   /* ok, try bottom-up allocation first */
-   ret = __memblock_find_range_bottom_up(bottom_up_start, end,
- size, align, nid, flags);
-   if (ret)
+   if (mem_hotmovable_parsed) {
+   ret = __memblock_find_range_bottom_up(
+   bottom_up_start, end, size, align, nid,
+   flags);
return ret;
 
/*
-* we always limit bottom-up allocation above the kernel,
-* but top-down allocation doesn't have the limit, so
-* retrying top-down allocation may succeed when bottom-up
-* allocation failed.
-*
-* bottom-up allocation is expected to be fail very rarely,
-* so we use WARN_ONCE() here to see the stack trace if
-* fail happens.
+* if mem hotplug info is not parsed yet, try bottom-up
+* allocation with @end above the kernel image.
 */
-   WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
+   } else if (!mem_hotmovable_parsed && end > kernel_end) {
+

Re: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% regression

2018-12-27 Thread kemi




On 2018/12/28 上午10:55, Waiman Long wrote:
> On 12/27/2018 08:31 PM, Wang, Kemi wrote:
>> Hi, Waiman
>>Did you post that patch? Let's see if it helps.
> 
> I did post the patch a while ago. I will need to rebase it to a new
> baseline. Will do that in a week or 2.
> 

OK. I will take a look at it and try to rebase it on shi's patch to see if
the regression can be fixed.
May I know where I can get that patch? I didn't find it in my inbox. Thanks.

> -Longman
> 
>>
>> -Original Message-
>> From: LKP [mailto:lkp-boun...@lists.01.org] On Behalf Of Waiman Long
>> Sent: Tuesday, November 6, 2018 6:40 AM
>> To: Linus Torvalds ; vba...@suse.cz; 
>> Davidlohr Bueso 
>> Cc: yang@linux.alibaba.com; Linux Kernel Mailing List 
>> ; Matthew Wilcox ; 
>> mho...@kernel.org; Colin King ; Andrew Morton 
>> ; lduf...@linux.vnet.ibm.com; l...@01.org; 
>> kirill.shute...@linux.intel.com
>> Subject: Re: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% 
>> regression
>>
>> On 11/05/2018 05:14 PM, Linus Torvalds wrote:
>>> On Mon, Nov 5, 2018 at 12:12 PM Vlastimil Babka  wrote:
>>>> I didn't spot an obvious mistake in the patch itself, so it looks
>>>> like some bad interaction between scheduler and the mmap downgrade?
>>> I'm thinking it's RWSEM_SPIN_ON_OWNER that ends up being confused by
>>> the downgrade.
>>>
>>> It looks like the benchmark used to be basically CPU-bound, at about
>>> 800% CPU, and now it's somewhere in the 200% CPU region:
>>>
>>>   will-it-scale.time.percent_of_cpu_this_job_got
>>>
>>>   800 +-+---+
>>>   |.+.+.+.+.+.+.+.  .+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+..+.+.+.+. .+.+.+.|
>>>   700 +-+ +.+   |
>>>   | |
>>>   600 +-+   |
>>>   | |
>>>   500 +-+   |
>>>   | |
>>>   400 +-+   |
>>>   | |
>>>   300 +-+   |
>>>   | |
>>>   200 O-O O O O OO  |
>>>   |   O O O  O O O O   O O O O O O O O O O O|
>>>   100 +-+---+
>>>
>>> which sounds like the downgrade really messes with the "spin waiting
>>> for lock" logic.
>>>
>>> I'm thinking it's the "wake up waiter" logic that has some bad
>>> interaction with spinning, and breaks that whole optimization.
>>>
>>> Adding Waiman and Davidlohr to the participants, because they seem to
>>> be the obvious experts in this area.
>>>
>>> Linus
>> Optimistic spinning on rwsem is done only on writers spinning on a
>> writer-owned rwsem. If a write-lock is downgraded to a read-lock, all
>> the spinning waiters will quit. That may explain the drop in cpu
>> utilization. I do have a old patch that enable a certain amount of
>> reader spinning which may help the situation. I can rebase that and send
>> it out for review if people have interest.
>>
>> Cheers,
>> Longman
>>
>>
>> ___
>> LKP mailing list
>> l...@lists.01.org
>> https://lists.01.org/mailman/listinfo/lkp
> 
>

Re: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% regression

2018-12-27 Thread Waiman Long

On 12/27/2018 08:31 PM, Wang, Kemi wrote:
> Hi, Waiman
>Did you post that patch? Let's see if it helps.

I did post the patch a while ago. I will need to rebase it to a new
baseline. Will do that in a week or 2.

-Longman

>
> -Original Message-
> From: LKP [mailto:lkp-boun...@lists.01.org] On Behalf Of Waiman Long
> Sent: Tuesday, November 6, 2018 6:40 AM
> To: Linus Torvalds ; vba...@suse.cz; Davidlohr 
> Bueso 
> Cc: yang@linux.alibaba.com; Linux Kernel Mailing List 
> ; Matthew Wilcox ; 
> mho...@kernel.org; Colin King ; Andrew Morton 
> ; lduf...@linux.vnet.ibm.com; l...@01.org; 
> kirill.shute...@linux.intel.com
> Subject: Re: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% 
> regression
>
> On 11/05/2018 05:14 PM, Linus Torvalds wrote:
>> On Mon, Nov 5, 2018 at 12:12 PM Vlastimil Babka  wrote:
>>> I didn't spot an obvious mistake in the patch itself, so it looks
>>> like some bad interaction between scheduler and the mmap downgrade?
>> I'm thinking it's RWSEM_SPIN_ON_OWNER that ends up being confused by
>> the downgrade.
>>
>> It looks like the benchmark used to be basically CPU-bound, at about
>> 800% CPU, and now it's somewhere in the 200% CPU region:
>>
>>   will-it-scale.time.percent_of_cpu_this_job_got
>>
>>   800 +-+---+
>>   |.+.+.+.+.+.+.+.  .+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+..+.+.+.+. .+.+.+.|
>>   700 +-+ +.+   |
>>   | |
>>   600 +-+   |
>>   | |
>>   500 +-+   |
>>   | |
>>   400 +-+   |
>>   | |
>>   300 +-+   |
>>   | |
>>   200 O-O O O O OO  |
>>   |   O O O  O O O O   O O O O O O O O O O O|
>>   100 +-+---+
>>
>> which sounds like the downgrade really messes with the "spin waiting
>> for lock" logic.
>>
>> I'm thinking it's the "wake up waiter" logic that has some bad
>> interaction with spinning, and breaks that whole optimization.
>>
>> Adding Waiman and Davidlohr to the participants, because they seem to
>> be the obvious experts in this area.
>>
>> Linus
> Optimistic spinning on rwsem is done only on writers spinning on a
> writer-owned rwsem. If a write-lock is downgraded to a read-lock, all
> the spinning waiters will quit. That may explain the drop in cpu
> utilization. I do have a old patch that enable a certain amount of
> reader spinning which may help the situation. I can rebase that and send
> it out for review if people have interest.
>
> Cheers,
> Longman
>
>
> ___
> LKP mailing list
> l...@lists.01.org
> https://lists.01.org/mailman/listinfo/lkp

Re: [PATCH] mm, swap: Fix swapoff with KSM pages

2018-12-27 Thread Andrew Morton

On Wed, 26 Dec 2018 13:15:22 +0800 Huang Ying  wrote:

> KSM pages may be mapped to the multiple VMAs that cannot be reached
> from one anon_vma.  So during swapin, a new copy of the page need to
> be generated if a different anon_vma is needed, please refer to
> comments of ksm_might_need_to_copy() for details.
> 
> During swapoff, unuse_vma() uses anon_vma (if available) to locate VMA
> and virtual address mapped to the page, so not all mappings to a
> swapped out KSM page could be found.  So in try_to_unuse(), even if
> the swap count of a swap entry isn't zero, the page needs to be
> deleted from swap cache, so that, in the next round a new page could
> be allocated and swapin for the other mappings of the swapped out KSM
> page.
> 
> But this contradicts with the THP swap support.  Where the THP could
> be deleted from swap cache only after the swap count of every swap
> entry in the huge swap cluster backing the THP has reach 0.  So
> try_to_unuse() is changed in commit e07098294adf ("mm, THP, swap:
> support to reclaim swap space for THP swapped out") to check that
> before delete a page from swap cache, but this has broken KSM swapoff
> too.
> 
> Fortunately, KSM is for the normal pages only, so the original
> behavior for KSM pages could be restored easily via checking
> PageTransCompound().  That is how this patch works.
> 
> ...
>
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2197,7 +2197,8 @@ int try_to_unuse(unsigned int type, bool frontswap,
>*/
>   if (PageSwapCache(page) &&
>   likely(page_private(page) == entry.val) &&
> - !page_swapped(page))
> + (!PageTransCompound(page) ||
> +  !swap_page_trans_huge_swapped(si, entry)))
>   delete_from_swap_cache(compound_head(page));
>  

The patch "mm, swap: rid swapoff of quadratic complexity" changes this
code significantly.  There are a few issues with that patch so I'll
drop it for now.

Vineeth, please ensure that future versions retain the above fix,
thanks.

[PATCH v3] sock: Make sock->sk_stamp thread-safe

2018-12-27 Thread Deepa Dinamani

Al Viro mentioned (Message-ID
<20170626041334.gz10...@zeniv.linux.org.uk>)
that there is probably a race condition
lurking in accesses of sk_stamp on 32-bit machines.

sock->sk_stamp is of type ktime_t which is always an s64.
On a 32 bit architecture, we might run into situations of
unsafe access as the access to the field becomes non atomic.

Use seqlocks for synchronization.
This allows us to avoid using spinlocks for readers as
readers do not need mutual exclusion.

Another approach to solve this is to require sk_lock for all
modifications of the timestamps. The current approach allows
for timestamps to have their own lock: sk_stamp_lock.
This allows for the patch to not compete with already
existing critical sections, and side effects are limited
to the paths in the patch.

The addition of the new field maintains the data locality
optimizations from
commit 9115e8cd2a0c ("net: reorganize struct sock for better data
locality")

Note that all the instances of the sk_stamp accesses
are either through the ioctl or the syscall recvmsg.

Signed-off-by: Deepa Dinamani 
---
Changes since v2:
* added ifdef as per eric's request
Changes since v1:
* fixed sunrpc sk_stamp update

 include/net/sock.h   | 38 +++---
 net/compat.c | 15 +--
 net/core/sock.c  | 15 ++-
 net/sunrpc/svcsock.c |  2 +-
 4 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f665d74ae509..e144c071c93f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -298,6 +298,7 @@ struct sock_common {
   *@sk_filter: socket filtering instructions
   *@sk_timer: sock cleanup timer
   *@sk_stamp: time stamp of last packet received
+  *@sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
   *@sk_tsflags: SO_TIMESTAMPING socket options
   *@sk_tskey: counter to disambiguate concurrent tstamp requests
   *@sk_zckey: counter to order MSG_ZEROCOPY notifications
@@ -474,6 +475,9 @@ struct sock {
const struct cred   *sk_peer_cred;
longsk_rcvtimeo;
ktime_t sk_stamp;
+#if BITS_PER_LONG==32
+   seqlock_t   sk_stamp_seq;
+#endif
u16 sk_tsflags;
u8  sk_shutdown;
u32 sk_tskey;
@@ -2287,6 +2291,34 @@ static inline void sk_drops_add(struct sock *sk, const 
struct sk_buff *skb)
atomic_add(segs, >sk_drops);
 }
 
+static inline ktime_t sock_read_timestamp(struct sock *sk)
+{
+#if BITS_PER_LONG==32
+   unsigned int seq;
+   ktime_t kt;
+
+   do {
+   seq = read_seqbegin(>sk_stamp_seq);
+   kt = sk->sk_stamp;
+   } while (read_seqretry(>sk_stamp_seq, seq));
+
+   return kt;
+#else
+   return sk->sk_stamp;
+#endif
+}
+
+static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
+{
+#if BITS_PER_LONG==32
+   write_seqlock(>sk_stamp_seq);
+   sk->sk_stamp = kt;
+   write_sequnlock(>sk_stamp_seq);
+#else
+   sk->sk_stamp = kt;
+#endif
+}
+
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
   struct sk_buff *skb);
 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
@@ -2311,7 +2343,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 
struct sk_buff *skb)
 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
-   sk->sk_stamp = kt;
+   sock_write_timestamp(sk, kt);
 
if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
__sock_recv_wifi_status(msg, sk, skb);
@@ -2332,9 +2364,9 @@ static inline void sock_recv_ts_and_drops(struct msghdr 
*msg, struct sock *sk,
if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
__sock_recv_ts_and_drops(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
-   sk->sk_stamp = skb->tstamp;
+   sock_write_timestamp(sk, skb->tstamp);
else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP))
-   sk->sk_stamp = 0;
+   sock_write_timestamp(sk, 0);
 }
 
 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
diff --git a/net/compat.c b/net/compat.c
index 47a614b370cd..d1f3a8a0b3ef 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -467,12 +467,14 @@ int compat_sock_get_timestamp(struct sock *sk, struct 
timeval __user *userstamp)
ctv = (struct compat_timeval __user *) userstamp;
err = -ENOENT;
sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-   tv = ktime_to_timeval(sk->sk_stamp);
+   tv = ktime_to_timeval(sock_read_timestamp(sk));
+
if (tv.tv_sec == -1)
return err;
if (tv.tv_sec == 0) {
-   sk->sk_stamp = ktime_get_real();
-   tv =

Re: bug report: hugetlbfs: use i_mmap_rwsem for more pmd sharing, synchronization

2018-12-27 Thread Andrew Morton

On Thu, 27 Dec 2018 11:24:31 -0800 Mike Kravetz  wrote:

> On 12/27/18 3:44 AM, Colin Ian King wrote:
> > Hi,
> > 
> > Static analysis with CoverityScan on linux-next detected a potential
> > null pointer dereference with the following commit:
> > 
> > From d8a1051ed4ba55679ef24e838a1942c9c40f0a14 Mon Sep 17 00:00:00 2001
> > From: Mike Kravetz 
> > Date: Sat, 22 Dec 2018 10:55:57 +1100
> > Subject: [PATCH] hugetlbfs: use i_mmap_rwsem for more pmd sharing
> > 
> > The earlier check implies that "mapping" may be a null pointer:
> > 
> > var_compare_op: Comparing mapping to null implies that mapping might be
> > null.
> > 
> > 1008if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
> > 1009mapping_cap_writeback_dirty(mapping)) {
> > 
> > ..however later "mapping" is dereferenced when it may be potentially null:
> > 
> > 1034/*
> > 1035 * For hugetlb pages, try_to_unmap could potentially
> > call
> > 1036 * huge_pmd_unshare.  Because of this, take semaphore in
> > 1037 * write mode here and set TTU_RMAP_LOCKED to
> > indicate we
> > 1038 * have taken the lock at this higer level.
> > 1039 */
> > CID 1476097 (#1 of 1): Dereference after null check (FORWARD_NULL)
> > 
> > var_deref_model: Passing null pointer mapping to
> > i_mmap_lock_write, which dereferences it.
> > 
> > 1040i_mmap_lock_write(mapping);
> > 1041unmap_success = try_to_unmap(hpage,
> > ttu|TTU_RMAP_LOCKED);
> > 1042i_mmap_unlock_write(mapping);
> > 
> 
> Thanks for the report.
> 
> The 'good news' is that mapping can not be null in the code path above.
> The reasons are:
> - The page is locked upon entry to the routine
> - Earlier in the routine there is the check:
>   if (!page_mapped(hpage))
>   return true;
>   For huge pages (which are processed in the else clause above), page_mapped
>   implies page->mapping != null.
> 
> However, the routine hwpoison_user_mappings handles all page types.  The
> page_mapped check is actually there to check for pages in the swap cache.
> It is just coincidence that it also implies mapping != null for huge pages.
> 
> It would be better to make an explicit check for mapping != null before
> calling i_mmap_lock_write/try_to_unmap.  In this way, unrelated changes to
> code above will not potentially lead to the possibility of mapping == null.
> 
> I'm not sure what is the best way to handle this.  Below is an updated version
> of the patch sent to Andrew.  I can also provide a simple patch to the patch
> if that is easier.
> 

Below is the delta.  Please check it.  It seems to do more than the
above implies.

Also, I have notes here that 

hugetlbfs-use-i_mmap_rwsem-for-more-pmd-sharing-synchronization.patch
and
hugetlbfs-use-i_mmap_rwsem-to-fix-page-fault-truncate-race.patch

have additional updates pending.  Due to emails such as

http://lkml.kernel.org/r/849f5202-2200-265f-7769-8363053e8...@oracle.com
http://lkml.kernel.org/r/732c0b7d-5a4e-97a8-9677-30f352089...@oracle.com
http://lkml.kernel.org/r/6b91dd42-b903-1f6c-729a-bd9f51273...@oracle.com

What's the status, please?


From: Mike Kravetz 
Subject: hugetlbfs-use-i_mmap_rwsem-for-more-pmd-sharing-synchronization-fix

It would be better to make an explicit check for mapping != null before
calling i_mmap_lock_write/try_to_unmap.  In this way, unrelated changes to
code above will not potentially lead to the possibility of mapping ==
null.

Signed-off-by: Mike Kravetz 
Cc: Michal Hocko 
Cc: Hugh Dickins 
Cc: Naoya Horiguchi 
Cc: "Aneesh Kumar K . V" 
Cc: Andrea Arcangeli 
Cc: "Kirill A . Shutemov" 
Cc: Davidlohr Bueso 
Cc: Prakash Sangappa 
Cc: Colin Ian King 
Signed-off-by: Andrew Morton 
---


--- 
a/mm/hugetlb.c~hugetlbfs-use-i_mmap_rwsem-for-more-pmd-sharing-synchronization-fix
+++ a/mm/hugetlb.c
@@ -3250,6 +3250,14 @@ int copy_hugetlb_page_range(struct mm_st
mmu_notifier_range_init(, src, vma->vm_start,
vma->vm_end, MMU_NOTIFY_CLEAR);
mmu_notifier_invalidate_range_start();
+   } else {
+   /*
+* For shared mappings i_mmap_rwsem must be held to call
+* huge_pte_alloc, otherwise the returned ptep could go
+* away if part of a shared pmd and another thread calls
+* huge_pmd_unshare.
+*/
+   i_mmap_lock_read(mapping);
}
 
for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
@@ -3259,18 +3267,8 @@ int copy_hugetlb_page_range(struct mm_st
if (!src_pte)
continue;
 
-   /*
-* i_mmap_rwsem must be held to call huge_pte_alloc.
-* Continue to hold until finished  with dst_pte, otherwise
-* it could go away if part of a shared pmd.
-*
-*

[PATCH -next] mm: Mark 'stack_vm_area' with __maybe_unused

2018-12-27 Thread YueHaibing

Fixes gcc '-Wunused-but-set-variable' warning when
CONFIG_VMAP_STACK is not set:

kernel/fork.c: In function 'dup_task_struct':
kernel/fork.c:843:20: warning:
 variable 'stack_vm_area' set but not used [-Wunused-but-set-variable]

Signed-off-by: YueHaibing 
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index e2a5156..b38c392 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -840,7 +840,7 @@ static struct task_struct *dup_task_struct(struct 
task_struct *orig, int node)
 {
struct task_struct *tsk;
unsigned long *stack;
-   struct vm_struct *stack_vm_area;
+   struct vm_struct *stack_vm_area __maybe_unused;
int err;
 
if (node == NUMA_NO_NODE)

Re: [RFC][PATCH v2 08/21] mm: introduce and export pgdat peer_node

2018-12-27 Thread Fengguang Wu


On Thu, Dec 27, 2018 at 08:07:26PM +0000, Christopher Lameter wrote:

On Wed, 26 Dec 2018, Fengguang Wu wrote:


Each CPU socket can have 1 DRAM and 1 PMEM node, we call them "peer nodes".
Migration between DRAM and PMEM will by default happen between peer nodes.


Which one does numa_node_id() point to? I guess that is the DRAM node and


Yes. In our test machine, PMEM nodes show up as memory-only nodes, so
numa_node_id() points to DRAM node.

Here is numactl --hardware output on a 2S test machine.

available: 4 nodes (0-3)
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
25 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
node 0 size: 257712 MB
node 0 free: 178251 MB
node 1 cpus: 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 
48 49 50 51 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 
100 101 102
103
node 1 size: 258038 MB
node 1 free: 174796 MB
node 2 cpus:
node 2 size: 503999 MB
node 2 free: 438349 MB
node 3 cpus:
node 3 size: 503999 MB
node 3 free: 438349 MB
node distances:
node   0   1   2   3
 0:  10  21  20  20
 1:  21  10  20  20
 2:  20  20  10  20
 3:  20  20  20  10


then we fall back to the PMEM node?


Fall back is possible but not the scope of this patchset. We modified
fallback zonelists in patch 10 to simplify PMEM usage. With that
patch, page allocations on DRAM nodes won't fallback to PMEM nodes.
Instead, PMEM nodes will mainly be used by explicit numactl placement
and as migration target. When there is memory pressure in DRAM node,
LRU cold pages there will be demote migrated to its peer PMEM node on
the same socket by patch 20.

Thanks,
Fengguang

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Xie XiuQi

Hi Tejun,

On 2018/12/28 10:02, Tejun Heo wrote:
> On Thu, Dec 27, 2018 at 05:53:52PM -0800, Tejun Heo wrote:
>> Vincent knows that part way better than me but I think the safest way
>> would be doing the optimization removal iff tmp_alone_branch is
>> already pointing to leaf_cfs_rq_list.  IIUC, it's pointing to
>> something else only while a branch is being built and deferring
>> optimization removal by an avg update cycle isn't gonna make any
>> difference anyway.
> 
> So, something like the following.  Xie, can you see whether the
> following patch resolves the problem?

Zhipeng is preparing to test it, thanks.

> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index d1907506318a..88b9118b5191 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7698,7 +7698,8 @@ static void update_blocked_averages(int cpu)
>* There can be a lot of idle CPU cgroups.  Don't let fully
>* decayed cfs_rqs linger on the list.
>*/
> - if (cfs_rq_is_decayed(cfs_rq))
> + if (cfs_rq_is_decayed(cfs_rq) &&
> + rq->tmp_alone_branch == &rq->leaf_cfs_rq_list)
>   list_del_leaf_cfs_rq(cfs_rq);
>  
>   /* Don't need periodic decay once load/util_avg are null */
> 
> .
> 

-- 
Thanks,
Xie XiuQi

Re: d_off field in struct dirent and 32-on-64 emulation

2018-12-27 Thread Dmitry V. Levin

On Thu, Dec 27, 2018 at 06:18:19PM +0100, Florian Weimer wrote:
> We have a bit of an interesting problem with respect to the d_off
> field in struct dirent.
> 
> When running a 64-bit kernel on certain file systems, notably ext4,
> this field uses the full 63 bits even for small directories (strace -v
> output, wrapped here for readability):
> 
> getdents(3, [
>   {d_ino=1494304, d_off=3901177228673045825, d_reclen=40, 
> d_name="authorized_keys", d_type=DT_REG},
>   {d_ino=1494277, d_off=7491915799041650922, d_reclen=24, d_name=".", 
> d_type=DT_DIR},
>   {d_ino=1314655, d_off=9223372036854775807, d_reclen=24, d_name="..", 
> d_type=DT_DIR}
> ], 32768) = 88
> 
> When running in 32-bit compat mode, this value is somehow truncated to
> 31 bits, for both the getdents and the getdents64 (!) system call (at
> least on i386).

Why is the getdents64 system call affected by this truncation?
Isn't it a kernel bug that has to be fixed in the kernel instead?


-- 
ldv


signature.asc
Description: PGP signature

[GIT PULL] cgroup changes for v4.21-rc1

2018-12-27 Thread Tejun Heo

Hello, Linus.

cgroup changes for v4.21.

* Waiman's cgroup2 cpuset support has been finally merged closing one
  of the last remaining feature gaps.

* cgroup.procs could show non-leader threads when cgroup2 threaded
  mode was used in certain ways.  I forgot to push the fix during the
  last cycle.

Happy new year!

The following changes since commit f2ce1065e767fc7da106a5f5381d1e8f842dc6f4:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2018-11-19 
09:24:04 -0800)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git for-4.21

for you to fetch changes up to 4d71c6f8771a6bccb844244f09831fa4624b22c1:

  Merge branch 'for-4.20-fixes' into for-4.21 (2018-12-27 18:05:30 -0800)


Tejun Heo (4):
  cpuset: Minor cgroup2 interface updates
  cgroup: Add .__DEBUG__. prefix to debug file names
  cgroup: fix CSS_TASK_ITER_PROCS
  Merge branch 'for-4.20-fixes' into for-4.21

Waiman Long (12):
  cpuset: Enable cpuset controller in default hierarchy
  cpuset: Define data structures to support scheduling partition
  cpuset: Simply allocation and freeing of cpumasks
  cpuset: Add new v2 cpuset.sched.partition flag
  cpuset: Add an error state to cpuset.sched.partition
  cpuset: Track cpusets that use parent's effective_cpus
  cpuset: Make CPU hotplug work with partition
  cpuset: Make generate_sched_domains() work with partition
  cpuset: Expose cpus.effective and mems.effective on cgroup v2 root
  cpuset: Use descriptive text when reading/writing cpuset.sched.partition
  cpuset: Add documentation about the new "cpuset.sched.partition" flag
  cpuset: Expose cpuset.cpus.subpartitions with cgroup_debug

Yangtao Li (1):
  cgroup: remove unnecessary unlikely()

YueHaibing (1):
  cpuset: Remove set but not used variable 'cs'

 Documentation/admin-guide/cgroup-v2.rst | 182 +-
 include/linux/cgroup-defs.h |   1 +
 kernel/cgroup/cgroup-internal.h |   2 +
 kernel/cgroup/cgroup.c  |  58 +-
 kernel/cgroup/cpuset.c  | 944 +---
 kernel/cgroup/debug.c   |   4 +-
 6 files changed, 1095 insertions(+), 96 deletions(-)

[PATCH] x86_64: increase stack size for KASAN_EXTRA

2018-12-27 Thread Qian Cai

If the kernel is configured with KASAN_EXTRA, the stack size is
increased significantly because enabling this option sets
"-fstack-reuse" to "none" in GCC [1]. As a result, it could trigger
stack overruns quite often with a 32k stack size when compiled using
GCC 8. For example, this reproducer

https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/\
syscalls/madvise/madvise06.c

could trigger a "corrupted stack end detected inside scheduler" very
reliably with CONFIG_SCHED_STACK_END_CHECK enabled.

There are just too many functions that could have a large stack with
KASAN_EXTRA due to large local variables that have been called over and
over again without being able to reuse the stacks. Some noticeable ones
are,

size
7648 shrink_page_list
3584 xfs_rmap_convert
3312 migrate_page_move_mapping
3312 dev_ethtool
3200 migrate_misplaced_transhuge_page
3168 copy_process

There are another 49 functions over 2k in size when compiling the kernel
with "-Wframe-larger-than=" even with a related minimal config on this
machine. Hence, it is too much work to change Makefiles for each object
to compile without "-fsanitize-address-use-after-scope" individually.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23

Although there is a patch in GCC 9 to help the situation, GCC 9 probably
won't be released for a few months, and then it will probably take another
6 months to 1 year for all major distros to include it as a default.
Hence, the stack usage with KASAN_EXTRA can be revisited again in 2020
when GCC 9 is everywhere. Until then, this patch will help users avoid
stack overrun.

This has already been fixed for arm64 for the same reason via
6e8830674ea (arm64: kasan: Increase stack size for KASAN_EXTRA).

Signed-off-by: Qian Cai 
---
 arch/x86/include/asm/page_64_types.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/include/asm/page_64_types.h 
b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..0ce558a8150d 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -7,7 +7,11 @@
 #endif
 
 #ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_STACK_ORDER 2
+#else
 #define KASAN_STACK_ORDER 1
+#endif
 #else
 #define KASAN_STACK_ORDER 0
 #endif
-- 
2.17.2 (Apple Git-113)

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Tejun Heo

On Thu, Dec 27, 2018 at 05:53:52PM -0800, Tejun Heo wrote:
> Vincent knows that part way better than me but I think the safest way
> would be doing the optimization removal iff tmp_alone_branch is
> already pointing to leaf_cfs_rq_list.  IIUC, it's pointing to
> something else only while a branch is being built and deferring
> optimization removal by an avg update cycle isn't gonna make any
> difference anyway.

So, something like the following.  Xie, can you see whether the
following patch resolves the problem?

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d1907506318a..88b9118b5191 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7698,7 +7698,8 @@ static void update_blocked_averages(int cpu)
 * There can be a lot of idle CPU cgroups.  Don't let fully
 * decayed cfs_rqs linger on the list.
 */
-   if (cfs_rq_is_decayed(cfs_rq))
+   if (cfs_rq_is_decayed(cfs_rq) &&
+   rq->tmp_alone_branch == &rq->leaf_cfs_rq_list)
list_del_leaf_cfs_rq(cfs_rq);
 
/* Don't need periodic decay once load/util_avg are null */

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Tejun Heo

Hello,

On Thu, Dec 27, 2018 at 05:36:47PM -0800, Linus Torvalds wrote:
> > Unless I'm totally confused, which is definitely possible, I don't
> > think there's a race condition and the only bug is the
> > tmp_alone_branch pointer getting dangled, which maybe doesn't happen
> > all that much?
> 
> Ahh. That would explain the list corruption. The next
> list_add_leaf_cfs_rq() could try to add to a removed entry.
> 
> How would you reset it? Do something like
> 
>rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
> 
> for every removal, or make it conditional on it matching the removed entry?

Vincent knows that part way better than me but I think the safest way
would be doing the optimization removal iff tmp_alone_branch is
already pointing to leaf_cfs_rq_list.  IIUC, it's pointing to
something else only while a branch is being built and deferring
optimization removal by an avg update cycle isn't gonna make any
difference anyway.

Thanks.

-- 
tejun

Re: [PATCH] dsa: return error code upstream

2018-12-27 Thread David Miller

From: Florian Fainelli 
Date: Thu, 27 Dec 2018 17:06:58 -0800

> Le 12/27/18 à 4:22 PM, David Miller a écrit :
>> From: Kangjie Lu 
>> Date: Tue, 25 Dec 2018 22:08:18 -0600
>> 
>>> Both bcm_sf2_sw_indir_rw and mdiobus_write_nested could fail, so let's
>>> return their error codes upstream.
>>>
>>> Signed-off-by: Kangjie Lu 
>> 
>> Applied with Subject line adjusted as per Florian's feedback.
>> 
> 
> Thanks, technically bcm_sf2_sw_indir_rw() cannot fail and checking its
> return value in the write (op == 0) case will always lead to returning 0
> anyway. In case this ever changes in the future, we should be more
> future proof with that change anyway.

Ok, thanks for the insight.

Re: [GIT PULL] xfs: updates for v4.21

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Sun, 23 Dec 2018 16:26:49 -0800:

> git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git tags/xfs-4.21-merge-2

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/47a43f2f0ce24bb75e3e4500118000585a3b496a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [GIT PULL] Btrfs updates for 4.21

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Wed, 19 Dec 2018 16:11:04 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-4.21-tag

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/32ee34eddad13cd44ad0cb3e659fe6fd49143b62

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [GIT PULL] File locking changes for v4.21

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Mon, 24 Dec 2018 06:06:16 -0500:

> git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git 
> tags/locks-v4.21-1

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/00c569b567c7f1f0da6162868fd02a9f29411805

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [GIT PULL] iomap: updates for v4.21

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Sun, 23 Dec 2018 16:42:04 -0800:

> git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git tags/iomap-4.21-merge-2

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/bc77789a491cdc6f47e5bbd1d04ddd283d64658b

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [GIT PULL] ext4 updates for 4.21-rc1

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Sun, 23 Dec 2018 23:53:34 -0500:

> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git 
> tags/ext4_for_linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/f6b1495fba0b66cfa05efa0ca2370513b79b45b6

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [GIT PULL] dlm updates for 4.21

2018-12-27 Thread pr-tracker-bot

The pull request you sent on Wed, 19 Dec 2018 11:17:22 -0600:

> git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git dlm-4.21

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/4de3aea385f5a054885bc96ffdb9c7d0ea0c9486

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Linus Torvalds

On Thu, Dec 27, 2018 at 5:15 PM Tejun Heo  wrote:
>
> I'm pretty sure enqueue_entity() *has* to be called with rq lock.
> unthrottle_cfs_rq() is called from tg_set_cfs_bandwidth(),
> distribute_cfs_runtime() and unthrottle_offline_cfs_rqs.  The first
> two grabs the rq_lock just around the calls and the last one has a
> lockdep assert on the rq_lock.  What am I missing?

No, I think you're right, and I just didn't follow things deep enough,
didn't see any rq locking in the loop in unthrottle_offline_cfs_rqs(),
and didn't realize that the rq is locked by the caller.

> > But that still makes me go "how come is this only noticed 18 months
> > after the fact"?
>
> Unless I'm totally confused, which is definitely possible, I don't
> think there's a race condition and the only bug is the
> tmp_alone_branch pointer getting dangled, which maybe doesn't happen
> all that much?

Ahh. That would explain the list corruption. The next
list_add_leaf_cfs_rq() could try to add to a removed entry.

How would you reset it? Do something like

   rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;

for every removal, or make it conditional on it matching the removed entry?

Linus

Re: [PATCH v3 0/2] perf tests: Check for ARM [vectors] page

2018-12-27 Thread Florian Fainelli

Le 12/27/18 à 2:55 AM, Namhyung Kim a écrit :
> Hello,
> 
> On Thu, Dec 20, 2018 at 07:43:35PM -0800, Florian Fainelli wrote:
>> Hi all,
>>
>> I just painfully learned that perf would segfault when
>> CONFIG_KUSER_HELPERS is disabled because it unconditionally makes use of
> 
> Could you please elaborate?

Sure, I was debugging why perf was segfaulting on my systems and saw
that the faulting address was within 0xffff_0000 (high vectors); and
because CONFIG_KUSER_HELPERS was not enabled, nothing was mapped at that
address so this was a legitimate crash. This was on a variety of ARMv7A
systems, Cortex-A9, Cortex-A5 etc.

Later on, I found that in tools/arch/arm/include/asm/barrier.h the
barriers are unconditionally defined to make use of the [vectors] page
that the ARM kernel only sets up when CONFIG_KUSER_HELPERS is enabled
and this is the reason for the crash.

Testing for the page itself is pretty harmless if you think we should
make something more robust around checking for HAVE_AUXTRACE_SUPPORT
(which appears to be the specific location making use of barriers), let
me know.

Thanks!

> 
> Thanks,
> Namhyung
> 
> 
>> it. This patch series adds an ARM test for that by leveraging the
>> existing find_vdso_map() function and making it more generic and capable
>> of location any map within /proc/self/maps.
>>
>> Changes in v3:
>>
>> - remove find_vdso_map() call find_map() with VDSO__MAP_NAME
>>
>> Changes in v2:
>>
>> - use strlen() instead of sizeof() -1 since we made the page name a
>>   parameter
>> - use TEST_OK/TEST_FAIL in lieu of 0/-1
>> - added an error message indicating CONFIG_KUSER_HELPERS might be
>>   disabled
>>
>> Florian Fainelli (2):
>>   perf tools: Make find_vdso_map() more modular
>>   perf tests: Add a test for the ARM 32-bit [vectors] page
>>
>>  tools/perf/Makefile.perf  |  4 ++--
>>  tools/perf/arch/arm/tests/Build   |  1 +
>>  tools/perf/arch/arm/tests/arch-tests.c|  4 
>>  tools/perf/arch/arm/tests/vectors-page.c  | 24 +++
>>  tools/perf/perf-read-vdso.c   |  6 ++---
>>  tools/perf/tests/tests.h  |  5 
>>  .../perf/util/{find-vdso-map.c => find-map.c} |  7 +++---
>>  tools/perf/util/vdso.c|  6 ++---
>>  8 files changed, 45 insertions(+), 12 deletions(-)
>>  create mode 100644 tools/perf/arch/arm/tests/vectors-page.c
>>  rename tools/perf/util/{find-vdso-map.c => find-map.c} (71%)
>>
>> -- 
>> 2.17.1
>>

-- 
Florian

Re: Fix 80d20d35af1e ("nohz: Fix local_timer_softirq_pending()") may have revealed another problem

2018-12-27 Thread Frederic Weisbecker

On Fri, Dec 28, 2018 at 12:11:12AM +0100, Heiner Kallweit wrote:
> 
> OK, did as you advised and here comes the trace. That's the related dmesg 
> part:
> 
> [ 1479.025092] x86: Booting SMP configuration:
> [ 1479.025129] smpboot: Booting Node 0 Processor 1 APIC 0x2
> [ 1479.094715] NOHZ: local_softirq_pending 202
> [ 1479.096557] smpboot: CPU 1 is now offline
> 
> Hope it helps.
> Heiner
> 
> 
> # tracer: nop
> #
> #  _-=> irqs-off
> # / _=> need-resched
> #| / _---=> hardirq/softirq
> #|| / _--=> preempt-depth
> #||| / delay
> #   TASK-PID   CPU#  TIMESTAMP  FUNCTION
> #  | |   |      | |
[...]
>   -0 [001] d.h2  1479.111017: softirq_raise: vec=9 
> [action=RCU]
>   -0 [001] d.h2  1479.111026: softirq_raise: vec=7 
> [action=SCHED]
>   -0 [001] ..s2  1479.111035: softirq_entry: vec=1 
> [action=TIMER]
>   -0 [001] ..s2  1479.111040: softirq_exit: vec=1 
> [action=TIMER]
>   -0 [001] ..s2  1479.111040: softirq_entry: vec=7 
> [action=SCHED]
>   -0 [001] ..s2  1479.111052: softirq_exit: vec=7 
> [action=SCHED]
>   -0 [001] ..s2  1479.111052: softirq_entry: vec=9 
> [action=RCU]
>   -0 [001] .Ns2  1479.111079: softirq_exit: vec=9 
> [action=RCU]
>  cpuhp/1-13[001] dNh2  1479.112930: softirq_raise: vec=1 
> [action=TIMER]
>  cpuhp/1-13[001] dNh2  1479.112935: softirq_raise: vec=9 
> [action=RCU]

Interesting, the softirq is raised from hardirq but it's not handled in the
end of the IRQ. Are you running threaded IRQS by any chance? If so I would
expect ksoftirqd to handle the pending work before we go idle. However I can
imagine a small window where such an expectation may not be met: if the
softirq is raised after the ksoftirqd thread is parked
(CPUHP_AP_SMPBOOT_THREADS), which is right before we disable the CPU
(CPUHP_TEARDOWN_CPU).

I don't know if we can afford to ignore a softirq even at this late stage. We 
should
probably avoid leaking any. So here is a possible fix, if you don't mind trying:

diff --git a/kernel/softirq.c b/kernel/softirq.c
index d288133..716096b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -56,6 +56,7 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] 
__cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(int, ksoftirqd_parked);
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
@@ -363,7 +364,7 @@ static inline void invoke_softirq(void)
if (ksoftirqd_running(local_softirq_pending()))
return;
 
-   if (!force_irqthreads) {
+   if (!force_irqthreads || __this_cpu_read(ksoftirqd_parked)) {
 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
 * We can safely execute softirq on the current stack if
@@ -659,6 +660,22 @@ static void run_ksoftirqd(unsigned int cpu)
local_irq_enable();
 }
 
+static void ksoftirqd_park(unsigned int cpu)
+{
+   local_irq_disable();
+   __this_cpu_write(ksoftirqd_parked, 1);
+
+   if (local_softirq_pending())
+   run_ksoftirqd(cpu);
+
+   local_irq_enable();
+}
+
+static void ksoftirqd_unpark(unsigned int cpu)
+{
+   __this_cpu_write(ksoftirqd_parked, 0);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * tasklet_kill_immediate is called to remove a tasklet which can already be
@@ -724,6 +741,8 @@ static int takeover_tasklets(unsigned int cpu)
 static struct smp_hotplug_thread softirq_threads = {
.store  = &ksoftirqd,
.thread_should_run  = ksoftirqd_should_run,
+   .park   = ksoftirqd_park,
+   .unpark = ksoftirqd_unpark,
.thread_fn  = run_ksoftirqd,
.thread_comm= "ksoftirqd/%u",
 };

RE: [PATCH v2 5/5] usb:cdns3 Add Cadence USB3 DRD Driver

2018-12-27 Thread Peter Chen

 
> >
> >@@ -299,6 +306,7 @@ int cdns3_drd_init(struct cdns3 *cdns)
> >cdns->version  = CDNS3_CONTROLLER_V0;
> >cdns->otg_v1_regs = NULL;
> >cdns->otg_regs = regs;
> >+   writel(0x1, &cdns->otg_v0_regs->simulate);
> >dev_info(cdns->dev, "DRD version v0 (%08x)\n",
> > readl(&cdns->otg_v0_regs->version));
> >} else {
> 
> I have confirmation from HW team that time that driver should wait after de-
> selecting mode is 2-3ms for simulate mode. It's time when FSM is in
> DRD_H_WAIT_VBUS_FAIL.
> Driver cannot re-enable the host/device mode before this time has elapsed.
> 
> 3 ms is the maximum time. Additionally, you can confirm the current FSM state 
> by
> reading the host_otg_state (bit 5:3) or dev_otg_state (2:0)  from OTGSTATE
> register.
> 
> If bit 0 in simulate register is cleared the time is exactly 1s.
> 

Thanks, Pawel.

Would you please add below changes in your next revision?
- Set bit 0 in simulate register
- timeout logic for waiting host_otg_state or dev_otg_state at OTGSTATE
when switch to host or device.

Peter

RE: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% regression

2018-12-27 Thread Wang, Kemi

Hi, Waiman
   Did you post that patch? Let's see if it helps.

-Original Message-
From: LKP [mailto:lkp-boun...@lists.01.org] On Behalf Of Waiman Long
Sent: Tuesday, November 6, 2018 6:40 AM
To: Linus Torvalds ; vba...@suse.cz; Davidlohr 
Bueso 
Cc: yang@linux.alibaba.com; Linux Kernel Mailing List 
; Matthew Wilcox ; 
mho...@kernel.org; Colin King ; Andrew Morton 
; lduf...@linux.vnet.ibm.com; l...@01.org; 
kirill.shute...@linux.intel.com
Subject: Re: [LKP] [mm] 9bc8039e71: will-it-scale.per_thread_ops -64.1% 
regression

On 11/05/2018 05:14 PM, Linus Torvalds wrote:
> On Mon, Nov 5, 2018 at 12:12 PM Vlastimil Babka  wrote:
>> I didn't spot an obvious mistake in the patch itself, so it looks
>> like some bad interaction between scheduler and the mmap downgrade?
> I'm thinking it's RWSEM_SPIN_ON_OWNER that ends up being confused by
> the downgrade.
>
> It looks like the benchmark used to be basically CPU-bound, at about
> 800% CPU, and now it's somewhere in the 200% CPU region:
>
>   will-it-scale.time.percent_of_cpu_this_job_got
>
>   800 +-+---+
>   |.+.+.+.+.+.+.+.  .+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+..+.+.+.+. .+.+.+.|
>   700 +-+ +.+   |
>   | |
>   600 +-+   |
>   | |
>   500 +-+   |
>   | |
>   400 +-+   |
>   | |
>   300 +-+   |
>   | |
>   200 O-O O O O OO  |
>   |   O O O  O O O O   O O O O O O O O O O O|
>   100 +-+---+
>
> which sounds like the downgrade really messes with the "spin waiting
> for lock" logic.
>
> I'm thinking it's the "wake up waiter" logic that has some bad
> interaction with spinning, and breaks that whole optimization.
>
> Adding Waiman and Davidlohr to the participants, because they seem to
> be the obvious experts in this area.
>
> Linus

Optimistic spinning on rwsem is done only on writers spinning on a
writer-owned rwsem. If a write-lock is downgraded to a read-lock, all
the spinning waiters will quit. That may explain the drop in cpu
utilization. I do have a old patch that enable a certain amount of
reader spinning which may help the situation. I can rebase that and send
it out for review if people have interest.

Cheers,
Longman


___
LKP mailing list
l...@lists.01.org
https://lists.01.org/mailman/listinfo/lkp

Re: [PATCH] sched: fix infinity loop in update_blocked_averages

2018-12-27 Thread Tejun Heo

Happy holidays, everyone.

(cc'ing Rik, who has been looking at the scheduler code a lot lately)

On Thu, Dec 27, 2018 at 10:15:17AM -0800, Linus Torvalds wrote:
> [ goes off and looks ]
> 
> Oh. unthrottle_cfs_rq -> enqueue_entity -> list_add_leaf_cfs_rq()
> doesn't actually seem to hold the rq lock at all. It's just called
> under a rcu read lock.

I'm pretty sure enqueue_entity() *has* to be called with rq lock.
unthrottle_cfs_rq() is called from tg_set_cfs_bandwidth(),
distribute_cfs_runtime() and unthrottle_offline_cfs_rqs.  The first
two grab the rq_lock just around the calls and the last one has a
lockdep assert on the rq_lock.  What am I missing?

> So it all seems to depend on that "on_list" flag for exclusion. Which
> seems fundamentally racy, since it's not protected by a lock.

The only place on_list is accessed without holding rq_lock is
unregister_fair_sched_group().  It's a minor optimization on a
relatively cold path (group destruction), so if it's racy there, I
think we can take out that optimization.  I'd be surprised if anyone
notices that.

That said, I don't think it's broken.  False positive on on_list is
fine and I can't see how a false negative would happen given that the
only event which can set it is the sched entity getting scheduled and
there's no way the removal path can't race against that transition.

> But that still makes me go "how come is this only noticed 18 months
> after the fact"?

Unless I'm totally confused, which is definitely possible, I don't
think there's a race condition and the only bug is the
tmp_alone_branch pointer getting dangled, which maybe doesn't happen
all that much?

Thanks.

-- 
tejun

Re: iMX6 FEC driver Linux-fslc 4.17 - IPV6 Multicast not working when unplugging/plugging ethernet cable

2018-12-27 Thread Florian Fainelli

Le 12/27/18 à 6:21 AM, Stefano Cappa a écrit :
> Hi everyone,
> I already posted this in NXP forum as a comment
> (https://community.nxp.com/thread/359397), in yocto mailing list
> (https://lists.yoctoproject.org/pipermail/yocto/2018-December/043664.html)
> and in meta-freescale mailing list
> (https://lists.yoctoproject.org/pipermail/meta-freescale/2018-December/023625.html)
> A user in meta-freescale's mailing list suggested to resend this
> message to the emails obtained running "./scripts/get_maintainer.pl -F
> drivers/net/ethernet/freescale/fec_main.c".
> 
> 
> The problem is:
> 
> If I boot my iMX6 device with ethernet cable attached and I execute "ping6
> ff02::fb" to ping the multicast address I get this response:
> ~# ping6 ff02::fb
> PING ff02::fb (ff02::fb): 56 data bytes
> 64 bytes from fe80::c2f:eff:fe11:2d71: seq=0 ttl=64 time=2.057 ms
> 64 bytes from fe80::809:1bfb:8d4c:ae54: seq=0 ttl=64 time=73.101 ms (DUP!)
> 64 bytes from fe80::3e28:6dff:feed:5b97: seq=0 ttl=64 time=150.772 ms
> (DUP!)
> 
> 
> Otherwise, If I unplug and plug again ethernet cable, I cannot ping the
> multicast ipv6 address anymore.
> The result is:
> ~# ping6 ff02::fb
> PING ff02::fb (ff02::fb): 56 data bytes
> ping6: sendto: Network is unreachable
> 
> 
> The original NXP discussion was about older version of Linux, however
> this issue is happening with both Linux 4.9.88 and Linux 4.17.
> Probably also with the latest version, but I didn't try.
> 
> 
> Do you have any suggestions? Is this a bug? This is really a
> frustrating and I'm really
> surprised to see the same problem also on Linux 4.17.

All multicast filter programming would occur from within the
set_multicast_list() function. At first glance, there are several things
that could be wrong:

- hash is only 6 bits, and written across a pair of 32-bit registers, so
that would leave only 10 unique multicast address entries if I
understand correctly how the hardware is designed

- if we are exceeding the maximum number of unique hash values/bit
positions then the interface should be put in promiscuous mode to allow
the reception of all traffic, and multicast filtering needs to be done
in software (very few drivers get this right)

I would instrument that function and see what is happening in there at
the time of failure.

> 
> 
> PS: I'm sorry for the double email, but the previous one was in html
> and it was rejected.
> 
> Thank u.
> 


-- 
Florian

[PATCH V2] x86/kexec: fix a kexec_file_load failure

2018-12-27 Thread Dave Young

The code cleanup mentioned in Fixes tag changed the behavior of
kexec_locate_mem_hole.  The kexec_locate_mem_hole will try to
allocate free memory only when kbuf.mem is initialized as zero.

But in x86 kexec_file_load implementation there are a few places
the kbuf.mem is reused like below:
  /* kbuf initialized, kbuf.mem = 0 */
  ...
  kexec_add_buffer()
  ...
  kexec_add_buffer()

  The second kexec_add_buffer will reuse previous kbuf but not
  reinitialize the kbuf.mem.

Thus kexec_file_load failed because the sanity check failed.

So explicitly reset kbuf.mem to fix the issue.

Fixes: b6664ba42f14 ("s390, kexec_file: drop arch_kexec_mem_walk()")
Signed-off-by: Dave Young 
Cc: 
---
V1 -> V2: use KEXEC_BUF_MEM_UNKNOWN in code.
 arch/x86/kernel/crash.c   | 1 +
 arch/x86/kernel/kexec-bzimage64.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f631a3f15587..6b7890c7889b 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -469,6 +469,7 @@ int crash_load_segments(struct kimage *image)
 
kbuf.memsz = kbuf.bufsz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
+   kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree((void *)image->arch.elf_headers);
diff --git a/arch/x86/kernel/kexec-bzimage64.c 
b/arch/x86/kernel/kexec-bzimage64.c
index 278cd07228dd..0d5efa34f359 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char 
*kernel,
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
+   kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
@@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char 
*kernel,
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
+   kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out_free_params;
-- 
2.17.0

Re: [PATCH] x86/kexec: fix a kexec_file_load failure

2018-12-27 Thread Dave Young

On 12/27/18 at 01:06pm, Dave Young wrote:
> The code cleanup mentioned in Fixes tag changed the behavior of
> kexec_locate_mem_hole.  The kexec_locate_mem_hole will try to
> allocate free memory only when kbuf.mem is initialized as zero.
> 
> But in x86 kexec_file_load implementation there are a few places
> the kbuf.mem is reused like below:
>   /* kbuf initialized, kbuf.mem = 0 */
>   ...
>   kexec_add_buffer()
>   ...
>   kexec_add_buffer()
> 
>   The second kexec_add_buffer will reuse previous kbuf but not
>   reinitialize the kbuf.mem.
> 
> Thus kexec_file_load failed because the sanity check failed.
> 
> So explicitly reset mem = 0 to fix the issue.
> 
> Fixes: b6664ba42f14 ("s390, kexec_file: drop arch_kexec_mem_walk()")
> Signed-off-by: Dave Young 
> Cc: 
> ---
>  arch/x86/kernel/crash.c   | 1 +
>  arch/x86/kernel/kexec-bzimage64.c | 2 ++
>  2 files changed, 3 insertions(+)
> 
> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
> index f631a3f15587..37147509d2c8 100644
> --- a/arch/x86/kernel/crash.c
> +++ b/arch/x86/kernel/crash.c
> @@ -469,6 +469,7 @@ int crash_load_segments(struct kimage *image)
>  
>   kbuf.memsz = kbuf.bufsz;
>   kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
> + kbuf.mem = 0;

Self NAK, will resend with KEXEC_BUF_MEM_UNKNOWN instead of "0"

>   ret = kexec_add_buffer(&kbuf);
>   if (ret) {
>   vfree((void *)image->arch.elf_headers);
> diff --git a/arch/x86/kernel/kexec-bzimage64.c 
> b/arch/x86/kernel/kexec-bzimage64.c
> index 278cd07228dd..558204bdf412 100644
> --- a/arch/x86/kernel/kexec-bzimage64.c
> +++ b/arch/x86/kernel/kexec-bzimage64.c
> @@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char 
> *kernel,
>   kbuf.memsz = PAGE_ALIGN(header->init_size);
>   kbuf.buf_align = header->kernel_alignment;
>   kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
> + kbuf.mem = 0;
>   ret = kexec_add_buffer(&kbuf);
>   if (ret)
>   goto out_free_params;
> @@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char 
> *kernel,
>   kbuf.bufsz = kbuf.memsz = initrd_len;
>   kbuf.buf_align = PAGE_SIZE;
>   kbuf.buf_min = MIN_INITRD_LOAD_ADDR;
> + kbuf.mem = 0;
>   ret = kexec_add_buffer(&kbuf);
>   if (ret)
>   goto out_free_params;
> -- 
> 2.17.0
>

Re: [PATCH] dsa: return error code upstream

2018-12-27 Thread Florian Fainelli

Le 12/27/18 à 4:22 PM, David Miller a écrit :
> From: Kangjie Lu 
> Date: Tue, 25 Dec 2018 22:08:18 -0600
> 
>> Both bcm_sf2_sw_indir_rw and mdiobus_write_nested could fail, so let's
>> return their error codes upstream.
>>
>> Signed-off-by: Kangjie Lu 
> 
> Applied with Subject line adjusted as per Florian's feedback.
> 

Thanks, technically bcm_sf2_sw_indir_rw() cannot fail and checking its
return value in the write (op == 0) case will always lead to returning 0
anyway. In case this ever changes in the future, we should be more
future proof with that change anyway.
-- 
Florian

Re: [PATCH] rtc: rv8803: Check return value of rv8803_write_reg

2018-12-27 Thread Alexandre Belloni

On 27/12/2018 17:28:33-0600, Kangjie Lu wrote:
> On Thu, Dec 27, 2018 at 4:31 PM Heiner Kallweit 
> wrote:
> 
> > On 27.12.2018 21:28, Aditya Pakki wrote:
> > > In rv8803_handle_irq, rv8803_write_reg can return a failed return
> > > value when attempting to write to the bus. The fix checks the output
> > > and throws a dev_warn notifying of the failure.
> > >
> > > Signed-off-by: Aditya Pakki 
> > > ---
> > >  drivers/rtc/rtc-rv8803.c | 9 +++--
> > >  1 file changed, 7 insertions(+), 2 deletions(-)
> > >
> > You seem to submit the same type of changes throughout very
> > different subsystems. And you do it w/o thinking and testing.
> > If you would have looked at rv8803_write_reg() you would have
> > seen that it prints an error in case of failure. So your
> > patch achieves nothing.
> > You got David Miller upset already and it looks like you
> > want to achieve the same with other maintainers too.
> > I'd strongly suggest that you stop sending patches until
> > you better understand the kernel code.
> >
> 
> Hello Heiner,
> 
> Thanks for your suggestion. Sure, we will try to better understand
> how the kernel works when we are preparing other patches. We recently
> found a lot of potential bugs; due to the significant workload but
> limited labor force, we may make some mistakes, but yes, we will try
> to avoid them.
> 
> One main reason we submit the patches is to seek feedback from Linux
> maintainers who know how the kernel works best.  We hope to get: (1)
> confirmation: if this is indeed a bug;

Come on, this is your job, not the maintainer job to check whether there
is indeed a bug. Else, the maintainer may as well just remove your
authorship because he did all the real work.

> (2) improvement feedback: if
> it is a bug and our fix is problematic, how can we improve it?
> 
> Taking the case in this email as an example, rv8803_write_reg could
> fail, so returning IRQ_HANDLED even when it failed doesn't seem to be
> a good practice. Would "returning IRQ_NONE upon failure" be a better
> fix?
> 
> Thanks again for your suggestion.

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

Re: [PATCH] rtc: rv8803: Check return value of rv8803_write_reg

2018-12-27 Thread Alexandre Belloni

On 27/12/2018 14:28:55-0600, Aditya Pakki wrote:
> In rv8803_handle_irq, rv8803_write_reg can return a failed return
> value when attempting to write to the bus. The fix checks the output
> and throws a dev_warn notifying of the failure.
> 

Is there any point in doing that as the error will self correct later
anyway?

I really doubt there is any user reading the logs on the systems with an
rv8803 and there is no user action needed anyway.

> Signed-off-by: Aditya Pakki 
> ---
>  drivers/rtc/rtc-rv8803.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
> index 450a0b831a2d..5a19d5ecbf57 100644
> --- a/drivers/rtc/rtc-rv8803.c
> +++ b/drivers/rtc/rtc-rv8803.c
> @@ -180,8 +180,13 @@ static irqreturn_t rv8803_handle_irq(int irq, void 
> *dev_id)
>  
>   if (events) {
>   rtc_update_irq(rv8803->rtc, 1, events);
> - rv8803_write_reg(client, RV8803_FLAG, flags);
> - rv8803_write_reg(rv8803->client, RV8803_CTRL, rv8803->ctrl);
> + if (rv8803_write_reg(client, RV8803_FLAG, flags))
> + dev_warn(&client->dev, "Failed to write RV8803 reg.\n");
> +
> + if (rv8803_write_reg(rv8803->client, RV8803_CTRL,
> + rv8803->ctrl))
> + dev_warn(&rv8803->client->dev,
> + "Failed to write RV8803_CTRL reg.\n");
>   }
>  
>   mutex_unlock(&rv8803->flags_lock);
> -- 
> 2.17.1
> 

-- 
Alexandre Belloni, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

Re: [PATCH 07/14] clock: milbeaut: Add Milbeaut M10V clock control

2018-12-27 Thread Stephen Boyd

Quoting Sugaya, Taichi (2018-12-25 17:35:27)
> Hi
> 
> On 2018/11/30 17:31, Stephen Boyd wrote:
> >> +   init.num_parents = parents;
> >> +   init.parent_names = parent_names;
> >> +
> >> +   mcm->cname = clk_name;
> >> +   mcm->parent = 0;
> >> +   mcm->hw.init = &init;
> >> +
> >> +   clk = clk_register(NULL, &mcm->hw);
> >> +   if (IS_ERR(clk))
> >> +   goto err_clk;
> >> +
> >> +   of_clk_add_provider(node, of_clk_src_simple_get, clk);
> >> +   return;
> >> +
> >> +err_clk:
> >> +   kfree(mcm);
> >> +err_mcm:
> >> +   kfree(parent_names);
> >> +}
> >> +CLK_OF_DECLARE(m10v_clk_mux, "socionext,milbeaut-m10v-clk-mux",
> >> +   m10v_clk_mux_setup);
> > 
> > Any chance you can use a platform driver?
> > 
> 
> Excuse me to re-ask you.
> Why do you recommend to use a platform driver? Is that current fad?

Not exactly a fad. We've been doing it for some time now. From an older
email on the list:

Reasons (in no particular order):

  1. We get a dev pointer to use with clk_hw_register()

  2. We can handle probe defer if some resource is not available

  3. Using device model gets us a hook into power management frameworks
 like runtime PM and system PM for things like suspend and hibernate

  4. It encourages a single DT node clk controller style binding
 instead of a single node per clk style binding

  5. We can use non-DT specific functions like devm_ioremap_resource() to map
 registers and acquire other resources, leading to more portable and
 generic code

  6. We may be able to make the device driver a module, which will
 make distros happy if we don't have to compile in all
 these clk drivers to the resulting vmlinux

[PATCH] arm/mach-lpc32xx/pm : use kmemdup instead of duplicating

2018-12-27 Thread Peng Hao

kmemdup() already implements the kmalloc() + memcpy() combination.
Prefer kmemdup() over the open-coded implementation.

Signed-off-by: Peng Hao 
---
 arch/arm/mach-lpc32xx/pm.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-lpc32xx/pm.c b/arch/arm/mach-lpc32xx/pm.c
index 6247157..1a2535c 100644
--- a/arch/arm/mach-lpc32xx/pm.c
+++ b/arch/arm/mach-lpc32xx/pm.c
@@ -85,8 +85,11 @@ static int lpc32xx_pm_enter(suspend_state_t state)
int (*lpc32xx_suspend_ptr) (void);
void *iram_swap_area;
 
-   /* Allocate some space for temporary IRAM storage */
-   iram_swap_area = kmalloc(lpc32xx_sys_suspend_sz, GFP_KERNEL);
+   /* Allocate some space for temporary IRAM storage. Then
+* backup a small area of IRAM used for the suspend code.
+*/
+   iram_swap_area = kmemdup((void *) TEMP_IRAM_AREA,
+lpc32xx_sys_suspend_sz, GFP_KERNEL);
if (!iram_swap_area) {
printk(KERN_ERR
   "PM Suspend: cannot allocate memory to save portion "
@@ -94,10 +97,6 @@ static int lpc32xx_pm_enter(suspend_state_t state)
return -ENOMEM;
}
 
-   /* Backup a small area of IRAM used for the suspend code */
-   memcpy(iram_swap_area, (void *) TEMP_IRAM_AREA,
-   lpc32xx_sys_suspend_sz);
-
/*
 * Copy code to suspend system into IRAM. The suspend code
 * needs to run from IRAM as DRAM may no longer be available
-- 
1.8.3.1

Re: [PATCH] net/wan/fsl_ucc_hdlc: Avoid double free in ucc_hdlc_probe()

2018-12-27 Thread David Miller

From: Peng Hao 
Date: Wed, 26 Dec 2018 16:28:30 +0800

> From: Wen Yang 
> 
> This patch fixes potential double frees if register_hdlc_device() fails.
> 
> Signed-off-by: Wen Yang 
> Reviewed-by: Peng Hao 

Applied.

Re: [PATCH] net: marvell: fix a missing check of acpi_match_device

2018-12-27 Thread David Miller

From: Kangjie Lu 
Date: Wed, 26 Dec 2018 00:31:08 -0600

> When acpi_match_device fails, its return value is NULL. Directly using
> the return value without a check may result in a NULL-pointer
> dereference. The fix checks if acpi_match_device fails, and if so,
> returns -EINVAL.
> 
> Signed-off-by: Kangjie Lu 

Applied.

Re: [PATCH] isdn: eicon: fix a missing check of api_parse

2018-12-27 Thread David Miller

From: Kangjie Lu 
Date: Wed, 26 Dec 2018 00:40:32 -0600

> api_parse can fail, and if it fails, we should not use the ss_parms
> which can be incorrect.
> The fix checks its return value and stops using ss_parms if api_parse
> fails.
> 
> Signed-off-by: Kangjie Lu 

This patch does not apply.

Re: [PATCH] tipc: fix a missing check of genlmsg_put

2018-12-27 Thread David Miller

From: Kangjie Lu 
Date: Wed, 26 Dec 2018 00:09:04 -0600

> genlmsg_put could fail. The fix inserts a check of its return value, and
> if it fails, returns -EMSGSIZE.
> 
> Signed-off-by: Kangjie Lu 

Applied.

Re: [PATCH] net: sfc: checks status of efx_mcdi_rpc

2018-12-27 Thread David Miller

From: Kangjie Lu 
Date: Tue, 25 Dec 2018 23:05:17 -0600

> efx_mcdi_rpc() could fail. The fix checks its status and issues an error
> message if it fails.
> 
> Signed-off-by: Kangjie Lu 
> ---
>  drivers/net/ethernet/sfc/mcdi.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
> index dfad93fca0a6..9486e6534dea 100644
> --- a/drivers/net/ethernet/sfc/mcdi.c
> +++ b/drivers/net/ethernet/sfc/mcdi.c
> @@ -1819,6 +1819,9 @@ void efx_mcdi_set_id_led(struct efx_nic *efx, enum 
> efx_led_mode mode)
>  
>   rc = efx_mcdi_rpc(efx, MC_CMD_SET_ID_LED, inbuf, sizeof(inbuf),
> NULL, 0, NULL);
> + if (rc)
> + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n",
> + __func__, rc);

"__func__" did not fail, efx_mcdi_rpc() did.

1 2 3 4 >

1 - 100 of 348 matches

Mail list logo