date:20190806

RE: [PATCH net] hv_netvsc: Fix a warning of suspicious RCU usage

2019-08-06 Thread Dexuan Cui

> From: Jakub Kicinski 
> Sent: Tuesday, August 6, 2019 12:13 PM
> To: Dexuan Cui 
> 
> On Tue, 6 Aug 2019 05:17:44 +0000, Dexuan Cui wrote:
> > This fixes a warning of "suspicious rcu_dereference_check() usage"
> > when nload runs.
> >
> > Signed-off-by: Stephen Hemminger 
> > Signed-off-by: Dexuan Cui 
> 
> Minor change in behaviour would perhaps be worth acknowledging in the
> commit message (since you check ndev for NULL later now), and a Fixes
> tag would be good.
> 
> But the code looks pretty straightforward and correct!

Hi,
Yeah, it looks like the minor behavior change doesn't matter, because IMO
'nvdev' can only be NULL when the NIC is being removed, or the MTU is
being changed, etc.

The Fixes tag is:
Fixes: 776e726bfb34 ("netvsc: fix RCU warning in get_stats")

If I should send a v2, please let me know.

Thanks,
-- Dexuan

[PATCH V3 08/10] vhost: do not use RCU to synchronize MMU notifier with worker

2019-08-06 Thread Jason Wang

We used to use RCU to synchronize the MMU notifier with the worker. This
leads to calling synchronize_rcu() in invalidate_range_start(). But on a
busy system, there are many factors that may slow down
synchronize_rcu(), which makes it unsuitable to be called in an MMU
notifier.

So this patch switches to using a seqlock counter to track whether or
not the map is in use. The counter is incremented when the vq starts or
finishes using the map. This means that when it is even, we're sure
there are no readers and the MMU notifier is synchronized. When it is
odd, there is a reader, and we need to wait for it to become even again;
then we are synchronized. Considering that the read critical section is
pretty small, the synchronization should complete very quickly.

Reported-by: Michael S. Tsirkin 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 141 ++
 drivers/vhost/vhost.h |   7 ++-
 2 files changed, 90 insertions(+), 58 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index cfc11f9ed9c9..57bfbb60d960 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -324,17 +324,16 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue 
*vq)
 
spin_lock(&vq->mmu_lock);
for (i = 0; i < VHOST_NUM_ADDRS; i++) {
-   map[i] = rcu_dereference_protected(vq->maps[i],
- lockdep_is_held(&vq->mmu_lock));
+   map[i] = vq->maps[i];
if (map[i]) {
vhost_set_map_dirty(vq, map[i], i);
-   rcu_assign_pointer(vq->maps[i], NULL);
+   vq->maps[i] = NULL;
}
}
spin_unlock(&vq->mmu_lock);
 
-   /* No need for synchronize_rcu() or kfree_rcu() since we are
-* serialized with memory accessors (e.g vq mutex held).
+   /* No need for synchronization since we are serialized with
+* memory accessors (e.g vq mutex held).
 */
 
for (i = 0; i < VHOST_NUM_ADDRS; i++)
@@ -362,6 +361,40 @@ static bool vhost_map_range_overlap(struct vhost_uaddr 
*uaddr,
return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size);
 }
 
+static void inline vhost_vq_access_map_begin(struct vhost_virtqueue *vq)
+{
+   write_seqcount_begin(&vq->seq);
+}
+
+static void inline vhost_vq_access_map_end(struct vhost_virtqueue *vq)
+{
+   write_seqcount_end(&vq->seq);
+}
+
+static void inline vhost_vq_sync_access(struct vhost_virtqueue *vq)
+{
+   unsigned int seq;
+
+   /* Make sure any changes to map was done before checking seq
+* counter. Paired with smp_wmb() in write_seqcount_begin().
+*/
+   smp_mb();
+   seq = raw_read_seqcount(&vq->seq);
+   /* Odd means the map was currently accessed by vhost worker */
+   if (seq & 0x1) {
+   /* When seq changes, we are sure no reader can see
+* previous map */
+   while (raw_read_seqcount(&vq->seq) == seq) {
+   if (need_resched())
+   schedule();
+   }
+   }
+   /* Make sure seq counter was checked before map is
+* freed. Paired with smp_wmb() in write_seqcount_end().
+*/
+   smp_mb();
+}
+
 static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
  int index,
  unsigned long start,
@@ -376,16 +409,15 @@ static void vhost_invalidate_vq_start(struct 
vhost_virtqueue *vq,
spin_lock(&vq->mmu_lock);
++vq->invalidate_count;
 
-   map = rcu_dereference_protected(vq->maps[index],
-   lockdep_is_held(&vq->mmu_lock));
+   map = vq->maps[index];
if (map) {
vhost_set_map_dirty(vq, map, index);
-   rcu_assign_pointer(vq->maps[index], NULL);
+   vq->maps[index] = NULL;
}
spin_unlock(&vq->mmu_lock);
 
if (map) {
-   synchronize_rcu();
+   vhost_vq_sync_access(vq);
vhost_map_unprefetch(map);
}
 }
@@ -457,7 +489,7 @@ static void vhost_init_maps(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
for (j = 0; j < VHOST_NUM_ADDRS; j++)
-   RCU_INIT_POINTER(vq->maps[j], NULL);
+   vq->maps[j] = NULL;
}
 }
 #endif
@@ -655,6 +687,7 @@ void vhost_dev_init(struct vhost_dev *dev,
vq->indirect = NULL;
vq->heads = NULL;
vq->dev = dev;
+   seqcount_init(&vq->seq);
mutex_init(&vq->mutex);
spin_lock_init(&vq->mmu_lock);
vhost_vq_reset(dev, vq);
@@ -921,7 +954,7 @@ static int vhost_map_prefetch(struct vhost_virtqueue *vq,
map->npages = npages;
map->pa

[PATCH V3 09/10] vhost: correctly set dirty pages in MMU notifiers callback

2019-08-06 Thread Jason Wang

We need to make sure there's no reference to the map before trying to
mark its pages dirty.

Reported-by: Michael S. Tsirkin 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 57bfbb60d960..6650a3ff88c1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -410,14 +410,13 @@ static void vhost_invalidate_vq_start(struct 
vhost_virtqueue *vq,
++vq->invalidate_count;
 
map = vq->maps[index];
-   if (map) {
-   vhost_set_map_dirty(vq, map, index);
+   if (map)
vq->maps[index] = NULL;
-   }
spin_unlock(&vq->mmu_lock);
 
if (map) {
vhost_vq_sync_access(vq);
+   vhost_set_map_dirty(vq, map, index);
vhost_map_unprefetch(map);
}
 }
-- 
2.18.1

Re: [PATCH v4 2/4] RISC-V: Add riscv_isa reprensenting ISA features common across CPUs

2019-08-06 Thread Anup Patel

On Wed, Aug 7, 2019 at 12:21 PM Christoph Hellwig  wrote:
>
> On Fri, Aug 02, 2019 at 09:27:21PM -0700, Atish Patra wrote:
> > From: Anup Patel 
> >
> > This patch adds riscv_isa integer to represent ISA features common
> > across all CPUs. The riscv_isa is not same as elf_hwcap because
> > elf_hwcap will only have ISA features relevant for user-space apps
> > whereas riscv_isa will have ISA features relevant to both kernel
> > and user-space apps.
> >
> > One of the use case is KVM hypervisor where riscv_isa will be used
> > to do following operations:
>
> Please add this to the kvm series.  Right now this is just dead code.

Sure, I will include this patch in KVM series.

Regards,
Anup

[PATCH V3 07/10] vhost: don't do synchronize_rcu() in vhost_uninit_vq_maps()

2019-08-06 Thread Jason Wang

There's no need for RCU synchronization in vhost_uninit_vq_maps()
since we've already serialized with readers (memory accessors). This
also avoids a possible userspace DoS through ioctl() caused by the
potentially high latency of synchronize_rcu().

Reported-by: Michael S. Tsirkin 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c12cdadb0855..cfc11f9ed9c9 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -333,7 +333,9 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
}
spin_unlock(&vq->mmu_lock);
 
-   synchronize_rcu();
+   /* No need for synchronize_rcu() or kfree_rcu() since we are
+* serialized with memory accessors (e.g vq mutex held).
+*/
 
for (i = 0; i < VHOST_NUM_ADDRS; i++)
if (map[i])
-- 
2.18.1

[PATCH V3 06/10] vhost: mark dirty pages during map uninit

2019-08-06 Thread Jason Wang

We don't mark dirty pages if the map is torn down outside the MMU
notifier. This leads to untracked dirty pages. Fix this by marking
dirty pages during map uninit.

Reported-by: Michael S. Tsirkin 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2a7217c33668..c12cdadb0855 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -305,6 +305,18 @@ static void vhost_map_unprefetch(struct vhost_map *map)
kfree(map);
 }
 
+static void vhost_set_map_dirty(struct vhost_virtqueue *vq,
+   struct vhost_map *map, int index)
+{
+   struct vhost_uaddr *uaddr = &vq->uaddrs[index];
+   int i;
+
+   if (uaddr->write) {
+   for (i = 0; i < map->npages; i++)
+   set_page_dirty(map->pages[i]);
+   }
+}
+
 static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
 {
struct vhost_map *map[VHOST_NUM_ADDRS];
@@ -314,8 +326,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue 
*vq)
for (i = 0; i < VHOST_NUM_ADDRS; i++) {
map[i] = rcu_dereference_protected(vq->maps[i],
  lockdep_is_held(&vq->mmu_lock));
-   if (map[i])
+   if (map[i]) {
+   vhost_set_map_dirty(vq, map[i], i);
rcu_assign_pointer(vq->maps[i], NULL);
+   }
}
spin_unlock(&vq->mmu_lock);
 
@@ -353,7 +367,6 @@ static void vhost_invalidate_vq_start(struct 
vhost_virtqueue *vq,
 {
struct vhost_uaddr *uaddr = &vq->uaddrs[index];
struct vhost_map *map;
-   int i;
 
if (!vhost_map_range_overlap(uaddr, start, end))
return;
@@ -364,10 +377,7 @@ static void vhost_invalidate_vq_start(struct 
vhost_virtqueue *vq,
map = rcu_dereference_protected(vq->maps[index],
lockdep_is_held(&vq->mmu_lock));
if (map) {
-   if (uaddr->write) {
-   for (i = 0; i < map->npages; i++)
-   set_page_dirty(map->pages[i]);
-   }
+   vhost_set_map_dirty(vq, map, index);
rcu_assign_pointer(vq->maps[index], NULL);
}
spin_unlock(&vq->mmu_lock);
-- 
2.18.1

[PATCH V3 04/10] vhost: fix vhost map leak

2019-08-06 Thread Jason Wang

We don't free the map in vhost_map_unprefetch(). This means it could
be leaked. Fix this by freeing the map.

Reported-by: Michael S. Tsirkin 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 17f6abea192e..2a3154976277 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -302,9 +302,7 @@ static void vhost_vq_meta_reset(struct vhost_dev *d)
 static void vhost_map_unprefetch(struct vhost_map *map)
 {
kfree(map->pages);
-   map->pages = NULL;
-   map->npages = 0;
-   map->addr = NULL;
+   kfree(map);
 }
 
 static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq)
-- 
2.18.1

[PATCH V3 10/10] vhost: do not return -EAGAIN for non blocking invalidation too early

2019-08-06 Thread Jason Wang

Instead of returning -EAGAIN unconditionally, we'd better do that only
when we're sure the range overlaps the metadata area.

Reported-by: Jason Gunthorpe 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 32 +++-
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6650a3ff88c1..0271f853fa9c 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -395,16 +395,19 @@ static void inline vhost_vq_sync_access(struct 
vhost_virtqueue *vq)
smp_mb();
 }
 
-static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
- int index,
- unsigned long start,
- unsigned long end)
+static int vhost_invalidate_vq_start(struct vhost_virtqueue *vq,
+int index,
+unsigned long start,
+unsigned long end,
+bool blockable)
 {
struct vhost_uaddr *uaddr = &vq->uaddrs[index];
struct vhost_map *map;
 
if (!vhost_map_range_overlap(uaddr, start, end))
-   return;
+   return 0;
+   else if (!blockable)
+   return -EAGAIN;
 
spin_lock(&vq->mmu_lock);
++vq->invalidate_count;
@@ -419,6 +422,8 @@ static void vhost_invalidate_vq_start(struct 
vhost_virtqueue *vq,
vhost_set_map_dirty(vq, map, index);
vhost_map_unprefetch(map);
}
+
+   return 0;
 }
 
 static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq,
@@ -439,18 +444,19 @@ static int vhost_invalidate_range_start(struct 
mmu_notifier *mn,
 {
struct vhost_dev *dev = container_of(mn, struct vhost_dev,
 mmu_notifier);
-   int i, j;
-
-   if (!mmu_notifier_range_blockable(range))
-   return -EAGAIN;
+   bool blockable = mmu_notifier_range_blockable(range);
+   int i, j, ret;
 
for (i = 0; i < dev->nvqs; i++) {
struct vhost_virtqueue *vq = dev->vqs[i];
 
-   for (j = 0; j < VHOST_NUM_ADDRS; j++)
-   vhost_invalidate_vq_start(vq, j,
- range->start,
- range->end);
+   for (j = 0; j < VHOST_NUM_ADDRS; j++) {
+   ret = vhost_invalidate_vq_start(vq, j,
+   range->start,
+   range->end, blockable);
+   if (ret)
+   return ret;
+   }
}
 
return 0;
-- 
2.18.1

[PATCH V3 05/10] vhost: reset invalidate_count in vhost_set_vring_num_addr()

2019-08-06 Thread Jason Wang

The vhost_set_vring_num_addr() could be called in the middle of
invalidate_range_start() and invalidate_range_end(). If we don't reset
invalidate_count after un-registering the MMU notifier,
invalidate_count will go out of sync (e.g. never reach zero). This will
in fact disable the fast accessor path. Fix this by resetting the count
to zero.

Reported-by: Michael S. Tsirkin 
Reported-by: Jason Gunthorpe 
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2a3154976277..2a7217c33668 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2073,6 +2073,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
d->has_notifier = false;
}
 
+   /* reset invalidate_count in case we are in the middle of
+* invalidate_start() and invalidate_end().
+*/
+   vq->invalidate_count = 0;
vhost_uninit_vq_maps(vq);
 #endif
 
-- 
2.18.1

[PATCH V3 01/10] vhost: disable metadata prefetch optimization

2019-08-06 Thread Jason Wang

From: "Michael S. Tsirkin" 

This seems to cause guest and host memory corruption.
Disable for now until we get a better handle on that.

Signed-off-by: Michael S. Tsirkin 
---
 drivers/vhost/vhost.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 819296332913..42a8c2a13ab1 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -96,7 +96,7 @@ struct vhost_uaddr {
 };
 
 #if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
-#define VHOST_ARCH_CAN_ACCEL_UACCESS 1
+#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
 #else
 #define VHOST_ARCH_CAN_ACCEL_UACCESS 0
 #endif
-- 
2.18.1

[PATCH V3 03/10] vhost: validate MMU notifier registration

2019-08-06 Thread Jason Wang

The return value of mmu_notifier_register() is not checked in
vhost_vring_set_num_addr(). This can leave the mm and the MMU notifier
out of sync and thus cause a double free. To solve this, introduce a
boolean flag to track whether the MMU notifier is registered and only
unregister it when the flag is true.

Reported-and-tested-by:
syzbot+e58112d71f77113dd...@syzkaller.appspotmail.com
Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 19 +++
 drivers/vhost/vhost.h |  1 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 488380a581dc..17f6abea192e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -629,6 +629,7 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
+   dev->has_notifier = false;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
INIT_LIST_HEAD(&dev->read_list);
@@ -730,6 +731,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
if (err)
goto err_mmu_notifier;
 #endif
+   dev->has_notifier = true;
 
return 0;
 
@@ -959,7 +961,11 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
}
if (dev->mm) {
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   mmu_notifier_unregister(&dev->mmu_notifier, dev->mm);
+   if (dev->has_notifier) {
+   mmu_notifier_unregister(&dev->mmu_notifier,
+   dev->mm);
+   dev->has_notifier = false;
+   }
 #endif
mmput(dev->mm);
}
@@ -2064,8 +2070,10 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
/* Unregister MMU notifer to allow invalidation callback
 * can access vq->uaddrs[] without holding a lock.
 */
-   if (d->mm)
+   if (d->has_notifier) {
mmu_notifier_unregister(&d->mmu_notifier, d->mm);
+   d->has_notifier = false;
+   }
 
vhost_uninit_vq_maps(vq);
 #endif
@@ -2085,8 +2093,11 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
if (r == 0)
vhost_setup_vq_uaddr(vq);
 
-   if (d->mm)
-   mmu_notifier_register(&d->mmu_notifier, d->mm);
+   if (d->mm) {
+   r = mmu_notifier_register(&d->mmu_notifier, d->mm);
+   if (!r)
+   d->has_notifier = true;
+   }
 #endif
 
mutex_unlock(&vq->mutex);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42a8c2a13ab1..a9a2a93857d2 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -214,6 +214,7 @@ struct vhost_dev {
int iov_limit;
int weight;
int byte_weight;
+   bool has_notifier;
 };
 
 bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
-- 
2.18.1

[PATCH V3 02/10] vhost: don't set uaddr for invalid address

2019-08-06 Thread Jason Wang

We should not set up uaddr for an invalid address, otherwise we may
try to pin or prefetch the mapping of the wrong pages.

Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address")
Signed-off-by: Jason Wang 
---
 drivers/vhost/vhost.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 0536f8526359..488380a581dc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2082,7 +2082,8 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d,
}
 
 #if VHOST_ARCH_CAN_ACCEL_UACCESS
-   vhost_setup_vq_uaddr(vq);
+   if (r == 0)
+   vhost_setup_vq_uaddr(vq);
 
if (d->mm)
mmu_notifier_register(&d->mmu_notifier, d->mm);
-- 
2.18.1

[PATCH V3 00/10] Fixes for metadata accelreation

2019-08-06 Thread Jason Wang

Hi all:

This series tries to fix several issues introduced by the metadata
acceleration series. Please review.

Changes from V2:
- use seqlock helper to synchronize MMU notifier with vhost worker

Changes from V1:

- try not to use RCU to synchronize MMU notifier with vhost worker
- set dirty pages after no readers
- return -EAGAIN only when we find the range is overlapped with
  metadata

Jason Wang (9):
  vhost: don't set uaddr for invalid address
  vhost: validate MMU notifier registration
  vhost: fix vhost map leak
  vhost: reset invalidate_count in vhost_set_vring_num_addr()
  vhost: mark dirty pages during map uninit
  vhost: don't do synchronize_rcu() in vhost_uninit_vq_maps()
  vhost: do not use RCU to synchronize MMU notifier with worker
  vhost: correctly set dirty pages in MMU notifiers callback
  vhost: do not return -EAGAIN for non blocking invalidation too early

Michael S. Tsirkin (1):
  vhost: disable metadata prefetch optimization

 drivers/vhost/vhost.c | 228 +++---
 drivers/vhost/vhost.h |  10 +-
 2 files changed, 151 insertions(+), 87 deletions(-)

-- 
2.18.1

Re: [PATCH v4 2/4] RISC-V: Add riscv_isa reprensenting ISA features common across CPUs

2019-08-06 Thread Christoph Hellwig

On Fri, Aug 02, 2019 at 09:27:21PM -0700, Atish Patra wrote:
> From: Anup Patel 
> 
> This patch adds riscv_isa integer to represent ISA features common
> across all CPUs. The riscv_isa is not same as elf_hwcap because
> elf_hwcap will only have ISA features relevant for user-space apps
> whereas riscv_isa will have ISA features relevant to both kernel
> and user-space apps.
> 
> One of the use case is KVM hypervisor where riscv_isa will be used
> to do following operations:

Please add this to the kvm series.  Right now this is just dead code.

Re: [PATCH V2 7/9] vhost: do not use RCU to synchronize MMU notifier with worker

2019-08-06 Thread Jason Wang




On 2019/8/6 下午8:04, Jason Gunthorpe wrote:

On Mon, Aug 05, 2019 at 12:20:45PM +0800, Jason Wang wrote:

On 2019/8/2 下午8:46, Jason Gunthorpe wrote:

On Fri, Aug 02, 2019 at 05:40:07PM +0800, Jason Wang wrote:

This must be a proper barrier, like a spinlock, mutex, or
synchronize_rcu.

I start with synchronize_rcu() but both you and Michael raise some
concern.

I've also idly wondered if calling synchronize_rcu() under the various
mm locks is a deadlock situation.


Maybe, that's why I suggest using vhost_work_flush(), which is much more
lightweight and can achieve the same function. It can guarantee that all
previous work has been processed after vhost_work_flush() returns.

If things are already running in a work, then yes, you can piggyback
on the existing spinlocks inside the workqueue and be Ok

However, if that work is doing any copy_from_user, then the flush
becomes dependent on swap and it won't work again...



Yes, it does copy_from_user(), so we can't do this.





1) spinlock: adds lots of overhead on the datapath; this leads to zero
performance improvement.

I think the topic here is correctness, not performance improvement.
  

But the whole series is to speed up vhost.

So? Starting with a whole bunch of crazy, possibly broken, locking and
claiming a performance win is not reasonable.



Yes, I admit this patch is tricky, I'm not going to push this. Will post 
a V3.






Spinlock is correct but makes the whole series meaningless, considering it
won't bring any performance improvement.

You can't invent a faster spinlock by opencoding some wild
scheme. There is nothing special about the usage here, it needs a
blocking lock, plain and simple.

Jason



Will post V3. Let's see if you are happy with that version.

Thanks

Re: [PATCH] riscv: delay: use do_div() instead of __udivdi3()

2019-08-06 Thread Christoph Hellwig

> diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c
> index 87ff89e88f2c..8c686934e0f6 100644
> --- a/arch/riscv/lib/delay.c
> +++ b/arch/riscv/lib/delay.c
> @@ -81,9 +81,14 @@ EXPORT_SYMBOL(__delay);
>  void udelay(unsigned long usecs)
>  {
>   u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
> + u64 n;
> + u32 rem;
>  
>   if (unlikely(usecs > MAX_UDELAY_US)) {
> - __delay((u64)usecs * riscv_timebase / 100ULL);
> + n = (u64)usecs * riscv_timebase;
> + rem = do_div(n, 100);
> +
> + __delay(n);
>   return;

A few comments on the variable usage:

I think you really want a variable of type u64 that contains the usecs
value instead of casting it three times.

n and rem can be easily declared inside the branch.

Re: [PATCH] riscv: kbuild: drop CONFIG_RISCV_ISA_C

2019-08-06 Thread Christoph Hellwig

On Tue, Aug 06, 2019 at 07:30:24PM -0700, Paul Walmsley wrote:
> 
> The baseline ISA support requirement for the RISC-V Linux kernel
> mandates compressed instructions, so it doesn't make sense for
> compressed instruction support to be configurable.

Looks good,

Reviewed-by: Christoph Hellwig

Re: [RFC PATCH 1/2] Regulator: Core: Add clock-enable to fixed-regulator

2019-08-06 Thread Philippe Schenker

On Tue, 2019-08-06 at 19:26 +0100, Mark Brown wrote:
> On Tue, Aug 06, 2019 at 12:57:32PM +0000, Philippe Schenker wrote:
> > On Mon, 2019-08-05 at 17:37 +0100, Mark Brown wrote:
> > > So the capacitor on the input of the p-FET is keeping the switch
> > > on?
> > > When I say it's not switching with the clock I mean it's not
> > > constantly
> > > bouncing on and off at whatever rate the clock is going at.
> > Ah, that's what you mean. Yes, the capacitor gets slowly charged
> > with
> > the
> > resistor but nearly instantly discharged with the n-FET. So this
> > capacitor
> > is used as a Low-Pass filter to get the p-FET to be constantly
> > switched.
> > It is not bouncing on and off with the clock but rather it is
> > switched
> > constantly.
> 
> Good, I guess this might be part of why it's got this poor ramp time.

Yes, I think so too.

> 
> > > I think you are going to end up with a hack no matter what.
> > That's exactly what I'm trying to prevent. To introduce a fixed
> > regulator that can have a clock is not a hack for me.
> > That the hardware solution is a hack is debatable yes, but why
> > should I
> > not try to solve it properly in software?
> 
> A lot of this discussion is around the definition of terms like "hack"
> and "proper".
> 
> > In the end I just want to represent our hardware in software. Would
> > you
> > agree to create a new clock-regulator.c driver?
> > Or would it make more sense to extend fixed.c to support clocks-
> > enable
> > without touching core?
> 
> At least a separate compatible makes sense, I'd have to see the code
> to
> be clear if a completely separate driver makes sense but it'll need
> separate ops at least.  There'd definitely be a lot of overlap though
> so
> it's worth looking at.

Okay, thanks for discussion! I will try to make something that will fit
in mainline kernel and I will learn more about the regulator subsystem
in general so I can make a solution that fits.
But I'll need some time to do that. I will for sure link to that
discussion when I send the patch.

Philippe

Re: Warnings whilst building 5.2.0+

2019-08-06 Thread John Hubbard


On 8/6/19 11:30 PM, Chris Clayton wrote:

On 09/07/2019 12:39, Chris Clayton wrote:

On 09/07/2019 11:37, Enrico Weigelt, metux IT consult wrote:

On 09.07.19 08:06, Chris Clayton wrote:

...

Can you check older versions, too ? Maybe also trying older gcc ?



I see the same warnings building linux-5.2.0 with gcc9. However, I don't see
the warnings building linux-5.2.0 with the 20190705 snapshot of gcc8. So the
warnings could result from an improvement (i.e. the problem was in the
kernel, but undiscovered by gcc8) or from a regression in gcc9.



 From the discussion starting at 
https://marc.info/?l=linux-kernel&m=156401014023908, it would appear that the 
problem is
undiscovered by gcc8. Building a fresh pull of Linus' tree this morning 
(v5.3-rc3-282-g33920f1ec5bf), I see that the
warnings are still being emitted. Adding the participants in the other 
discussion to this one.



The warnings are still there because the fix has not been committed to any
tree yet.

If you could try out my proposed fix [1], and reply to that thread with
perhaps a Tested-by tag, that would help encourage the maintainers to
accept it.

So far it hasn't made it to the top of their inboxes, but I'm hoping... :)


[1] https://lore.kernel.org/r/20190731054627.5627-2-jhubb...@nvidia.com
("x86/boot: save fields explicitly, zero out everything else")

thanks,
--
John Hubbard
NVIDIA

[PATCH v5 05/10] powerpc/fsl_booke/32: introduce reloc_kernel_entry() helper

2019-08-06 Thread Jason Yan

Add a new helper reloc_kernel_entry() to jump back to the start of the
new kernel. After we put the new kernel in a randomized place we can use
this new helper to enter the kernel and begin to relocate again.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/head_fsl_booke.S | 13 +
 arch/powerpc/mm/mmu_decl.h   |  1 +
 2 files changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 04d124fee17d..2083382dd662 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1143,6 +1143,19 @@ _GLOBAL(create_tlb_entry)
sync
blr
 
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+   mfmsr   r7
+   rlwinm  r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+   mtspr   SPRN_SRR0,r4
+   mtspr   SPRN_SRR1,r7
+   rfi
+
 /*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index a09f89d3aa0f..804da298beb3 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -143,6 +143,7 @@ extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
 void create_tlb_entry(phys_addr_t phys, unsigned long virt, int entry);
+void reloc_kernel_entry(void *fdt, int addr);
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
-- 
2.17.2

[PATCH v5 04/10] powerpc/fsl_booke/32: introduce create_tlb_entry() helper

2019-08-06 Thread Jason Yan

Add a new helper create_tlb_entry() to create a tlb entry from a virtual
and a physical address. This is preparation for supporting booting the
kernel at a randomized address.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/head_fsl_booke.S | 29 
 arch/powerpc/mm/mmu_decl.h   |  1 +
 2 files changed, 30 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index adf0505dbe02..04d124fee17d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1114,6 +1114,35 @@ __secondary_hold_acknowledge:
.long   -1
 #endif
 
+/*
+ * Create a 64M tlb by address and entry
+ * r3/r4 - physical address
+ * r5 - virtual address
+ * r6 - entry
+ */
+_GLOBAL(create_tlb_entry)
+   lis r7,0x1000   /* Set MAS0(TLBSEL) = 1 */
+   rlwimi  r7,r6,16,4,15   /* Setup MAS0 = TLBSEL | ESEL(r6) */
+   mtspr   SPRN_MAS0,r7/* Write MAS0 */
+
+   lis r6,(MAS1_VALID|MAS1_IPROT)@h
+   ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+   mtspr   SPRN_MAS1,r6/* Write MAS1 */
+
+   lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+   ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+   and r6,r6,r5
+   ori r6,r6,MAS2_M@l
+   mtspr   SPRN_MAS2,r6/* Write MAS2(EPN) */
+
+   ori r8,r4,(MAS3_SW|MAS3_SR|MAS3_SX)
+   mtspr   SPRN_MAS3,r8/* Write MAS3(RPN) */
+
+   tlbwe   /* Write TLB */
+   isync
+   sync
+   blr
+
 /*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 32c1a191c28a..a09f89d3aa0f 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -142,6 +142,7 @@ extern unsigned long calc_cam_sz(unsigned long ram, 
unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_tlb_entry(phys_addr_t phys, unsigned long virt, int entry);
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
-- 
2.17.2

Re: [PATCH v21 00/28] Intel SGX foundations

2019-08-06 Thread Jethro Beekman


On 2019-07-14 07:36, Jarkko Sakkinen wrote:

On Sat, Jul 13, 2019 at 08:07:36PM +0300, Jarkko Sakkinen wrote:

v21:
* Check on mmap() that the VMA does cover an area that does not have
   enclave pages. Only mapping with PROT_NONE can do that to reserve
   initial address space for an enclave.
* Check on mmap() and mprotect() that the VMA permissions do not
   surpass the enclave permissions.
* Remove two refcounts from vma_close(): mm_list and encl->refcount.
   Enclave refcount is only needed for swapper/enclave sync and we can
   remove mm_list refcount by destroying mm_struct when the process
   is closed. By not having vm_close() the Linux MM can merge VMAs.
* Do not naturally align MAP_FIXED address.
* Numerous small fixes and clean ups.
* Use SRCU for synchronizing the list of mm_struct's.
* Move to stack based call convention in the vDSO.


I forgot something:

* CONFIG_INTEL_SGX_DRIVER is now bistate, i.e. no more LKM support. It is
   still useful to have the compile-time option because VM host does not
   need to have it enabled. Now sgx_init() calls explicitly sgx_drv_init().
   In addition, platform driver has been ripped away because we no
   longer need ACPI hotplug. In effect, the device is now parentless.



I think you also missed in the changelog that you're now checking page 
permissions in EADD.


--
Jethro Beekman | Fortanix



smime.p7s
Description: S/MIME Cryptographic Signature

[PATCH v5 08/10] powerpc/fsl_booke/kaslr: clear the original kernel if randomized

2019-08-06 Thread Jason Yan

The original kernel still exists in memory; clear it now.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/kernel/kaslr_booke.c  | 11 +++
 arch/powerpc/mm/mmu_decl.h |  2 ++
 arch/powerpc/mm/nohash/fsl_booke.c |  1 +
 3 files changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index 52b59b05f906..c6b326424b54 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -400,3 +400,14 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
 
reloc_kernel_entry(dt_ptr, kimage_vaddr);
 }
+
+void __init kaslr_late_init(void)
+{
+   /* If randomized, clear the original kernel */
+   if (kimage_vaddr != KERNELBASE) {
+   unsigned long kernel_sz;
+
+   kernel_sz = (unsigned long)_end - kimage_vaddr;
+   memzero_explicit((void *)KERNELBASE, kernel_sz);
+   }
+}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9332772c8a66..f0a461482dba 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -150,8 +150,10 @@ extern void loadcam_multi(int first_idx, int num, int 
tmp_idx);
 
 #ifdef CONFIG_RANDOMIZE_BASE
 void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
 #else
 static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
 #endif
 
 struct tlbcam {
diff --git a/arch/powerpc/mm/nohash/fsl_booke.c 
b/arch/powerpc/mm/nohash/fsl_booke.c
index 8d25a8dc965f..e88fcc367600 100644
--- a/arch/powerpc/mm/nohash/fsl_booke.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -269,6 +269,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t 
start)
kernstart_addr = start;
if (is_second_reloc) {
virt_phys_offset = PAGE_OFFSET - memstart_addr;
+   kaslr_late_init();
return;
}
 
-- 
2.17.2

[PATCH v5 09/10] powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter

2019-08-06 Thread Jason Yan

One may want to disable kaslr at boot, so provide a cmdline parameter
'nokaslr' to support this.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/kaslr_booke.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index c6b326424b54..436f9a03f385 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -361,6 +361,18 @@ static unsigned long __init kaslr_choose_location(void 
*dt_ptr, phys_addr_t size
return kaslr_offset;
 }
 
+static inline __init bool kaslr_disabled(void)
+{
+   char *str;
+
+   str = strstr(boot_command_line, "nokaslr");
+   if (str == boot_command_line ||
+   (str > boot_command_line && *(str - 1) == ' '))
+   return true;
+
+   return false;
+}
+
 /*
  * To see if we need to relocate the kernel to a random offset
  * void *dt_ptr - address of the device tree
@@ -376,6 +388,8 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)
kernel_sz = (unsigned long)_end - KERNELBASE;
 
kaslr_get_cmdline(dt_ptr);
+   if (kaslr_disabled())
+   return;
 
offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
 
-- 
2.17.2

[PATCH v5 06/10] powerpc/fsl_booke/32: implement KASLR infrastructure

2019-08-06 Thread Jason Yan

This patch adds support for booting the kernel from places other than
KERNELBASE. Since CONFIG_RELOCATABLE is already supported, what we need to do is
map or copy kernel to a proper place and relocate. Freescale Book-E
parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
entries are not suitable to map the kernel directly in a randomized
region, so we chose to copy the kernel to a proper place and restart to
relocate.

The offset of the kernel is not randomized yet (a fixed 64M is set). We
will randomize it in the next patch.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/Kconfig  | 11 +++
 arch/powerpc/kernel/Makefile  |  1 +
 arch/powerpc/kernel/early_32.c|  2 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S | 17 ++--
 arch/powerpc/kernel/head_fsl_booke.S  | 13 ++-
 arch/powerpc/kernel/kaslr_booke.c | 84 +++
 arch/powerpc/mm/mmu_decl.h|  6 ++
 arch/powerpc/mm/nohash/fsl_booke.c|  7 +-
 8 files changed, 126 insertions(+), 15 deletions(-)
 create mode 100644 arch/powerpc/kernel/kaslr_booke.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..755378887912 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -548,6 +548,17 @@ config RELOCATABLE
  setting can still be useful to bootwrappers that need to know the
  load address of the kernel (eg. u-boot/mkimage).
 
+config RANDOMIZE_BASE
+   bool "Randomize the address of the kernel image"
+   depends on (FSL_BOOKE && FLATMEM && PPC32)
+   select RELOCATABLE
+   help
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ If unsure, say N.
+
 config RELOCATABLE_TEST
bool "Test relocatable kernel"
depends on (PPC64 && RELOCATABLE)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ea0c69236789..32f6c5b99307 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,6 +106,7 @@ extra-$(CONFIG_PPC_8xx) := head_8xx.o
 extra-y+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)  += reloc_$(BITS).o
+obj-$(CONFIG_RANDOMIZE_BASE)   += kaslr_booke.o
 
 obj-$(CONFIG_PPC32)+= entry_32.o setup_32.o early_32.o
 obj-$(CONFIG_PPC64)+= dma-iommu.o iommu.o
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
index 3482118ffe76..fe8347cdc07d 100644
--- a/arch/powerpc/kernel/early_32.c
+++ b/arch/powerpc/kernel/early_32.c
@@ -32,5 +32,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
 
apply_feature_fixups();
 
-   return KERNELBASE + offset;
+   return kimage_vaddr + offset;
 }
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index de0980945510..de7ee682bb4a 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -155,23 +155,22 @@ skpinv:   addir6,r6,1 /* 
Increment */
 
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kimage_vaddr mapping in TLB1[0] */
lis r6,0x1000   /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 
*/
mtspr   SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr   SPRN_MAS1,r6
-   lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
-   ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
-   mtspr   SPRN_MAS2,r6
+   lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+   ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+   and r6,r6,r20
+   ori r6,r6,M_IF_NEEDED@l
+   mtspr   SPRN_MAS2,r6
mtspr   SPRN_MAS3,r8
tlbwe
 
-/* 7. Jump to KERNELBASE mapping */
-   lis r6,(KERNELBASE & ~0xfff)@h
-   ori r6,r6,(KERNELBASE & ~0xfff)@l
-   rlwinm  r7,r25,0,0x03ff
-   add r6,r7,r6
+/* 7. Jump to kimage_vaddr mapping */
+   mr  r6,r20
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index 2083382dd662..aa55832e7506 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -155,6 +155,8 @@ _ENTRY(_start);
  */
 
 _ENTRY(__early_start)
+   LOAD_REG_ADDR_PIC(r20, kimage_vaddr)
+   lwz r20,0(r20)
 
 #define ENTRY_MAPPING_BOOT_SETUP
 #include "fsl_booke_entry_mapping.S"
@@ -277,8 +279,8

[PATCH v5 03/10] powerpc: introduce kimage_vaddr to store the kernel base

2019-08-06 Thread Jason Yan

Now the kernel base is a fixed value - KERNELBASE. To support KASLR, we
need a variable to store the kernel base.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/page.h | 2 ++
 arch/powerpc/mm/init-common.c   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..60a68d3a54b1 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -315,6 +315,8 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
+extern unsigned long kimage_vaddr;
+
 #include 
 #endif /* __ASSEMBLY__ */
 #include 
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 152ae0d21435..d4801ce48dc5 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -25,6 +25,8 @@ phys_addr_t memstart_addr = (phys_addr_t)~0ull;
 EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
 EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kimage_vaddr = KERNELBASE;
+EXPORT_SYMBOL_GPL(kimage_vaddr);
 
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
-- 
2.17.2

[PATCH v5 07/10] powerpc/fsl_booke/32: randomize the kernel image offset

2019-08-06 Thread Jason Yan

Now that we have basic support for relocating the kernel to some
appropriate place, we can start to randomize the offset.

Entropy is derived from the banner and timer, which will change every
build and boot. This is not very safe, so additionally the bootloader may
pass entropy via the /chosen/kaslr-seed node in the device tree.

We will use the first 512M of the low memory to randomize the kernel
image. The memory will be split into 64M zones. We will use the lower 8
bits of the entropy to decide the index of the 64M zone. Then we choose a
16K aligned offset inside the 64M zone to put the kernel in.

KERNELBASE

|-->   64M   <--|
|   |
+---+++---+
|   |||kernel||   |
+---+++---+
| |
|->   offset<-|

  kimage_vaddr

We also check if we will overlap with some areas like the dtb area, the
initrd area or the crashkernel area. If we cannot find a proper area,
kaslr will be disabled and boot from the original kernel.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
Reviewed-by: Christophe Leroy 
---
 arch/powerpc/kernel/kaslr_booke.c | 322 +-
 1 file changed, 320 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c
index 30f84c0321b2..52b59b05f906 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -23,6 +23,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -34,15 +36,329 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
+
+#ifdef DEBUG
+#define DBG(fmt...) pr_info(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct regions {
+   unsigned long pa_start;
+   unsigned long pa_end;
+   unsigned long kernel_size;
+   unsigned long dtb_start;
+   unsigned long dtb_end;
+   unsigned long initrd_start;
+   unsigned long initrd_end;
+   unsigned long crash_start;
+   unsigned long crash_end;
+   int reserved_mem;
+   int reserved_mem_addr_cells;
+   int reserved_mem_size_cells;
+};
 
 extern int is_second_reloc;
 
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+   LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+   int node = fdt_path_offset(fdt, "/chosen");
+
+   early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+  size_t size)
+{
+   size_t i;
+   const unsigned long *ptr = area;
+
+   for (i = 0; i < size / sizeof(hash); i++) {
+   /* Rotate by odd number of bits and XOR. */
+   hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+   hash ^= ptr[i];
+   }
+
+   return hash;
+}
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+   unsigned long hash = 0;
+
+   hash = rotate_xor(hash, build_str, sizeof(build_str));
+   hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+   return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+   int node, len;
+   fdt64_t *prop;
+   u64 ret;
+
+   node = fdt_path_offset(fdt, "/chosen");
+   if (node < 0)
+   return 0;
+
+   prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+   if (!prop || len != sizeof(u64))
+   return 0;
+
+   ret = fdt64_to_cpu(*prop);
+   *prop = 0;
+   return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+   return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+   u32 end, struct regions *regions)
+{
+   int subnode, len, i;
+   u64 base, size;
+
+   /* check for overlap with /memreserve/ entries */
+   for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+   if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+   continue;
+   if (regions_overlap(start, end, base, base + size))
+   return true;
+   }
+
+   if (regions->reserved_mem < 0)
+   return false;
+
+   /* check for overlap with static reservations in /reserved-memory */
+   for (subnode = fdt_first_subnode(fdt, regions->reserved_mem);
+subnode >= 0;
+subno

Re: [PATCH 00/12] block/bio, fs: convert put_page() to put_user_page*()

2019-08-06 Thread John Hubbard


On 8/6/19 11:34 PM, Christoph Hellwig wrote:

On Mon, Aug 05, 2019 at 03:54:35PM -0700, John Hubbard wrote:

On 7/23/19 11:17 PM, Christoph Hellwig wrote:

...

I think we can do this in a simple and better way.  We have 5 ITER_*
types.  Of those ITER_DISCARD as the name suggests never uses pages, so
we can skip handling it.  ITER_PIPE is rejected in the direct I/O path,
which leaves us with three.



Hi Christoph,

Are you working on anything like this?


I was hoping I could steer you towards it.  But if you don't want to do
it yourself I'll add it to my ever growing todo list.



Sure, I'm up for this. The bvec-related items are the next logical part
of the gup/dma conversions to work on, and I just wanted to avoid solving the
same problem if you were already in the code.



Or on the put_user_bvec() idea?


I have a prototype from two months ago:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/gup-bvec

but that only survived the most basic testing, so it'll need more work,
which I'm not sure when I'll find time for.



I'll take a peek, and probably pester you with a few questions if I get
confused. :)

thanks,
--
John Hubbard
NVIDIA

[PATCH v5 10/10] powerpc/fsl_booke/kaslr: dump out kernel offset information on panic

2019-08-06 Thread Jason Yan

When kaslr is enabled, the kernel offset is different for every boot.
This makes the kernel difficult to debug. Dump out the kernel
offset on panic so that we can easily debug the kernel.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/page.h |  5 +
 arch/powerpc/kernel/machine_kexec.c |  1 +
 arch/powerpc/kernel/setup-common.c  | 19 +++
 3 files changed, 25 insertions(+)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 60a68d3a54b1..cd3ac530e58d 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -317,6 +317,11 @@ struct vm_area_struct;
 
 extern unsigned long kimage_vaddr;
 
+static inline unsigned long kaslr_offset(void)
+{
+   return kimage_vaddr - KERNELBASE;
+}
+
 #include 
 #endif /* __ASSEMBLY__ */
 #include 
diff --git a/arch/powerpc/kernel/machine_kexec.c 
b/arch/powerpc/kernel/machine_kexec.c
index c4ed328a7b96..078fe3d76feb 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -86,6 +86,7 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
VMCOREINFO_OFFSET(mmu_psize_def, shift);
 #endif
+   vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
 }
 
 /*
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..064075f02837 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -715,12 +715,31 @@ static struct notifier_block ppc_panic_block = {
.priority = INT_MIN /* may not return; must be done last */
 };
 
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+   pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+kaslr_offset(), KERNELBASE);
+
+   return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+   .notifier_call = dump_kernel_offset
+};
+
 void __init setup_panic(void)
 {
/* PPC64 always does a hard irq disable in its panic handler */
if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
return;
atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+   if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+   atomic_notifier_chain_register(&panic_notifier_list,
+  &kernel_offset_notifier);
 }
 
 #ifdef CONFIG_CHECK_CACHE_COHERENCY
-- 
2.17.2

linux-next: manual merge of the akpm-current tree with the arm64 tree

2019-08-06 Thread Stephen Rothwell

Hi all,

Today's linux-next merge of the akpm-current tree got a conflict in:

  arch/arm64/include/asm/processor.h

between commit:

  b907b80d7ae7 ("arm64: remove pointless __KERNEL__ guards")

from the arm64 tree and commit:

  cd6ee3f76f64 ("arm64, mm: move generic mmap layout functions to mm")

from the akpm-current tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/arm64/include/asm/processor.h
index ec70762519df,65e2de00913f..
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@@ -280,8 -281,8 +280,6 @@@ static inline void spin_lock_prefetch(c
 "nop") : : "p" (ptr));
  }
  
- #define HAVE_ARCH_PICK_MMAP_LAYOUT
 -#endif
--
  extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
  extern void __init minsigstksz_setup(void);
  


pgp_WZvCdOvWn.pgp
Description: OpenPGP digital signature

[PATCH v5 00/10] implement KASLR for powerpc/fsl_booke/32

2019-08-06 Thread Jason Yan

This series implements KASLR for powerpc/fsl_booke/32, as a security
feature that deters exploit attempts relying on knowledge of the location
of kernel internals.

Since CONFIG_RELOCATABLE is already supported, what we need to do is
map or copy kernel to a proper place and relocate. Freescale Book-E
parts expect lowmem to be mapped by fixed TLB entries(TLB1). The TLB1
entries are not suitable to map the kernel directly in a randomized
region, so we chose to copy the kernel to a proper place and restart to
relocate.

Entropy is derived from the banner and timer base, which will change every
build and boot. This is not very safe, so additionally the bootloader may
pass entropy via the /chosen/kaslr-seed node in the device tree.

We will use the first 512M of the low memory to randomize the kernel
image. The memory will be split into 64M zones. We will use the lower 8
bits of the entropy to decide the index of the 64M zone. Then we choose a
16K aligned offset inside the 64M zone to put the kernel in.

KERNELBASE

|-->   64M   <--|
|   |
+---+++---+
|   |||kernel||   |
+---+++---+
| |
|->   offset<-|

  kimage_vaddr

We also check if we will overlap with some areas like the dtb area, the
initrd area or the crashkernel area. If we cannot find a proper area,
kaslr will be disabled and boot from the original kernel.

Changes since v4:
 - Add Reviewed-by tag from Christophe
 - Remove an unnecessary cast
 - Remove unnecessary parenthesis
 - Fix checkpatch warning

Changes since v3:
 - Add Reviewed-by and Tested-by tag from Diana
 - Change the comment in fsl_booke_entry_mapping.S to be consistent
   with the new code.

Changes since v2:
 - Remove unnecessary #ifdef
 - Use SZ_64M instead of 0x4000000
 - Call early_init_dt_scan_chosen() to init boot_command_line
 - Rename kaslr_second_init() to kaslr_late_init()

Changes since v1:
 - Remove some useless 'extern' keyword.
 - Replace EXPORT_SYMBOL with EXPORT_SYMBOL_GPL
 - Improve some assembly code
 - Use memzero_explicit instead of memset
 - Use boot_command_line and remove early_command_line
 - Do not print kaslr offset if kaslr is disabled

Jason Yan (10):
  powerpc: unify definition of M_IF_NEEDED
  powerpc: move memstart_addr and kernstart_addr to init-common.c
  powerpc: introduce kimage_vaddr to store the kernel base
  powerpc/fsl_booke/32: introduce create_tlb_entry() helper
  powerpc/fsl_booke/32: introduce reloc_kernel_entry() helper
  powerpc/fsl_booke/32: implement KASLR infrastructure
  powerpc/fsl_booke/32: randomize the kernel image offset
  powerpc/fsl_booke/kaslr: clear the original kernel if randomized
  powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter
  powerpc/fsl_booke/kaslr: dump out kernel offset information on panic

 arch/powerpc/Kconfig  |  11 +
 arch/powerpc/include/asm/nohash/mmu-book3e.h  |  10 +
 arch/powerpc/include/asm/page.h   |   7 +
 arch/powerpc/kernel/Makefile  |   1 +
 arch/powerpc/kernel/early_32.c|   2 +-
 arch/powerpc/kernel/exceptions-64e.S  |  10 -
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  27 +-
 arch/powerpc/kernel/head_fsl_booke.S  |  55 ++-
 arch/powerpc/kernel/kaslr_booke.c | 427 ++
 arch/powerpc/kernel/machine_kexec.c   |   1 +
 arch/powerpc/kernel/misc_64.S |   5 -
 arch/powerpc/kernel/setup-common.c|  19 +
 arch/powerpc/mm/init-common.c |   7 +
 arch/powerpc/mm/init_32.c |   5 -
 arch/powerpc/mm/init_64.c |   5 -
 arch/powerpc/mm/mmu_decl.h|  10 +
 arch/powerpc/mm/nohash/fsl_booke.c|   8 +-
 17 files changed, 560 insertions(+), 50 deletions(-)
 create mode 100644 arch/powerpc/kernel/kaslr_booke.c

-- 
2.17.2

[PATCH v5 02/10] powerpc: move memstart_addr and kernstart_addr to init-common.c

2019-08-06 Thread Jason Yan

These two variables are both defined in init_32.c and init_64.c. Move
them to init-common.c.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/mm/init-common.c | 5 +
 arch/powerpc/mm/init_32.c | 5 -
 arch/powerpc/mm/init_64.c | 5 -
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index a84da92920f7..152ae0d21435 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -21,6 +21,11 @@
 #include 
 #include 
 
+phys_addr_t memstart_addr = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
 
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index b04896a88d79..872df48ae41b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -56,11 +56,6 @@
 phys_addr_t total_memory;
 phys_addr_t total_lowmem;
 
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
 #ifdef CONFIG_RELOCATABLE
 /* Used in __va()/__pa() */
 long long virt_phys_offset;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a44f6281ca3a..c836f1269ee7 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -63,11 +63,6 @@
 
 #include 
 
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Given an address within the vmemmap, determine the pfn of the page that
-- 
2.17.2

[PATCH v5 01/10] powerpc: unify definition of M_IF_NEEDED

2019-08-06 Thread Jason Yan

M_IF_NEEDED is defined too many times. Move it to a common place.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Christophe Leroy 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 
---
 arch/powerpc/include/asm/nohash/mmu-book3e.h  | 10 ++
 arch/powerpc/kernel/exceptions-64e.S  | 10 --
 arch/powerpc/kernel/fsl_booke_entry_mapping.S | 10 --
 arch/powerpc/kernel/misc_64.S |  5 -
 4 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h 
b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index 4c9777d256fb..0877362e48fa 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -221,6 +221,16 @@
 #define TLBILX_T_CLASS26
 #define TLBILX_T_CLASS37
 
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define M_IF_NEEDEDMAS2_M
+#else
+#define M_IF_NEEDED0
+#endif
+
 #ifndef __ASSEMBLY__
 #include 
 
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..fd49ec07ce4a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1342,16 +1342,6 @@ skpinv:  addir6,r6,1 /* 
Increment */
sync
isync
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
-
 /* 6. Setup KERNELBASE mapping in TLB[0]
  *
  * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S 
b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index ea065282b303..de0980945510 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -153,16 +153,6 @@ skpinv:addir6,r6,1 /* 
Increment */
tlbivax 0,r9
TLBSYNC
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
-
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
 /* 6. Setup KERNELBASE mapping in TLB1[0] */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b55a7b4cb543..26074f92d4bc 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -432,11 +432,6 @@ kexec_create_tlb:
rlwimi  r9,r10,16,4,15  /* Setup MAS0 = TLBSEL | ESEL(r9) */
 
 /* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDEDMAS2_M
-#else
-#define M_IF_NEEDED0
-#endif
mtspr   SPRN_MAS0,r9
 
lis r9,(MAS1_VALID|MAS1_IPROT)@h
-- 
2.17.2

Re: Bisected: Kernel 4.14 + has 3 times higher write IO latency than Kernel 4.4 with raid1

2019-08-06 Thread Jinpu Wang

On Wed, Aug 7, 2019 at 1:40 AM NeilBrown  wrote:
>
> On Tue, Aug 06 2019, Jinpu Wang wrote:
>
> > On Tue, Aug 6, 2019 at 9:54 AM Jinpu Wang  
> > wrote:
> >>
> >> On Tue, Aug 6, 2019 at 1:46 AM NeilBrown  wrote:
> >> >
> >> > On Mon, Aug 05 2019, Jinpu Wang wrote:
> >> >
> >> > > Hi Neil,
> >> > >
> >> > > For the md higher write IO latency problem, I bisected it to these 
> >> > > commits:
> >> > >
> >> > > 4ad23a97 MD: use per-cpu counter for writes_pending
> >> > > 210f7cd percpu-refcount: support synchronous switch to atomic mode.
> >> > >
> >> > > Do you maybe have an idea? How can we fix it?
> >> >
> >> > Hmmm not sure.
> >> Hi Neil,
> >>
> >> Thanks for reply, detailed result in line.
>
> Thanks for the extra testing.
> ...
> > [  105.133299] md md0 in_sync is 0, sb_flags 2, recovery 3, external
> > 0, safemode 0, recovery_cp 524288
> ...
>
> ahh - the resync was still happening.  That explains why set_in_sync()
> is being called so often.  If you wait for sync to complete (or create
> the array with --assume-clean) you should see more normal behaviour.
I've updated my tests accordingly, thanks for the hint.
>
> This patch should fix it.  I think we can do better but it would be more
> complex so not suitable for backports to -stable.
>
> Once you confirm it works, I'll send it upstream with a
> Reported-and-Tested-by from you.
>
> Thanks,
> NeilBrown

Thanks a lot, Neil. My quick test shows that, yes, it fixed the problem for me.

I will run more tests to be sure, will report back the test result.

Regards,
Jack Wang

>
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 24638ccedce4..624cf1ac43dc 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -8900,6 +8900,7 @@ void md_check_recovery(struct mddev *mddev)
>
> if (mddev_trylock(mddev)) {
> int spares = 0;
> +   bool try_set_sync = mddev->safemode != 0;
>
> if (!mddev->external && mddev->safemode == 1)
> mddev->safemode = 0;
> @@ -8945,7 +8946,7 @@ void md_check_recovery(struct mddev *mddev)
> }
> }
>
> -   if (!mddev->external && !mddev->in_sync) {
> +   if (try_set_sync && !mddev->external && !mddev->in_sync) {
> spin_lock(&mddev->lock);
> set_in_sync(mddev);
> spin_unlock(&mddev->lock);

Re: [PATCH] ALSA: pcm: fix a memory leak bug

2019-08-06 Thread Wenwen Wang

On Wed, Aug 7, 2019 at 2:33 AM Takashi Iwai  wrote:
>
> On Wed, 07 Aug 2019 08:15:17 +0200,
> Wenwen Wang wrote:
> >
> > In hiface_pcm_init(), 'rt' is firstly allocated through kzalloc(). Later
> > on, hiface_pcm_init_urb() is invoked to initialize 'rt->out_urbs[i]'.
> > However, if the initialization fails, 'rt' is not deallocated, leading to a
> > memory leak bug.
> >
> > To fix the above issue, free 'rt' before returning the error.
> >
> > Signed-off-by: Wenwen Wang 
> > ---
> >  sound/usb/hiface/pcm.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
> > index 14fc1e1..5dbcd0d 100644
> > --- a/sound/usb/hiface/pcm.c
> > +++ b/sound/usb/hiface/pcm.c
> > @@ -599,8 +599,10 @@ int hiface_pcm_init(struct hiface_chip *chip, u8
> > extra_freq)
> > for (i = 0; i < PCM_N_URBS; i++) {
> > ret = hiface_pcm_init_urb(&rt->out_urbs[i], chip, OUT_EP,
> > hiface_pcm_out_urb_handler);
> > -   if (ret < 0)
> > +   if (ret < 0) {
> > +   kfree(rt);
> > return ret;
> > +   }
>
> Unfortunately this still leaves some memory.  We need to release
> rt->out_urbs[], too.  The relevant code is already in
> hiface_pcm_destroy(), so factor out the looped kfree() there and call
> it from both places.
>
> Care to resubmit with more fixes?

Thanks for your comments! I also found this issue, and am working on
another patch to fix it.

Wenwen

Re: [PATCH] ALSA: pcm: fix a memory leak bug

2019-08-06 Thread Takashi Iwai

On Wed, 07 Aug 2019 08:15:17 +0200,
Wenwen Wang wrote:
> 
> In hiface_pcm_init(), 'rt' is firstly allocated through kzalloc(). Later
> on, hiface_pcm_init_urb() is invoked to initialize 'rt->out_urbs[i]'.
> However, if the initialization fails, 'rt' is not deallocated, leading to a
> memory leak bug.
> 
> To fix the above issue, free 'rt' before returning the error.
> 
> Signed-off-by: Wenwen Wang 
> ---
>  sound/usb/hiface/pcm.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
> index 14fc1e1..5dbcd0d 100644
> --- a/sound/usb/hiface/pcm.c
> +++ b/sound/usb/hiface/pcm.c
> @@ -599,8 +599,10 @@ int hiface_pcm_init(struct hiface_chip *chip, u8
> extra_freq)
> for (i = 0; i < PCM_N_URBS; i++) {
> ret = hiface_pcm_init_urb(&rt->out_urbs[i], chip, OUT_EP,
> hiface_pcm_out_urb_handler);
> -   if (ret < 0)
> +   if (ret < 0) {
> +   kfree(rt);
> return ret;
> +   }

Unfortunately this still leaves some memory.  We need to release
rt->out_urbs[], too.  The relevant code is already in
hiface_pcm_destroy(), so factor out the looped kfree() there and call
it from both places.

Care to resubmit with more fixes?


thanks,

Takashi

Re: [PATCH v21 18/28] x86/sgx: Add swapping code to the core and SGX driver

2019-08-06 Thread Jethro Beekman


On 2019-07-13 10:07, Jarkko Sakkinen wrote:

Because the kernel is untrusted, swapping pages in/out of the Enclave
Page Cache (EPC) has specialized requirements:

* The kernel cannot directly access EPC memory, i.e. cannot copy data
   to/from the EPC.
* To evict a page from the EPC, the kernel must "prove" to hardware that
   there are no valid TLB entries for said page since a stale TLB entry
   would allow an attacker to bypass SGX access controls.
* When loading a page back into the EPC, hardware must be able to verify
   the integrity and freshness of the data.
* When loading an enclave page, e.g. regular pages and Thread Control
   Structures (TCS), hardware must be able to associate the page with a
   Secure Enclave Control Structure (SECS).

To satisfy the above requirements, the CPU provides dedicated ENCLS
functions to support paging data in/out of the EPC:

* EBLOCK:   Mark a page as blocked in the EPC Map (EPCM).  Attempting
 to access a blocked page that misses the TLB will fault.
* ETRACK:   Activate blocking tracking.  Hardware verifies that all
 translations for pages marked as "blocked" have been flushed
from the TLB.
* EPA:  Add version array page to the EPC.  As the name suggests, a
 VA page is a 512-entry array of version numbers that are
used to uniquely identify pages evicted from the EPC.
* EWB:  Write back a page from EPC to memory, e.g. RAM.  Software
 must supply a VA slot, memory to hold the Paging Crypto
Metadata (PCMD) of the page and obviously backing for the
evicted page.
* ELD{B,U}: Load a page in {un}blocked state from memory to EPC.  The
 driver only uses the ELDU variant as there is no use case
for loading a page as "blocked" in a bare metal environment.

To top things off, all of the above ENCLS functions are subject to
strict concurrency rules, e.g. many operations will #GP fault if two
or more operations attempt to access common pages/structures.

To put it succinctly, paging in/out of the EPC requires coordinating
with the SGX driver where all of an enclave's tracking resides.  But,
simply shoving all reclaim logic into the driver is not desirable as
doing so has unwanted long term implications:

* Oversubscribing EPC to KVM guests, i.e. virtualizing SGX in KVM and
   swapping a guest's EPC pages (without the guest's cooperation) needs
   the same high level flows for reclaim but has painfully different
   semantics in the details.
* Accounting EPC, i.e. adding an EPC cgroup controller, is desirable
   as EPC is effectively a specialized memory type and even more scarce
   than system memory.  Providing a single touchpoint for EPC accounting
   regardless of end consumer greatly simplifies the EPC controller.
* Allowing the userspace-facing driver to be built as a loaded module
   is desirable, e.g. for debug, testing and development.  The cgroup
   infrastructure does not support dependencies on loadable modules.
* Separating EPC swapping from the driver once it has been tightly
   coupled to the driver is non-trivial (speaking from experience).


Some of these points seem stale now.

--
Jethro Beekman | Fortanix



smime.p7s
Description: S/MIME Cryptographic Signature

Re: Warnings whilst building 5.2.0+

2019-08-06 Thread Chris Clayton




On 09/07/2019 12:39, Chris Clayton wrote:
> 
> 
> On 09/07/2019 11:37, Enrico Weigelt, metux IT consult wrote:
>> On 09.07.19 08:06, Chris Clayton wrote:
>>
>> Hi,
>>
>>> I've pulled Linus' tree this morning and, after running 'make oldconfig', 
>>> tried a build. During that build I got the
>>> following warnings, which look to me like they should be fixed. 'git 
>>> describe' shows v5.2-915-g5ad18b2e60b7 and my
>>> compiler is the 20190706 snapshot of gcc 9.
>>
>> Thanks for the report. I'm rebuilding right now anyway, so I'll look
>> out for it.
> 
> Thanks for the reply.
> 
>>> In file included from arch/x86/kernel/head64.c:35:
>>> In function 'sanitize_boot_params',
>>> inlined from 'copy_bootdata' at arch/x86/kernel/head64.c:391:2:
>>> ./arch/x86/include/asm/bootparam_utils.h:40:3: warning: 'memset' offset 
>>> [197, 448] from the object at 'boot_params' is
>>> out of the bounds of referenced subobject 'ext_ramdisk_image' with type 
>>> 'unsigned int' at offset 192 [-Warray-bounds]
>>>40 |   memset(&boot_params->ext_ramdisk_image, 0,
>>>   |   ^~
>>>41 |  (char *)&boot_params->efi_info -
>>>   |  
>>>42 |(char *)&boot_params->ext_ramdisk_image);
>>>   |
>>> ./arch/x86/include/asm/bootparam_utils.h:43:3: warning: 'memset' offset 
>>> [493, 497] from the object at 'boot_params' is
>>> out of the bounds of referenced subobject 'kbd_status' with type 'unsigned 
>>> char' at offset 491 [-Warray-bounds]
>>>43 |   memset(&boot_params->kbd_status, 0,
>>>   |   ^~~
>>>44 |  (char *)&boot_params->hdr -
>>>   |  ~~~
>>>45 |  (char *)&boot_params->kbd_status);
>>>   |  ~
>>
>> Can you check older versions, too ? Maybe also trying older gcc ?
>>
> 
> I see the same warnings building linux-5.2.0 with gcc9. However, I don't see 
> the warnings building linux-5.2.0 with the
> 20190705 snapshot of gcc8. So the warnings could result from an improvement (i.e. 
> the problem was in the kernel, but
> undiscovered by gcc8) or from a regression in gcc9.
> 

From the discussion starting at
https://marc.info/?l=linux-kernel&m=156401014023908, it would appear that the
problem is
undiscovered by gcc8. Building a fresh pull of Linus' tree this morning 
(v5.3-rc3-282-g33920f1ec5bf), I see that the
warnings are still being emitted. Adding the participants in the other 
discussion to this one.

>>
>> --mtx
>>

[PATCH] riscv: dts: sifive: Add missing "clock-frequency" to cpu0/cpu1 nodes

2019-08-06 Thread Bin Meng

Add the missing "clock-frequency" property to the cpu0/cpu1 nodes
for consistency with other cpu nodes.

Signed-off-by: Bin Meng 
---

 arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi 
b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 42b5ec2..4befc70 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -22,6 +22,7 @@
#address-cells = <1>;
#size-cells = <0>;
cpu0: cpu@0 {
+   clock-frequency = <0>;
compatible = "sifive,e51", "sifive,rocket0", "riscv";
device_type = "cpu";
i-cache-block-size = <64>;
@@ -37,6 +38,7 @@
};
};
cpu1: cpu@1 {
+   clock-frequency = <0>;
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
-- 
2.7.4

[PATCH] ALSA: pcm: fix a memory leak bug

2019-08-06 Thread Wenwen Wang

In hiface_pcm_init(), 'rt' is firstly allocated through kzalloc(). Later
on, hiface_pcm_init_urb() is invoked to initialize 'rt->out_urbs[i]'.
However, if the initialization fails, 'rt' is not deallocated, leading to a
memory leak bug.

To fix the above issue, free 'rt' before returning the error.

Signed-off-by: Wenwen Wang 
---
 sound/usb/hiface/pcm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
index 14fc1e1..5dbcd0d 100644
--- a/sound/usb/hiface/pcm.c
+++ b/sound/usb/hiface/pcm.c
@@ -599,8 +599,10 @@ int hiface_pcm_init(struct hiface_chip *chip, u8
extra_freq)
for (i = 0; i < PCM_N_URBS; i++) {
ret = hiface_pcm_init_urb(&rt->out_urbs[i], chip, OUT_EP,
hiface_pcm_out_urb_handler);
-   if (ret < 0)
+   if (ret < 0) {
+   kfree(rt);
return ret;
+   }
}

ret = snd_pcm_new(chip->card, "USB-SPDIF Audio", 0, 1, 0, &pcm);
-- 
2.7.4

Re: [PATCHv5 09/37] posix-clocks: Introduce CLOCK_MONOTONIC time namespace offsets

2019-08-06 Thread Thomas Gleixner

On Mon, 29 Jul 2019, Dmitry Safonov wrote:
>  
> +static inline void timens_add_monotonic(struct timespec64 *ts)
> +{
> + struct timens_offsets *ns_offsets = current->nsproxy->time_ns->offsets;
> +
> + if (ns_offsets)
> + *ts = timespec64_add(*ts, ns_offsets->monotonic);
> +}

This helper is not posix timer specific and should be introduced either in
the name space patches or in a separate patch.

Thanks

tglx

Re: [PATCH] ARM: dts: sun8i: a83t: Enable HDMI output on Cubietruck Plus

2019-08-06 Thread Chen-Yu Tsai

On Sun, Jul 28, 2019 at 10:59 PM Chen-Yu Tsai  wrote:
>
> From: Chen-Yu Tsai 
>
> The Cubietruck Plus has an HDMI connector tied to the HDMI output of the
> SoC.
>
> Enables display output via HDMI on the Cubietruck Plus. The connector
> device node is named "hdmi-connector" as there is also a display port
> connector, which is tied to the MIPI DSI output of the SoC through a
> MIPI-DSI-to-DP bridge. This part is not supported yet.
>
> Signed-off-by: Chen-Yu Tsai 

Applied for 5.4.

Re: [PATCHv5 06/37] alarmtimer: Provide get_timespec() callback

2019-08-06 Thread Thomas Gleixner

On Mon, 29 Jul 2019, Dmitry Safonov wrote:
>  /**
> @@ -869,8 +871,10 @@ static int __init alarmtimer_init(void)
>   /* Initialize alarm bases */
>   alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
>   alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real;
> + alarm_bases[ALARM_REALTIME].get_timespec = posix_get_timespec,

That's just wrong:

>  /*
>   * Get monotonic time for posix timers
>   */
> -static int posix_get_timespec(clockid_t which_clock, struct timespec64 *tp)
> +int posix_get_timespec(clockid_t which_clock, struct timespec64 *tp)
>  {
>   ktime_get_ts64(tp);
>   return 0;

Using a proper function name would have avoided this.

Re: [PATCHv5 04/37] posix-clocks: Rename _clock_get() functions into _clock_get_timespec()

2019-08-06 Thread Thomas Gleixner

On Mon, 29 Jul 2019, Dmitry Safonov wrote:
>  static const struct k_clock clock_monotonic = {
>   .clock_getres   = posix_get_hrtimer_res,
> - .clock_get_timespec = posix_ktime_get_ts,
> + .clock_get_timespec = posix_get_timespec,

 posix_get_monotonic_timespec

Please.

Re: [PATCH v2 13/14] PCI/P2PDMA: No longer require no-mmu for host bridge whitelist

2019-08-06 Thread Christoph Hellwig

no-mmu sounds strange, as we use that for linux ports without paging
hardware.  I think an "io" got lost somewhere..

Re: [PATCH v2 11/14] PCI/P2PDMA: Store mapping method in an xarray

2019-08-06 Thread Christoph Hellwig

On Tue, Jul 30, 2019 at 10:35:42AM -0600, Logan Gunthorpe wrote:
> When upstream_bridge_distance() is called store the method required
> to map the DMA transfers in an xarray so that it can be looked up
> efficiently on the hot path in pci_p2pdma_map_sg().
> 
> Signed-off-by: Logan Gunthorpe 
> ---
>  drivers/pci/p2pdma.c | 40 +++-
>  1 file changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index fe647bd8f947..010aa8742bec 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -19,10 +19,19 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +
> +enum pci_p2pdma_map_type {
> + PCI_P2PDMA_MAP_UNKNOWN = 0,
> + PCI_P2PDMA_MAP_NOT_SUPPORTED,
> + PCI_P2PDMA_MAP_BUS_ADDR,
> + PCI_P2PDMA_MAP_THRU_IOMMU,
> +};

So here we add a new enum for the map type, but for the internal code
the previous overloading of the distance is kept, which seems a little
strange.

> + if (!(dist & P2PDMA_THRU_HOST_BRIDGE)) {
> + map_type = PCI_P2PDMA_MAP_BUS_ADDR;
> + goto store_map_type_and_return;
> + }
> +
> + if (host_bridge_whitelist(provider, client)) {
> + map_type = PCI_P2PDMA_MAP_THRU_IOMMU;
> + } else {
> + dist |= P2PDMA_NOT_SUPPORTED;
> + map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
> + }
>  
> +store_map_type_and_return:

Why not:

if (dist & P2PDMA_THRU_HOST_BRIDGE) {
if (host_bridge_whitelist(provider, client)) {
map_type = PCI_P2PDMA_MAP_THRU_IOMMU;
} else {
dist |= P2PDMA_NOT_SUPPORTED;
map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
}
}

Re: [PATCH v8 13/14] KVM/x86/vPMU: check the lbr feature before entering guest

2019-08-06 Thread Wei Wang


On 08/06/2019 03:16 PM, Wei Wang wrote:

The guest can access the lbr related msrs only when the vcpu's lbr event
has been assigned the lbr feature. A cpu pinned lbr event (though no such
event usages in the current upstream kernel) could reclaim the lbr feature
from the vcpu's lbr event (task pinned) via ipi calls. If the cpu is
running in the non-root mode, this will cause the cpu to vm-exit to handle
the host ipi and then vm-entry back to the guest. So on vm-entry (where
interrupt has been disabled), we double confirm that the vcpu's lbr event
is still assigned the lbr feature via checking event->oncpu.

The pass-through of the lbr related msrs will be cancelled if the lbr is
reclaimed, and the following guest accesses to the lbr related msrs will
vm-exit to the related msr emulation handler in kvm, which will prevent
the accesses.

Signed-off-by: Wei Wang 
---
  arch/x86/kvm/pmu.c   |  6 ++
  arch/x86/kvm/pmu.h   |  3 +++
  arch/x86/kvm/vmx/pmu_intel.c | 35 +++
  arch/x86/kvm/x86.c   | 13 +
  4 files changed, 57 insertions(+)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index afad092..ed10a57 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -339,6 +339,12 @@ bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu)
return false;
  }
  
+void kvm_pmu_enabled_feature_confirm(struct kvm_vcpu *vcpu)

+{
+   if (kvm_x86_ops->pmu_ops->enabled_feature_confirm)
+   kvm_x86_ops->pmu_ops->enabled_feature_confirm(vcpu);
+}
+
  void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
  {
if (lapic_in_kernel(vcpu))
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index f875721..7467907 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -30,6 +30,7 @@ struct kvm_pmu_ops {
int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
bool (*lbr_enable)(struct kvm_vcpu *vcpu);
+   void (*enabled_feature_confirm)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
@@ -126,6 +127,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void 
__user *argp);
  
  bool is_vmware_backdoor_pmc(u32 pmc_idx);
  
+void kvm_pmu_enabled_feature_confirm(struct kvm_vcpu *vcpu);

+
  extern struct kvm_pmu_ops intel_pmu_ops;
  extern struct kvm_pmu_ops amd_pmu_ops;
  #endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5580f1a..421051aa 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -781,6 +781,40 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
intel_pmu_free_lbr_event(vcpu);
  }
  
+void intel_pmu_lbr_confirm(struct kvm_vcpu *vcpu)

+{
+   struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+   /*
+* Either lbr_event being NULL or lbr_used being false indicates that
+* the lbr msrs haven't been passed through to the guest, so no need
+* to cancel passthrough.
+*/
+   if (!pmu->lbr_event || !pmu->lbr_used)
+   return;
+
+   /*
+* The lbr feature gets reclaimed via IPI calls, so checking of
+* lbr_event->oncpu needs to be in an atomic context. Just confirm
+* that irq has been disabled already.
+*/
+   lockdep_assert_irqs_disabled();
+
+   /*
+* Cancel the pass-through of the lbr msrs if lbr has been reclaimed
+* by the host perf.
+*/
+   if (pmu->lbr_event->oncpu != -1) {


A mistake here,  should be "pmu->lbr_event->oncpu == -1".
(It didn't seem to affect the profiling result, but generated
more vm-exits due to mistakenly cancelling the passthrough)

Best,
Wei

Re: [PATCH v2 03/14] PCI/P2PDMA: Add constants for not-supported result upstream_bridge_distance()

2019-08-06 Thread Christoph Hellwig

On Tue, Jul 30, 2019 at 10:35:34AM -0600, Logan Gunthorpe wrote:
> Add constant flags to indicate two devices are not supported or whether
> the data path goes through the host bridge instead of using the negative
> values -1 and -2.
> 
> This helps annotate the code better, but the main reason is so we
> can use the information to store the required mapping method in an
> xarray.
> 
> Signed-off-by: Logan Gunthorpe 
> Reviewed-by: Christian König 

Is there really no way to keep the distance separate from the type of
the connection as I requested?  I think that would avoid a lot of
confusion down the road.

Re: [PATCH v5 0/3] Merge m25p80 into spi-nor

2019-08-06 Thread Tudor.Ambarus



On 08/06/2019 08:10 AM, Vignesh Raghavendra wrote:
> External E-Mail
> 
> 
> This is a repost of patches 6 and 7 split from Boris Brezillon's X-X-X
> mode support series[1]
> 
> Background from cover letter for RFC[1]:
> m25p80 is just a simple SPI NOR controller driver (a wrapper around the
> SPI mem API). Not only it shouldn't be named after a specific SPI NOR
> chip, but it also doesn't deserve a specific driver IMO, especially if
> the end goal is to get rid of SPI NOR controller drivers found in
> drivers/mtd/spi-nor/ and replace them by SPI mem drivers (which would
> be placed in drivers/spi/). With this solution, we declare the SPI NOR
> driver as a spi_mem_driver, just like the SPI NAND layer is declared as
> a spi_mem driver (patch 1/2).
> This solution also allows us to check at a fine-grained level (thanks to
> the spi_mem_supports_op() function) which operations are supported and
> which ones are not, while the original m25p80 logic was basing this
> decision on the SPI_{RX,TX}_{DUAL,QUAD,OCTO} flags only (patch 2/2).
> 
> [1] https://patchwork.ozlabs.org/cover/982926/
> 
> Tested on TI's DRA7xx EVM with TI QSPI controller (a spi-mem driver) with
> DMA (s25fl256 and mx66l51235l) flash. I don't see any performance
> regression due to the bounce buffer copy introduced by this series.
> Also tested with the cadence-quadspi (a spi-nor driver) driver.
> 
> Boris Brezillon (2):
>   mtd: spi-nor: Move m25p80 code in spi-nor.c
>   mtd: spi-nor: Rework hwcaps selection for the spi-mem case
> 
> Vignesh Raghavendra (1):
>   mtd: spi-nor: always use bounce buffer for register read/writes
> 
>  drivers/mtd/devices/Kconfig   |  18 -
>  drivers/mtd/devices/Makefile  |   1 -
>  drivers/mtd/devices/m25p80.c  | 347 ---
>  drivers/mtd/spi-nor/Kconfig   |   2 +
>  drivers/mtd/spi-nor/spi-nor.c | 814 +++---
>  include/linux/mtd/spi-nor.h   |  24 +-
>  6 files changed, 777 insertions(+), 429 deletions(-)
>  delete mode 100644 drivers/mtd/devices/m25p80.c
> 

The patches are looking good. I'll be out of office starting today and will
return on Monday. I'll let the 0day bot run its tests and then I will do some
tests on a flash or two. I intend to apply your patches on Monday.

Thanks, Vignesh!
ta

Re: [PATCH v5 3/3] mtd: spi-nor: Rework hwcaps selection for the spi-mem case

2019-08-06 Thread Tudor.Ambarus



On 08/06/2019 08:10 AM, Vignesh Raghavendra wrote:
> +static int spi_nor_spimem_check_op(struct spi_nor *nor,
> +struct spi_mem_op *op)
> +{
> + /*
> +  * First test with 4 address bytes. The opcode itself might
> +  * be a 3B addressing opcode but we don't care, because
> +  * SPI controller implementation should not check the opcode,
> +  * but just the sequence.
> +  */
> + op->addr.nbytes = 4;
> + if (!spi_mem_supports_op(nor->spimem, op)) {
> + /* If flash size <16MB, 3 address bytes are sufficient */
> + if (nor->mtd.size <= SZ_16M) {
> + op->addr.nbytes = 3;
> + if (!spi_mem_supports_op(nor->spimem, op))
> + return -ENOTSUPP;
> + return 0;
> + }
> + return -ENOTSUPP;
> + }
> +
> + return 0;
> +}

We can get rid of a level of indentation by writing it as:

static int spi_nor_spimem_check_op(struct spi_nor *nor,
   struct spi_mem_op *op)
{
op->addr.nbytes = 4;
if (!spi_mem_supports_op(nor->spimem, op)) {
if (nor->mtd.size > SZ_16M)
return -ENOTSUPP;

/* If flash size <16MB, 3 address bytes are sufficient */
op->addr.nbytes = 3;
if (!spi_mem_supports_op(nor->spimem, op))
return -ENOTSUPP;
}

return 0;
}

I'll do this by myself when applying, no need to resubmit.

Thanks, Vignesh!
ta

Re: [PATCH 5.2 073/131] dma-direct: correct the physical addr in dma_direct_sync_sg_for_cpu/device

2019-08-06 Thread Christoph Hellwig

On Tue, Aug 06, 2019 at 06:04:48PM -0400, Sasha Levin wrote:
> On Tue, Aug 06, 2019 at 01:57:56PM +0100, Robin Murphy wrote:
>> Given that the two commits touch entirely separate files I'm not sure what 
>> the imagined dependency could be :/
>
>> From the commit message of 3de433c5b38a ("drm/msm: Use the correct
> dma_sync calls in msm_gem"):
>
>Fixes the combination of two patches:
>
>Fixes: 0036bc73ccbe (drm/msm: stop abusing dma_map/unmap for cache)
>Fixes: 449fa54d6815 (dma-direct: correct the physical addr in 
> dma_direct_sync_sg_for_cpu/device)
>
>> 0036bc73ccbe is indeed not a fix (frankly I'm not convinced it's even a 
>> valid change at all) but even conceptually it bears no relation whatsoever 
>> to the genuine bug fixed by 449fa54d6815.
>
> Given that Rob Clark asked me to drop 0036bc73ccbe not because it's
> irrelevant but because it's potentially dangerous, I did not feel
> confident enough ignoring the statement in the commit message and
> dropped this patch instead.

449fa54d6815 fixes swiotlb misbehaving vs the API spec for the call,
something that real users on x86 caught.  Rob's fix works around the
fact that msm is badly abusing the DMA API.  So even if both are genuine
bugs it is pretty clear we need to decide the match for the proper
users of the API and not the single abuser.

Re: [PATCH] riscv: kbuild: add virtual memory system selection

2019-08-06 Thread Christoph Hellwig

On Tue, Aug 06, 2019 at 05:02:03PM -0700, Paul Walmsley wrote:
> The rationale is to encourage others to start laying the groundwork for 
> future Sv48 support.  The immediate trigger for it was Alex's mmap 
> randomization support patch series, which needs to set some Kconfig 
> options differently depending on the selection of Sv32/39/48.  

Writing a formal todo list is much better encouragement than adding
dead code.  The latter has a tendency of lingering around forever and
actually hurting people.

> 
> > but actively harmful, which is even worse.
> 
> Reflecting on this assertion, the only case that I could come up with is 
> that randconfig or allyesconfig build testing could fail.  Is this the 
> case that you're thinking of, or is there a different one?  If that's the 
> one, I do agree that it would be best to avoid this case, and it looks 
> like there's no obvious way to work around that issue.

randconfig or just a user thinking bigger is better and picking it.

> > Even if we assume we want to implement Sv48 eventually (which seems
> > to be a bit off), we need to make this a runtime choice and not a
> > compile time one to not balloon the number of configs that distributions
> > (and kernel developers) need to support.
> 
> The expectation is that kernels that support multiple virtual memory 
> system modes at runtime will probably incur either a performance or a 
> memory layout penalty for doing so.  So performance-sensitive embedded 
> applications will select only the model that they use, while distribution 
> kernels will likely take the performance hit for broader single-kernel 
> support.

Even if we want to support Sv39 only or Sv39+Sv39 the choice in the
patch doesn't make any sense.  So better do the whole thing when it's
ready than doing false "groundwork".

[PATCH 3/9] remoteproc: qcom: pas: Update IMEM PIL info on load

2019-08-06 Thread Bjorn Andersson

Use the sysmon_name as identifier and store the relocated base address
and size of the memory region in the PIL relocation info structure in
IMEM.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/Kconfig |  1 +
 drivers/remoteproc/qcom_q6v5_pas.c | 15 ---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index b88d74632d39..2aa0743fc05b 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -136,6 +136,7 @@ config QCOM_Q6V5_PAS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
select MFD_SYSCON
+   select QCOM_PIL_INFO
select QCOM_MDT_LOADER
select QCOM_Q6V5_COMMON
select QCOM_RPROC_COMMON
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index db4b3c4bacd7..bfb622d36cb3 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -23,6 +23,7 @@
 #include 
 
 #include "qcom_common.h"
+#include "qcom_pil_info.h"
 #include "qcom_q6v5.h"
 #include "remoteproc_internal.h"
 
@@ -52,6 +53,7 @@ struct qcom_adsp {
int pas_id;
int crash_reason_smem;
bool has_aggre2_clk;
+   const char *info_name;
 
struct completion start_done;
struct completion stop_done;
@@ -70,11 +72,17 @@ struct qcom_adsp {
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
 {
struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
+   int ret;
 
-   return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
-adsp->mem_region, adsp->mem_phys, adsp->mem_size,
-&adsp->mem_reloc);
+   ret = qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
+   adsp->mem_region, adsp->mem_phys, adsp->mem_size,
+   &adsp->mem_reloc);
+   if (ret)
+   return ret;
 
+   qcom_pil_info_store(adsp->info_name, adsp->mem_reloc, adsp->mem_size);
+
+   return 0;
 }
 
 static int adsp_start(struct rproc *rproc)
@@ -278,6 +286,7 @@ static int adsp_probe(struct platform_device *pdev)
adsp->rproc = rproc;
adsp->pas_id = desc->pas_id;
adsp->has_aggre2_clk = desc->has_aggre2_clk;
+   adsp->info_name = desc->sysmon_name;
platform_set_drvdata(pdev, adsp);
 
ret = adsp_alloc_memory_region(adsp);
-- 
2.18.0

[PATCH 9/9] remoteproc: qcom: Introduce panic handler for PAS and ADSP

2019-08-06 Thread Bjorn Andersson

Make the PAS and ADSP/CDSP remoteproc drivers implement the panic
handler that will invoke a stop to prepare the remoteprocs for post
mortem debugging.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/qcom_q6v5_adsp.c | 8 
 drivers/remoteproc/qcom_q6v5_pas.c  | 8 
 2 files changed, 16 insertions(+)

diff --git a/drivers/remoteproc/qcom_q6v5_adsp.c 
b/drivers/remoteproc/qcom_q6v5_adsp.c
index e953886b2eb7..3de1683903db 100644
--- a/drivers/remoteproc/qcom_q6v5_adsp.c
+++ b/drivers/remoteproc/qcom_q6v5_adsp.c
@@ -282,12 +282,20 @@ static void *adsp_da_to_va(struct rproc *rproc, u64 da, 
int len)
return adsp->mem_region + offset;
 }
 
+static void adsp_panic(struct rproc *rproc)
+{
+   struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
+
+   qcom_q6v5_panic(&adsp->q6v5);
+}
+
 static const struct rproc_ops adsp_ops = {
.start = adsp_start,
.stop = adsp_stop,
.da_to_va = adsp_da_to_va,
.parse_fw = qcom_register_dump_segments,
.load = adsp_load,
+   .panic = adsp_panic,
 };
 
 static int adsp_init_clock(struct qcom_adsp *adsp, const char **clk_ids)
diff --git a/drivers/remoteproc/qcom_q6v5_pas.c 
b/drivers/remoteproc/qcom_q6v5_pas.c
index bfb622d36cb3..31ff09bcd3ee 100644
--- a/drivers/remoteproc/qcom_q6v5_pas.c
+++ b/drivers/remoteproc/qcom_q6v5_pas.c
@@ -179,12 +179,20 @@ static void *adsp_da_to_va(struct rproc *rproc, u64 da, 
int len)
return adsp->mem_region + offset;
 }
 
+static void adsp_panic(struct rproc *rproc)
+{
+   struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
+
+   qcom_q6v5_panic(&adsp->q6v5);
+}
+
 static const struct rproc_ops adsp_ops = {
.start = adsp_start,
.stop = adsp_stop,
.da_to_va = adsp_da_to_va,
.parse_fw = qcom_register_dump_segments,
.load = adsp_load,
+   .panic = adsp_panic,
 };
 
 static int adsp_init_clock(struct qcom_adsp *adsp)
-- 
2.18.0

[PATCH 5/9] arm64: dts: qcom: qcs404: Add IMEM and PIL info region

2019-08-06 Thread Bjorn Andersson

Add a simple-mfd representing IMEM on QCS404 and define the PIL
relocation info region, so that post mortem tools will be able to locate
the loaded remoteprocs.

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/qcs404.dtsi | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi 
b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index 3d0789775009..1604a9697832 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -845,6 +845,16 @@
status = "disabled";
};
 
+   imem@860 {
+   compatible = "syscon", "simple-mfd";
+   reg = <0x0860 0x1000>;
+
+   pil-reloc {
+   compatible ="qcom,pil-reloc-info";
+   offset = <0x94c>;
+   };
+   };
+
intc: interrupt-controller@b00 {
compatible = "qcom,msm-qgic2";
interrupt-controller;
-- 
2.18.0

[PATCH 7/9] remoteproc: Introduce "panic" callback in ops

2019-08-06 Thread Bjorn Andersson

Introduce a "panic" function in the remoteproc ops table, to allow
remoteproc instances to perform operations needed in order to aid in
post mortem system debugging, such as flushing caches etc, when the
kernel panics.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/remoteproc_core.c | 16 
 include/linux/remoteproc.h   |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/drivers/remoteproc/remoteproc_core.c 
b/drivers/remoteproc/remoteproc_core.c
index 3c5fbbbfb0f1..cc47797c6496 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1833,6 +1833,16 @@ void rproc_shutdown(struct rproc *rproc)
 }
 EXPORT_SYMBOL(rproc_shutdown);
 
+static int rproc_panic_handler(struct notifier_block *nb, unsigned long event,
+  void *ptr)
+{
+   struct rproc *rproc = container_of(nb, struct rproc, panic_nb);
+
+   rproc->ops->panic(rproc);
+
+   return NOTIFY_DONE;
+}
+
 /**
  * rproc_get_by_phandle() - find a remote processor by phandle
  * @phandle: phandle to the rproc
@@ -2058,6 +2068,12 @@ struct rproc *rproc_alloc(struct device *dev, const char 
*name,
rproc->ops->get_boot_addr = rproc_elf_get_boot_addr;
}
 
+   /* Register panic notifier for remoteprocs with "panic" callback */
+   if (rproc->ops->panic) {
+   rproc->panic_nb.notifier_call = rproc_panic_handler;
+   atomic_notifier_chain_register(&panic_notifier_list, 
&rproc->panic_nb);
+   }
+
mutex_init(&rproc->lock);
 
idr_init(&rproc->notifyids);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 16ad66683ad0..33553f6d8ff0 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -383,6 +383,7 @@ struct rproc_ops {
int (*load)(struct rproc *rproc, const struct firmware *fw);
int (*sanity_check)(struct rproc *rproc, const struct firmware *fw);
u32 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw);
+   void (*panic)(struct rproc *rproc);
 };
 
 /**
@@ -481,6 +482,7 @@ struct rproc_dump_segment {
  * @auto_boot: flag to indicate if remote processor should be auto-started
  * @dump_segments: list of segments in the firmware
  * @nb_vdev: number of vdev currently handled by rproc
+ * @panic_nb: notifier_block for remoteproc's panic handler
  */
 struct rproc {
struct list_head node;
@@ -514,6 +516,7 @@ struct rproc {
bool auto_boot;
struct list_head dump_segments;
int nb_vdev;
+   struct notifier_block panic_nb;
 };
 
 /**
-- 
2.18.0

[PATCH 1/9] remoteproc: qcom: Introduce driver to store pil info in IMEM

2019-08-06 Thread Bjorn Andersson

A region in IMEM is used to communicate load addresses of remoteproc to
post mortem debug tools. Implement a driver that can be used to store
this information in order to enable these tools to process collected
ramdumps.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/Kconfig |   3 +
 drivers/remoteproc/Makefile|   1 +
 drivers/remoteproc/qcom_pil_info.c | 139 +
 drivers/remoteproc/qcom_pil_info.h |   6 ++
 4 files changed, 149 insertions(+)
 create mode 100644 drivers/remoteproc/qcom_pil_info.c
 create mode 100644 drivers/remoteproc/qcom_pil_info.h

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 28ed306982f7..3984bd16e670 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -85,6 +85,9 @@ config KEYSTONE_REMOTEPROC
  It's safe to say N here if you're not interested in the Keystone
  DSPs or just want to use a bare minimum kernel.
 
+config QCOM_PIL_INFO
+   tristate
+
 config QCOM_RPROC_COMMON
tristate
 
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 00f09e658cb3..c1b46e9033cb 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_OMAP_REMOTEPROC) += omap_remoteproc.o
 obj-$(CONFIG_WKUP_M3_RPROC)+= wkup_m3_rproc.o
 obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o
 obj-$(CONFIG_KEYSTONE_REMOTEPROC)  += keystone_remoteproc.o
+obj-$(CONFIG_QCOM_PIL_INFO)+= qcom_pil_info.o
 obj-$(CONFIG_QCOM_RPROC_COMMON)+= qcom_common.o
 obj-$(CONFIG_QCOM_Q6V5_COMMON) += qcom_q6v5.o
 obj-$(CONFIG_QCOM_Q6V5_ADSP)   += qcom_q6v5_adsp.o
diff --git a/drivers/remoteproc/qcom_pil_info.c 
b/drivers/remoteproc/qcom_pil_info.c
new file mode 100644
index 000000000000..aa42732016f3
--- /dev/null
+++ b/drivers/remoteproc/qcom_pil_info.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019 Linaro Ltd.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct pil_reloc_entry {
+   char name[8];
+   __le64 base;
+   __le32 size;
+} __packed;
+
+#define PIL_INFO_SIZE  200
+#define PIL_INFO_ENTRIES (PIL_INFO_SIZE / sizeof(struct pil_reloc_entry))
+
+struct pil_reloc {
+   struct device *dev;
+   struct regmap *map;
+   u32 offset;
+   int val_bytes;
+
+   struct pil_reloc_entry entries[PIL_INFO_ENTRIES];
+};
+
+static struct pil_reloc *_reloc;
+static DEFINE_MUTEX(reloc_mutex);
+
+/**
+ * qcom_pil_info_store() - store PIL information of image in IMEM
+ * @image: name of the image
+ * @base:  base address of the loaded image
+ * @size:  size of the loaded image
+ */
+void qcom_pil_info_store(const char *image, phys_addr_t base, size_t size)
+{
+   struct pil_reloc_entry *entry;
+   int idx = -1;
+   int i;
+
+   mutex_lock(&reloc_mutex);
+   if (!_reloc)
+   goto unlock;
+
+   for (i = 0; i < PIL_INFO_ENTRIES; i++) {
+   if (!_reloc->entries[i].name[0]) {
+   if (idx == -1)
+   idx = i;
+   continue;
+   }
+
+   if (!strncmp(_reloc->entries[i].name, image, 8)) {
+   idx = i;
+   goto found;
+   }
+   }
+
+   if (idx) {
+   dev_warn(_reloc->dev, "insufficient PIL info slots\n");
+   goto unlock;
+   }
+
+found:
+   entry = &_reloc->entries[idx];
+   stracpy(entry->name, image);
+   entry->base = base;
+   entry->size = size;
+
+   regmap_bulk_write(_reloc->map, _reloc->offset + idx * sizeof(*entry),
+ entry, sizeof(*entry) / _reloc->val_bytes);
+
+unlock:
+   mutex_unlock(&reloc_mutex);
+}
+EXPORT_SYMBOL_GPL(qcom_pil_info_store);
+
+static int pil_reloc_probe(struct platform_device *pdev)
+{
+   struct pil_reloc *reloc;
+
+   reloc = devm_kzalloc(&pdev->dev, sizeof(*reloc), GFP_KERNEL);
+   if (!reloc)
+   return -ENOMEM;
+
+   reloc->dev = &pdev->dev;
+   reloc->map = syscon_node_to_regmap(pdev->dev.parent->of_node);
+   if (IS_ERR(reloc->map))
+   return PTR_ERR(reloc->map);
+
+   if (of_property_read_u32(pdev->dev.of_node, "offset", &reloc->offset))
+   return -EINVAL;
+
+   reloc->val_bytes = regmap_get_val_bytes(reloc->map);
+   if (reloc->val_bytes < 0)
+   return -EINVAL;
+
+   regmap_bulk_write(reloc->map, reloc->offset, reloc->entries,
+ sizeof(reloc->entries) / reloc->val_bytes);
+
+   mutex_lock(&reloc_mutex);
+   _reloc = reloc;
+   mutex_unlock(&reloc_mutex);
+
+   return 0;
+}
+
+static int pil_reloc_remove(struct platform_device *pdev)
+{
+   mutex_lock(&reloc_mutex);
+   _reloc = NULL;
+   mutex_unlock(&reloc_mu

[PATCH 8/9] remoteproc: qcom: q6v5: Add common panic handler

2019-08-06 Thread Bjorn Andersson

Add a common panic handler that invokes a stop request and then waits long
enough to let the remoteproc flush its caches etc., in order to aid post
mortem debugging.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/qcom_q6v5.c | 19 +++
 drivers/remoteproc/qcom_q6v5.h |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c
index 0d33e3079f0d..0aebae893362 100644
--- a/drivers/remoteproc/qcom_q6v5.c
+++ b/drivers/remoteproc/qcom_q6v5.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2014 Sony Mobile Communications AB
  * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
  */
+#include 
 #include 
 #include 
 #include 
@@ -15,6 +16,8 @@
 #include 
 #include "qcom_q6v5.h"
 
+#define Q6V5_PANIC_DELAY_MS200
+
 /**
  * qcom_q6v5_prepare() - reinitialize the qcom_q6v5 context before start
  * @q6v5:  reference to qcom_q6v5 context to be reinitialized
@@ -162,6 +165,22 @@ int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5)
 }
 EXPORT_SYMBOL_GPL(qcom_q6v5_request_stop);
 
+/**
+ * qcom_q6v5_panic() - panic handler to invoke a stop on the remote
+ * @q6v5:  reference to qcom_q6v5 context
+ *
+ * Set the stop bit and sleep in order to allow the remote processor to flush
+ * its caches etc for post mortem debugging.
+ */
+void qcom_q6v5_panic(struct qcom_q6v5 *q6v5)
+{
+   qcom_smem_state_update_bits(q6v5->state,
+   BIT(q6v5->stop_bit), BIT(q6v5->stop_bit));
+
+   mdelay(Q6V5_PANIC_DELAY_MS);
+}
+EXPORT_SYMBOL_GPL(qcom_q6v5_panic);
+
 /**
  * qcom_q6v5_init() - initializer of the q6v5 common struct
  * @q6v5:  handle to be initialized
diff --git a/drivers/remoteproc/qcom_q6v5.h b/drivers/remoteproc/qcom_q6v5.h
index 7ac92c1e0f49..c37e6fd063e4 100644
--- a/drivers/remoteproc/qcom_q6v5.h
+++ b/drivers/remoteproc/qcom_q6v5.h
@@ -42,5 +42,6 @@ int qcom_q6v5_prepare(struct qcom_q6v5 *q6v5);
 int qcom_q6v5_unprepare(struct qcom_q6v5 *q6v5);
 int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5);
 int qcom_q6v5_wait_for_start(struct qcom_q6v5 *q6v5, int timeout);
+void qcom_q6v5_panic(struct qcom_q6v5 *q6v5);
 
 #endif
-- 
2.18.0

[PATCH 4/9] remoteproc: qcom: wcnss: Update IMEM PIL info on load

2019-08-06 Thread Bjorn Andersson

Store the relocated base address and size in the PIL relocation info
structure in IMEM.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/Kconfig  |  1 +
 drivers/remoteproc/qcom_wcnss.c | 14 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 2aa0743fc05b..3f976ce3df3c 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -185,6 +185,7 @@ config QCOM_WCNSS_PIL
depends on QCOM_SMEM
depends on QCOM_SYSMON || QCOM_SYSMON=n
select QCOM_MDT_LOADER
+   select QCOM_PIL_INFO
select QCOM_RPROC_COMMON
select QCOM_SCM
help
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index dc135754bb9c..9db9a3d25af4 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -27,6 +27,7 @@
 
 #include "qcom_common.h"
 #include "remoteproc_internal.h"
+#include "qcom_pil_info.h"
 #include "qcom_wcnss.h"
 
 #define WCNSS_CRASH_REASON_SMEM422
@@ -145,10 +146,17 @@ void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss,
 static int wcnss_load(struct rproc *rproc, const struct firmware *fw)
 {
struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
+   int ret;
+
+   ret = qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID,
+   wcnss->mem_region, wcnss->mem_phys,
+   wcnss->mem_size, &wcnss->mem_reloc);
+   if (ret)
+   return ret;
+
+   qcom_pil_info_store("wcnss", wcnss->mem_reloc, wcnss->mem_size);
 
-   return qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID,
-wcnss->mem_region, wcnss->mem_phys,
-wcnss->mem_size, &wcnss->mem_reloc);
+   return 0;
 }
 
 static void wcnss_indicate_nv_download(struct qcom_wcnss *wcnss)
-- 
2.18.0

[PATCH 6/9] arm64: dts: qcom: sdm845: Add IMEM and PIL info region

2019-08-06 Thread Bjorn Andersson

Add a simple-mfd representing IMEM on SDM845 and define the PIL
relocation info region, so that post mortem tools will be able to locate
the loaded remoteprocs.

Signed-off-by: Bjorn Andersson 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index c8ebe21f7673..38a6b304dba3 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -2456,6 +2456,16 @@
cell-index = <0>;
};
 
+   imem@146bf000 {
+   compatible = "syscon", "simple-mfd";
+   reg = <0 0x146bf000 0 0x1000>;
+
+   pil-reloc {
+   compatible ="qcom,pil-reloc-info";
+   offset = <0x94c>;
+   };
+   };
+
apps_smmu: iommu@1500 {
compatible = "qcom,sdm845-smmu-500", "arm,mmu-500";
reg = <0 0x1500 0 0x8>;
-- 
2.18.0

[PATCH 0/9] remoteproc: qcom: post mortem debug support

2019-08-06 Thread Bjorn Andersson

The following series introduces two components that aid in post mortem
debugging of Qualcomm systems. The first part is used to store information
about loaded images in IMEM, for post mortem tools to know where the kernel
loaded the remoteproc firmware. The second part invokes a stop operation on the
remoteprocs during a kernel panic, in order to trigger them to flush caches
etc.

Bjorn Andersson (9):
  remoteproc: qcom: Introduce driver to store pil info in IMEM
  remoteproc: qcom: mss: Update IMEM PIL info on load
  remoteproc: qcom: pas: Update IMEM PIL info on load
  remoteproc: qcom: wcnss: Update IMEM PIL info on load
  arm64: dts: qcom: qcs404: Add IMEM and PIL info region
  arm64: dts: qcom: sdm845: Add IMEM and PIL info region
  remoteproc: Introduce "panic" callback in ops
  remoteproc: qcom: q6v5: Add common panic handler
  remoteproc: qcom: Introduce panic handler for PAS and ADSP

 arch/arm64/boot/dts/qcom/qcs404.dtsi |  10 ++
 arch/arm64/boot/dts/qcom/sdm845.dtsi |  10 ++
 drivers/remoteproc/Kconfig   |   6 ++
 drivers/remoteproc/Makefile  |   1 +
 drivers/remoteproc/qcom_pil_info.c   | 139 +++
 drivers/remoteproc/qcom_pil_info.h   |   6 ++
 drivers/remoteproc/qcom_q6v5.c   |  19 
 drivers/remoteproc/qcom_q6v5.h   |   1 +
 drivers/remoteproc/qcom_q6v5_adsp.c  |   8 ++
 drivers/remoteproc/qcom_q6v5_mss.c   |   3 +
 drivers/remoteproc/qcom_q6v5_pas.c   |  23 -
 drivers/remoteproc/qcom_wcnss.c  |  14 ++-
 drivers/remoteproc/remoteproc_core.c |  16 +++
 include/linux/remoteproc.h   |   3 +
 14 files changed, 253 insertions(+), 6 deletions(-)
 create mode 100644 drivers/remoteproc/qcom_pil_info.c
 create mode 100644 drivers/remoteproc/qcom_pil_info.h

-- 
2.18.0

[PATCH 2/9] remoteproc: qcom: mss: Update IMEM PIL info on load

2019-08-06 Thread Bjorn Andersson

As the MPSS address is calculated during load, store it, along with the
size, in the PIL info region structure in IMEM.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/Kconfig | 1 +
 drivers/remoteproc/qcom_q6v5_mss.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index 3984bd16e670..b88d74632d39 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -119,6 +119,7 @@ config QCOM_Q6V5_MSS
depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
depends on QCOM_SYSMON || QCOM_SYSMON=n
select MFD_SYSCON
+   select QCOM_PIL_INFO
select QCOM_MDT_LOADER
select QCOM_Q6V5_COMMON
select QCOM_RPROC_COMMON
diff --git a/drivers/remoteproc/qcom_q6v5_mss.c 
b/drivers/remoteproc/qcom_q6v5_mss.c
index 8fcf9d28dd73..d9192fa40e05 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -28,6 +28,7 @@
 
 #include "remoteproc_internal.h"
 #include "qcom_common.h"
+#include "qcom_pil_info.h"
 #include "qcom_q6v5.h"
 
 #include 
@@ -1040,6 +1041,8 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
else if (ret < 0)
dev_err(qproc->dev, "MPSS authentication failed: %d\n", ret);
 
+   qcom_pil_info_store("modem", mpss_reloc, qproc->mpss_size);
+
 release_firmware:
release_firmware(fw);
 out:
-- 
2.18.0

Re: [PATCH] ALSA: usb-midi: fix a memory leak bug

2019-08-06 Thread Wenwen Wang

On Wed, Aug 7, 2019 at 1:31 AM Takashi Iwai  wrote:
>
> On Wed, 07 Aug 2019 05:22:09 +0200,
> Wenwen Wang wrote:
> >
> > In __snd_usbmidi_create(), a MIDI streaming interface structure is
> > allocated through kzalloc() and the pointer is saved to 'umidi'. Later on,
> > the endpoint structures are created by invoking
> > snd_usbmidi_create_endpoints_midiman() or snd_usbmidi_create_endpoints(),
> > depending on the type of the audio quirk type. However, if the creation
> > fails, the allocated 'umidi' is not deallocated, leading to a memory leak
> > bug.
> >
> > To fix the above issue, free 'umidi' before returning the error.
> >
> > Signed-off-by: Wenwen Wang 
>
> It's again a false-positive report.  The object is released
> automatically by the destructor of its base snd_rawmidi object.

Thanks for your response! Sorry for the false positives. :(

Wenwen

Re: [PATCH] ALSA: usb-midi: fix a memory leak bug

2019-08-06 Thread Takashi Iwai

On Wed, 07 Aug 2019 05:22:09 +0200,
Wenwen Wang wrote:
> 
> In __snd_usbmidi_create(), a MIDI streaming interface structure is
> allocated through kzalloc() and the pointer is saved to 'umidi'. Later on,
> the endpoint structures are created by invoking
> snd_usbmidi_create_endpoints_midiman() or snd_usbmidi_create_endpoints(),
> depending on the type of the audio quirk type. However, if the creation
> fails, the allocated 'umidi' is not deallocated, leading to a memory leak
> bug.
> 
> To fix the above issue, free 'umidi' before returning the error.
> 
> Signed-off-by: Wenwen Wang 

It's again a false-positive report.  The object is released
automatically by the destructor of its base snd_rawmidi object.


thanks,

Takashi

Re: [PATCH 1/3] mm/migrate: clean up useless code in migrate_vma_collect_pmd()

2019-08-06 Thread Pingfan Liu

On Tue, Aug 06, 2019 at 06:35:03AM -0700, Matthew Wilcox wrote:
> 
> This needs something beyond the subject line.  Maybe ...
> 
> After these assignments, we either restart the loop with a fresh variable,
> or we assign to the variable again without using the value we've assigned.
> 
> Reviewed-by: Matthew Wilcox (Oracle) 
> 
> > goto next;
> > }
> > -   pfn = page_to_pfn(page);
> 
> After you've done all this, as far as I can tell, the 'pfn' variable is
> only used in one arm of the conditions, so it can be moved there.
> 
> ie something like:
> 
> -   unsigned long mpfn, pfn;
> +   unsigned long mpfn;
> ...
> -   pfn = pte_pfn(pte);
> ...
> +   unsigned long pfn = pte_pfn(pte);
> +
> 
This makes the code better. Thank you for the suggestion. I will send a v2
of this patch.

Regards,
Pingfan

Re: [PATCH -next] iwlwifi: dbg: work around clang bug by marking debug strings static

2019-08-06 Thread Luciano Coelho

On Tue, 2019-08-06 at 22:15 -0700, Nathan Chancellor wrote:
> On Tue, Aug 06, 2019 at 03:37:42PM -0700, Nick Desaulniers wrote:
> > On Thu, Aug 1, 2019 at 12:11 AM Johannes Berg  
> > wrote:
> > > 
> > > > Luca, you said this was already fixed in your internal tree, and the fix
> > > > would appear soon in next, but I don't see anything in linux-next?
> > > 
> > > Luca is still on vacation, but I just sent out a version of the patch we
> > > had applied internally.
> > > 
> > > Also turns out it wasn't actually _fixed_, just _moved_, so those
> > > internal patches wouldn't have helped anyway.
> > 
> > Thanks for the report. Do you have a link?
> > I'll rebase my patch then.
> > -- 
> > Thanks,
> > ~Nick Desaulniers
> 
> Just for everyone else (since I commented on our issue tracker), this is
> now fixed in Linus's tree as of commit  1f6607250331 ("iwlwifi: dbg_ini:
> fix compile time assert build errors").

Yes, thanks Nathan! I was just digging for this patch to reply to you,
I'm still catching up with what happened during my vacations.

--
Cheers,
Luca.

Re: [PATCH -next] iwlwifi: dbg: work around clang bug by marking debug strings static

2019-08-06 Thread Nathan Chancellor

On Tue, Aug 06, 2019 at 03:37:42PM -0700, Nick Desaulniers wrote:
> On Thu, Aug 1, 2019 at 12:11 AM Johannes Berg  
> wrote:
> >
> >
> > > Luca, you said this was already fixed in your internal tree, and the fix
> > > would appear soon in next, but I don't see anything in linux-next?
> >
> > Luca is still on vacation, but I just sent out a version of the patch we
> > had applied internally.
> >
> > Also turns out it wasn't actually _fixed_, just _moved_, so those
> > internal patches wouldn't have helped anyway.
> 
> Thanks for the report. Do you have a link?
> I'll rebase my patch then.
> -- 
> Thanks,
> ~Nick Desaulniers

Just for everyone else (since I commented on our issue tracker), this is
now fixed in Linus's tree as of commit  1f6607250331 ("iwlwifi: dbg_ini:
fix compile time assert build errors").

Cheers,
Nathan

[PATCH] arm64: mm: add missing PTE_SPECIAL in pte_mkdevmap on arm64

2019-08-06 Thread Jia He

Without this patch, the MAP_SYNC test case will cause a print_bad_pte
warning on arm64 as follows:
[   25.542693] BUG: Bad page map in process mapdax333
pte:2e8000448800f53 pmd:41ff5f003
[   25.546360] page:7e001022 refcount:1 mapcount:-1
mapping:8003e29c7440 index:0x0
[   25.550281] ext4_dax_aops
[   25.550282] name:"__aaabbbcccddd__"
[   25.551553] flags: 0x3001002(referenced|reserved)
[   25.555802] raw: 03001002 8003dfffa908 
8003e29c7440
[   25.559446] raw:   0001fffe

[   25.563075] page dumped because: bad pte
[   25.564938] addr:be05b000 vm_flags:208000fb
anon_vma: mapping:8003e29c7440 index:0
[   25.574272] file:__aaabbbcccddd__ fault:ext4_dax_fault
ap:ext4_file_mmap readpage:0x0
[   25.578799] CPU: 1 PID: 1180 Comm: mapdax333 Not tainted 5.2.0+ #21
[   25.581702] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0
02/06/2015
[   25.585624] Call trace:
[   25.587008]  dump_backtrace+0x0/0x178
[   25.588799]  show_stack+0x24/0x30
[   25.590328]  dump_stack+0xa8/0xcc
[   25.591901]  print_bad_pte+0x18c/0x218
[   25.593628]  unmap_page_range+0x778/0xc00
[   25.595506]  unmap_single_vma+0x94/0xe8
[   25.597304]  unmap_vmas+0x90/0x108
[   25.598901]  unmap_region+0xc0/0x128
[   25.600566]  __do_munmap+0x284/0x3f0
[   25.602245]  __vm_munmap+0x78/0xe0
[   25.603820]  __arm64_sys_munmap+0x34/0x48
[   25.605709]  el0_svc_common.constprop.0+0x78/0x168
[   25.607956]  el0_svc_handler+0x34/0x90
[   25.609698]  el0_svc+0x8/0xc
[   25.611103] Disabling lock debugging due to kernel taint
[   25.613573] BUG: Bad page state in process mapdax333  pfn:448800
[   25.616359] page:7e001022 refcount:0 mapcount:-1
mapping:8003e29c7440 index:0x1
[   25.620236] ext4_dax_aops
[   25.620237] name:"__aaabbbcccddd__"
[   25.621495] flags: 0x300()
[   25.624912] raw: 0300 dead0100 dead0200
8003e29c7440
[   25.628502] raw: 0001  fffe

[   25.632097] page dumped because: non-NULL mapping
[...]
[   25.656567] CPU: 1 PID: 1180 Comm: mapdax333 Tainted: GB
5.2.0+ #21
[   25.660131] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0
02/06/2015
[   25.663324] Call trace:
[   25.664466]  dump_backtrace+0x0/0x178
[   25.666163]  show_stack+0x24/0x30
[   25.667721]  dump_stack+0xa8/0xcc
[   25.669270]  bad_page+0xf0/0x150
[   25.670772]  free_pages_check_bad+0x84/0xa0
[   25.672724]  free_pcppages_bulk+0x45c/0x708
[   25.674675]  free_unref_page_commit+0xcc/0x100
[   25.676751]  free_unref_page_list+0x13c/0x200
[   25.678801]  release_pages+0x350/0x420
[   25.680539]  free_pages_and_swap_cache+0xf8/0x128
[   25.682738]  tlb_flush_mmu+0x164/0x2b0
[   25.684485]  unmap_page_range+0x648/0xc00
[   25.686349]  unmap_single_vma+0x94/0xe8
[   25.688131]  unmap_vmas+0x90/0x108
[   25.689739]  unmap_region+0xc0/0x128
[   25.691392]  __do_munmap+0x284/0x3f0
[   25.693079]  __vm_munmap+0x78/0xe0
[   25.694658]  __arm64_sys_munmap+0x34/0x48
[   25.696530]  el0_svc_common.constprop.0+0x78/0x168
[   25.698772]  el0_svc_handler+0x34/0x90
[   25.700512]  el0_svc+0x8/0xc

The root cause is in _vm_normal_page(): without the PTE_SPECIAL bit,
the return value is incorrectly set to pfn_to_page(pfn) instead
of NULL. In addition, this patch rewrites pmd_mkdevmap() to avoid
setting PTE_SPECIAL for a pmd.

The MAP_SYNC test case is as follows (provided by Yibo Cai):
$#include 
$#include 
$#include 
$#include 
$#include 

$#ifndef MAP_SYNC
$#define MAP_SYNC 0x8
$#endif

/* mount -o dax /dev/pmem0 /mnt */
$#define F "/mnt/__aaabbbcccddd__"

int main(void)
{
int fd;
char buf[4096];
void *addr;

if ((fd = open(F, O_CREAT|O_TRUNC|O_RDWR, 0644)) < 0) {
perror("open1");
return 1;
}

if (write(fd, buf, 4096) != 4096) {
perror("lseek");
return 1;
}

addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_SYNC,
fd, 0);
if (addr == MAP_FAILED) {
perror("mmap");
printf("did you mount with '-o dax'?\n");
return 1;
}

memset(addr, 0x55, 4096);

if (munmap(addr, 4096) == -1) {
perror("munmap");
return 1;
}

close(fd);

return 0;
}

Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support")
Reported-by: Yibo Cai 
Signed-off-by: Jia He 
Acked-by: Robin Murphy 
---
 arch/arm64/include/asm/pgtable.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5fdcfe237338..e09760ece844 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -209,7 +209,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
 
 static inline pte_t pte_mkdevmap(pte_t pte)
 {
-   return set_pte_bit(pte, __pgprot(PTE_DEVMAP));
+   return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL))

[PATCH] tipc: set addr_trail_end when using explicit node addresses

2019-08-06 Thread Chris Packham

When tipc uses auto-generated node addresses it goes through a duplicate
address detection phase to ensure the address is unique.

When using explicitly configured node names the DAD phase is skipped.
However, addr_trial_end was being left set to 0, which causes parts of the
tipc state machine to assume that the address is not yet valid and
unnecessarily delays the discovery phase. By setting addr_trial_end to
jiffies when using explicit addresses we ensure that we move straight to
discovery.

Signed-off-by: Chris Packham 
---
 net/tipc/discover.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..f83bfe8c9443 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -361,6 +361,8 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
if (!tipc_own_addr(net)) {
tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+   } else {
+   tn->addr_trial_end = jiffies;
}
memcpy(&d->dest, dest, sizeof(*dest));
d->net = net;
-- 
2.22.0

Re: [PATCH net v2] net: dsa: Check existence of .port_mdb_add callback before calling it

2019-08-06 Thread Vivien Didelot

Hi Chen-Yu,

On Wed, 7 Aug 2019 11:18:28 +0800, Chen-Yu Tsai  wrote:
> On Wed, Aug 7, 2019 at 4:34 AM Vivien Didelot  
> wrote:
> >
> > Hi Chen-Yu,
> >
> > On Wed, 7 Aug 2019 01:49:37 +0800, Chen-Yu Tsai  wrote:
> > > On Wed, Aug 7, 2019 at 1:15 AM Vivien Didelot  
> > > wrote:
> > > >
> > > > Hi Chen-Yu,
> > > >
> > > > On Tue,  6 Aug 2019 15:53:25 +0800, Chen-Yu Tsai  
> > > > wrote:
> > > > > From: Chen-Yu Tsai 
> > > > >
> > > > > With the recent addition of commit 75dad2520fc3 ("net: dsa: b53: 
> > > > > Disable
> > > > > all ports on setup"), users of b53 (BCM53125 on Lamobo R1 in my case)
> > > > > are forced to use the dsa subsystem to enable the switch, instead of
> > > > > having it in the default transparent "forward-to-all" mode.
> > > > >
> > > > > The b53 driver does not support mdb bitmap functions. However the dsa
> > > > > layer does not check for the existence of the .port_mdb_add callback
> > > > > before actually using it. This results in a NULL pointer dereference,
> > > > > as shown in the kernel oops below.
> > > > >
> > > > > The other functions seem to be properly guarded. Do the same for
> > > > > .port_mdb_add in dsa_switch_mdb_add_bitmap() as well.
> > > > >
> > > > > b53 is not the only driver that doesn't support mdb bitmap functions.
> > > > > Others include bcm_sf2, dsa_loop, lantiq_gswip, mt7530, mv88e6060,
> > > > > qca8k, realtek-smi, and vitesse-vsc73xx.
> > > >
> > > > I don't know what you mean by that, there's no "mdb bitmap function"
> > > > support for drivers, only the port_mdb_{prepare,add,del} callbacks...
> > >
> > > The term was coined from commit e6db98db8a95 ("net: dsa: add switch mdb
> > > bitmap functions"). But yeah, .port_mdb_* ops/callbacks would be more
> > > appropriate.
> > >
> > > > > 8<--- cut here ---
> > > > > Unable to handle kernel NULL pointer dereference at virtual 
> > > > > address 
> > > > > pgd = (ptrval)
> > > > > [] *pgd=
> > > > > Internal error: Oops: 8005 [#1] SMP ARM
> > > > > Modules linked in: rtl8xxxu rtl8192cu rtl_usb rtl8192c_common 
> > > > > rtlwifi mac80211 cfg80211
> > > > > CPU: 1 PID: 134 Comm: kworker/1:2 Not tainted 
> > > > > 5.3.0-rc1-00247-gd3519030752a #1
> > > > > Hardware name: Allwinner sun7i (A20) Family
> > > > > Workqueue: events switchdev_deferred_process_work
> > > > > PC is at 0x0
> > > > > LR is at dsa_switch_event+0x570/0x620
> > > > > pc : [<>]lr : []psr: 80070013
> > > > > sp : ee871db8  ip :   fp : ee98d0a4
> > > > > r10: 000c  r9 : 0008  r8 : ee89f710
> > > > > r7 : ee98d040  r6 : ee98d088  r5 : c0f04c48  r4 : ee98d04c
> > > > > r3 :   r2 : ee89f710  r1 : 0008  r0 : ee98d040
> > > > > Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > > Control: 10c5387d  Table: 6deb406a  DAC: 0051
> > > > > Process kworker/1:2 (pid: 134, stack limit = 0x(ptrval))
> > > > > Stack: (0xee871db8 to 0xee872000)
> > > > > 1da0:   
> > > > > ee871e14 103ace2d
> > > > > 1dc0:    ee871e14 0005  
> > > > > c08524a0 
> > > > > 1de0: e000 c014bdfc c0f04c48 ee871e98 c0f04c48 ee9e5000 
> > > > > c0851120 c014bef0
> > > > > 1e00:  b643aea2 ee9b4068 c08509a8 ee2bf940 ee89f710 
> > > > > ee871ecb 
> > > > > 1e20: 0008 103ace2d  c087e248 ee29c868 103ace2d 
> > > > > 0001 
> > > > > 1e40:  ee871e98 0006  c0fb2a50 c087e2d0 
> > > > >  c08523c4
> > > > > 1e60:  c014bdfc 0006 c0fad2d0 ee871e98 ee89f710 
> > > > >  c014c500
> > > > > 1e80:  ee89f3c0 c0f04c48  ee9e5000 c087dfb4 
> > > > > ee9e5000 
> > > > > 1ea0: ee89f710 ee871ecb 0001 103ace2d  c0f04c48 
> > > > >  c087e0a8
> > > > > 1ec0:  efd9a3e0 0089f3c0 103ace2d ee89f700 ee89f710 
> > > > > ee9e5000 0122
> > > > > 1ee0: 0100 c087e130 ee89f700 c0fad2c8 c1003ef0 c087de4c 
> > > > > 2e928000 c0fad2ec
> > > > > 1f00: c0fad2ec ee839580 ef7a62c0 ef7a9400  c087def8 
> > > > > c0fad2ec c01447dc
> > > > > 1f20: ef315640 ef7a62c0 0008 ee839580 ee839594 ef7a62c0 
> > > > > 0008 c0f03d00
> > > > > 1f40: ef7a62d8 ef7a62c0 e000 c0145b84 e000 c0fb2420 
> > > > > c0bfaa8c 
> > > > > 1f60: e000 ee84b600 ee84b5c0  ee87 ee839580 
> > > > > c0145b40 ef0e5ea4
> > > > > 1f80: ee84b61c c014a6f8 0001 ee84b5c0 c014a5b0  
> > > > >  
> > > > > 1fa0:    c01010e8   
> > > > >  
> > > > > 1fc0:       
> > > > >  
> > > > > 1fe0:     0013  
> > > > >  
> > > > > []

Re: [PATCH v2] soc: qcom: socinfo: Annotate switch cases with fall through

2019-08-06 Thread Vaishali Thakkar

On Wed, 7 Aug 2019 at 07:54, Bjorn Andersson  wrote:
>
> Introduce fall through annotations in the switch statements of
> socinfo_debugfs_init() to silence compiler warnings.

Oops, I missed this. Thanks for fixing it!

> Fixes: 9c84c1e78634 ("soc: qcom: socinfo: Expose custom attributes")
> Reported-by: Stephen Rothwell 
> Signed-off-by: Bjorn Andersson 

Acked-by: Vaishali Thakkar 

> ---
>  drivers/soc/qcom/socinfo.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c
> index 855353bed19e..a39ea5061dc5 100644
> --- a/drivers/soc/qcom/socinfo.c
> +++ b/drivers/soc/qcom/socinfo.c
> @@ -323,6 +323,7 @@ static void socinfo_debugfs_init(struct qcom_socinfo 
> *qcom_socinfo,
> debugfs_create_x32("raw_device_number", 0400,
>qcom_socinfo->dbg_root,
>&qcom_socinfo->info.raw_device_num);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 11):
> case SOCINFO_VERSION(0, 10):
> case SOCINFO_VERSION(0, 9):
> @@ -330,10 +331,12 @@ static void socinfo_debugfs_init(struct qcom_socinfo 
> *qcom_socinfo,
>
> debugfs_create_u32("foundry_id", 0400, qcom_socinfo->dbg_root,
>&qcom_socinfo->info.foundry_id);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 8):
> case SOCINFO_VERSION(0, 7):
> DEBUGFS_ADD(info, pmic_model);
> DEBUGFS_ADD(info, pmic_die_rev);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 6):
> qcom_socinfo->info.hw_plat_subtype =
> __le32_to_cpu(info->hw_plat_subtype);
> @@ -341,6 +344,7 @@ static void socinfo_debugfs_init(struct qcom_socinfo 
> *qcom_socinfo,
> debugfs_create_u32("hardware_platform_subtype", 0400,
>qcom_socinfo->dbg_root,
>&qcom_socinfo->info.hw_plat_subtype);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 5):
> qcom_socinfo->info.accessory_chip =
> __le32_to_cpu(info->accessory_chip);
> @@ -348,23 +352,27 @@ static void socinfo_debugfs_init(struct qcom_socinfo 
> *qcom_socinfo,
> debugfs_create_u32("accessory_chip", 0400,
>qcom_socinfo->dbg_root,
>&qcom_socinfo->info.accessory_chip);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 4):
> qcom_socinfo->info.plat_ver = __le32_to_cpu(info->plat_ver);
>
> debugfs_create_u32("platform_version", 0400,
>qcom_socinfo->dbg_root,
>&qcom_socinfo->info.plat_ver);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 3):
> qcom_socinfo->info.hw_plat = __le32_to_cpu(info->hw_plat);
>
> debugfs_create_u32("hardware_platform", 0400,
>qcom_socinfo->dbg_root,
>&qcom_socinfo->info.hw_plat);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 2):
> qcom_socinfo->info.raw_ver  = __le32_to_cpu(info->raw_ver);
>
> debugfs_create_u32("raw_version", 0400, 
> qcom_socinfo->dbg_root,
>&qcom_socinfo->info.raw_ver);
> +   /* Fall through */
> case SOCINFO_VERSION(0, 1):
> DEBUGFS_ADD(info, build_id);
> break;
> --
> 2.18.0
>

[PATCH nvmem v2 0/2] nvmem: imx: add i.MX8QM platform support

2019-08-06 Thread fugang . duan

From: Fugang Duan 

This patch set adds i.MX8QM platform support to the i.MX8 SCU
OCOTP driver, because the i.MX8QM efuse table differs somewhat from
that of the i.MX8QXP platform.

V2:
- Add dt-bindings for the new compatible string support.

Fugang Duan (2):
  nvmem: imx: add i.MX8QM platform support
  dt-bindings: fsl: scu: add new compatible string for ocotp

 Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt | 4 +++-
 drivers/nvmem/imx-ocotp-scu.c   | 7 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

-- 
2.7.4

Re: [PATCH v4 2/4] RISC-V: Add riscv_isa representing ISA features common across CPUs

2019-08-06 Thread Anup Patel

On Wed, Aug 7, 2019 at 3:24 AM Paul Walmsley  wrote:
>
> Hi Anup, Atish,
>
> On Fri, 2 Aug 2019, Atish Patra wrote:
>
> > From: Anup Patel 
> >
> > This patch adds riscv_isa integer to represent ISA features common
> > across all CPUs. The riscv_isa is not same as elf_hwcap because
> > elf_hwcap will only have ISA features relevant for user-space apps
> > whereas riscv_isa will have ISA features relevant to both kernel
> > and user-space apps.
> >
> > One of the use case is KVM hypervisor where riscv_isa will be used
> > to do following operations:
> >
> > 1. Check whether hypervisor extension is available
> > 2. Find ISA features that need to be virtualized (e.g. floating
> >point support, vector extension, etc.)
> >
> > Signed-off-by: Anup Patel 
> > Signed-off-by: Atish Patra 
>
> Do you have any opinions on how this patch might change for the Z-prefix
> extensions?  This bitfield approach probably won't scale, and with the
> EXPORT_SYMBOL(), it might be worth trying to put together an approach that
> would work over the long term?

Our plan is to use a bitmap instead of a bitfield, and all Zxyz extensions will
be assigned bit positions "27 + i", where "i" will be based on the order in which
they are defined in the RISC-V spec. In general, "i" is just a unique relative
index (starting from 0).

To summarize, the existing bitfield approach can be naturally extended
using bitmap.

We will update this patch accordingly.

Regards,
Anup

[PATCH v1 1/1] i2c: iproc: Add i2c repeated start capability

2019-08-06 Thread Rayagonda Kokatanur

From: Lori Hikichi 

Enable handling of i2c repeated start. The current code
handles a multi msg i2c transfer as separate i2c bus
transactions. This change will now handle this case
using the i2c repeated start protocol. The number of msgs
in a transfer is limited to two, and must be a write
followed by a read.

Signed-off-by: Lori Hikichi 
Signed-off-by: Rayagonda Kokatanur 
Signed-off-by: Icarus Chau 
Signed-off-by: Ray Jui 
Signed-off-by: Shivaraj Shetty 
---
 drivers/i2c/busses/i2c-bcm-iproc.c | 70 +++---
 1 file changed, 57 insertions(+), 13 deletions(-)

diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c 
b/drivers/i2c/busses/i2c-bcm-iproc.c
index d7fd76b..15fedcf 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -81,6 +81,7 @@
 #define M_CMD_PROTOCOL_MASK  0xf
 #define M_CMD_PROTOCOL_BLK_WR0x7
 #define M_CMD_PROTOCOL_BLK_RD0x8
+#define M_CMD_PROTOCOL_PROCESS   0xa
 #define M_CMD_PEC_SHIFT  8
 #define M_CMD_RD_CNT_SHIFT   0
 #define M_CMD_RD_CNT_MASK0xff
@@ -675,13 +676,20 @@ static int bcm_iproc_i2c_xfer_wait(struct 
bcm_iproc_i2c_dev *iproc_i2c,
return 0;
 }
 
-static int bcm_iproc_i2c_xfer_single_msg(struct bcm_iproc_i2c_dev *iproc_i2c,
-struct i2c_msg *msg)
+/*
+ * If 'process_call' is true, then this is a multi-msg transfer that requires
+ * a repeated start between the messages.
+ * More specifically, it must be a write (reg) followed by a read (data).
+ * The i2c quirks are set to enforce this rule.
+ */
+static int bcm_iproc_i2c_xfer_internal(struct bcm_iproc_i2c_dev *iproc_i2c,
+   struct i2c_msg *msgs, bool process_call)
 {
int i;
u8 addr;
u32 val, tmp, val_intr_en;
unsigned int tx_bytes;
+   struct i2c_msg *msg = &msgs[0];
 
/* check if bus is busy */
if (!!(iproc_i2c_rd_reg(iproc_i2c,
@@ -707,14 +715,29 @@ static int bcm_iproc_i2c_xfer_single_msg(struct 
bcm_iproc_i2c_dev *iproc_i2c,
val = msg->buf[i];
 
/* mark the last byte */
-   if (i == msg->len - 1)
-   val |= BIT(M_TX_WR_STATUS_SHIFT);
+   if (!process_call && (i == msg->len - 1))
+   val |= 1 << M_TX_WR_STATUS_SHIFT;
 
iproc_i2c_wr_reg(iproc_i2c, M_TX_OFFSET, val);
}
iproc_i2c->tx_bytes = tx_bytes;
}
 
+   /* Process the read message if this is process call */
+   if (process_call) {
+   msg++;
+   iproc_i2c->msg = msg;  /* point to second msg */
+
+   /*
+* The last byte to be sent out should be a slave
+* address with read operation
+*/
+   addr = msg->addr << 1 | 1;
+   /* mark it the last byte out */
+   val = addr | (1 << M_TX_WR_STATUS_SHIFT);
+   iproc_i2c_wr_reg(iproc_i2c, M_TX_OFFSET, val);
+   }
+
/* mark as incomplete before starting the transaction */
if (iproc_i2c->irq)
reinit_completion(&iproc_i2c->done);
@@ -733,7 +756,7 @@ static int bcm_iproc_i2c_xfer_single_msg(struct 
bcm_iproc_i2c_dev *iproc_i2c,
 * underrun interrupt, which will be triggerred when the TX FIFO is
 * empty. When that happens we can then pump more data into the FIFO
 */
-   if (!(msg->flags & I2C_M_RD) &&
+   if (!process_call && !(msg->flags & I2C_M_RD) &&
msg->len > iproc_i2c->tx_bytes)
val_intr_en |= BIT(IE_M_TX_UNDERRUN_SHIFT);
 
@@ -743,6 +766,8 @@ static int bcm_iproc_i2c_xfer_single_msg(struct 
bcm_iproc_i2c_dev *iproc_i2c,
 */
val = BIT(M_CMD_START_BUSY_SHIFT);
if (msg->flags & I2C_M_RD) {
+   u32 protocol;
+
iproc_i2c->rx_bytes = 0;
if (msg->len > M_RX_FIFO_MAX_THLD_VALUE)
iproc_i2c->thld_bytes = M_RX_FIFO_THLD_VALUE;
@@ -758,7 +783,10 @@ static int bcm_iproc_i2c_xfer_single_msg(struct 
bcm_iproc_i2c_dev *iproc_i2c,
/* enable the RX threshold interrupt */
val_intr_en |= BIT(IE_M_RX_THLD_SHIFT);
 
-   val |= (M_CMD_PROTOCOL_BLK_RD << M_CMD_PROTOCOL_SHIFT) |
+   protocol = process_call ?
+   M_CMD_PROTOCOL_PROCESS : M_CMD_PROTOCOL_BLK_RD;
+
+   val |= (protocol << M_CMD_PROTOCOL_SHIFT) |
   (msg->len << M_CMD_RD_CNT_SHIFT);
} else {
val |= (M_CMD_PROTOCOL_BLK_WR << M_CMD_PROTOCOL_SHIFT);
@@ -774,17 +802,31 @@ static int bcm_iproc_i2c_xfer(struct i2c_adapter *adapter,
  struct i2c_msg msgs[], int num)
 {
struct bcm_iproc_i2c_dev *iproc_i2c = i2c_get_adapdata(adapter);
-   int

[PATCH nvmem v2 1/2] nvmem: imx: add i.MX8QM platform support

2019-08-06 Thread fugang . duan

From: Fugang Duan 

The i.MX8QM efuse table differs somewhat from that of the i.MX8QXP platform,
so add i.MX8QM platform support.

Signed-off-by: Fugang Duan 
---
 drivers/nvmem/imx-ocotp-scu.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/nvmem/imx-ocotp-scu.c b/drivers/nvmem/imx-ocotp-scu.c
index be2f5f0..0d78ab4 100644
--- a/drivers/nvmem/imx-ocotp-scu.c
+++ b/drivers/nvmem/imx-ocotp-scu.c
@@ -16,6 +16,7 @@
 
 enum ocotp_devtype {
IMX8QXP,
+   IMX8QM,
 };
 
 struct ocotp_devtype_data {
@@ -39,6 +40,11 @@ static struct ocotp_devtype_data imx8qxp_data = {
.nregs = 800,
 };
 
+static struct ocotp_devtype_data imx8qm_data = {
+   .devtype = IMX8QM,
+   .nregs = 800,
+};
+
 static int imx_sc_misc_otp_fuse_read(struct imx_sc_ipc *ipc, u32 word,
 u32 *val)
 {
@@ -118,6 +124,7 @@ static struct nvmem_config imx_scu_ocotp_nvmem_config = {
 
 static const struct of_device_id imx_scu_ocotp_dt_ids[] = {
{ .compatible = "fsl,imx8qxp-scu-ocotp", (void *)&imx8qxp_data },
+   { .compatible = "fsl,imx8qm-scu-ocotp", (void *)&imx8qm_data },
{ },
 };
 MODULE_DEVICE_TABLE(of, imx_scu_ocotp_dt_ids);
-- 
2.7.4

[PATCH nvmem v2 2/2] dt-bindings: fsl: scu: add new compatible string for ocotp

2019-08-06 Thread fugang . duan

From: Fugang Duan 

Add the new compatible string "fsl,imx8qm-scu-ocotp" to the binding
doc for the i.MX8 SCU OCOTP driver.

Signed-off-by: Fugang Duan 
---
 Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt 
b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
index a575e42..c149fad 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
@@ -136,7 +136,9 @@ Required properties:
 OCOTP bindings based on SCU Message Protocol
 
 Required properties:
-- compatible:  Should be "fsl,imx8qxp-scu-ocotp"
+- compatible:  Should be one of:
+   "fsl,imx8qm-scu-ocotp",
+   "fsl,imx8qxp-scu-ocotp".
 - #address-cells:  Must be 1. Contains byte index
 - #size-cells: Must be 1. Contains byte length
 
-- 
2.7.4

RE: [PATCH 1/3] perf: Add capability-related utilities

2019-08-06 Thread Lubashev, Igor

On Wed, July 17 at 2019 7:47 PM  Arnaldo Carvalho de Melo wrote:
> Em Wed, Jul 17, 2019 at 06:05:51PM -0300, Arnaldo Carvalho de Melo
> escreveu:
> > Em Tue, Jul 16, 2019 at 10:46:43AM +0200, Jiri Olsa escreveu:
> > > On Tue, Jul 02, 2019 at 08:10:03PM -0400, Igor Lubashev wrote:
> > > > Add utilities to help checking capabilities of the running process.
> > > > Make perf link with libcap.
> > > >
> > > > Signed-off-by: Igor Lubashev 
> > > > ---
> > > >  tools/perf/Makefile.config |  2 +-
> > > >  tools/perf/util/Build  |  1 +
> > > >  tools/perf/util/cap.c  | 24 
> > > >  tools/perf/util/cap.h  | 10 ++
> > > >  tools/perf/util/event.h|  1 +
> > > >  tools/perf/util/python-ext-sources |  1 +
> > > >  tools/perf/util/util.c |  9 +
> > > >  7 files changed, 47 insertions(+), 1 deletion(-)  create mode
> > > > 100644 tools/perf/util/cap.c  create mode 100644
> > > > tools/perf/util/cap.h
> > > >
> > > > diff --git a/tools/perf/Makefile.config
> > > > b/tools/perf/Makefile.config index 85fbcd265351..21470a50ed39
> > > > 100644
> > > > --- a/tools/perf/Makefile.config
> > > > +++ b/tools/perf/Makefile.config
> > > > @@ -259,7 +259,7 @@ CXXFLAGS += -Wno-strict-aliasing  # adding
> > > > assembler files missing the .GNU-stack linker note.
> > > >  LDFLAGS += -Wl,-z,noexecstack
> > > >
> > > > -EXTLIBS = -lpthread -lrt -lm -ldl
> > > > +EXTLIBS = -lpthread -lrt -lm -ldl -lcap
> > >
> > > I wonder we should detect libcap or it's everywhere.. Arnaldo's
> > > compile test suite might tell
> >
> > I'll add this tentatively and try to build it in my test suite.
> 
> So, not even in my notebook this worked straight away:
> 
>   CC   /tmp/build/perf/util/cap.o
>   CC   /tmp/build/perf/util/config.o
> In file included from util/cap.c:5:
> util/cap.h:6:10: fatal error: sys/capability.h: No such file or directory
> 6 | #include <sys/capability.h>
>   |  ^~~~~~~~~~~~~~~~~~
> compilation terminated.
> mv: cannot stat '/tmp/build/perf/util/.cap.o.tmp': No such file or directory
> 
> 
> I had to first do:
> 
> dnf install libcap-devel
> 
> So we need to have a feature test and fail if that is not installed, i.e. 
> libcap
> becomes a hard req for building perf, which I think is reasonable, one more
> shouldn't hurt, right?
> 
> With all the features enabled:
> 
> [acme@quaco perf]$ ldd ~/bin/perf
>   linux-vdso.so.1 (0x7ffe7278a000)
>   libunwind-x86_64.so.8 => /lib64/libunwind-x86_64.so.8
> (0x7f7be52f1000)
>   libunwind.so.8 => /lib64/libunwind.so.8 (0x7f7be52d7000)
>   liblzma.so.5 => /lib64/liblzma.so.5 (0x7f7be52ae000)
>   libpthread.so.0 => /lib64/libpthread.so.0 (0x7f7be528d000)
>   librt.so.1 => /lib64/librt.so.1 (0x7f7be5283000)
>   libm.so.6 => /lib64/libm.so.6 (0x7f7be513d000)
>   libdl.so.2 => /lib64/libdl.so.2 (0x7f7be5135000)
>   libcap.so.2 => /lib64/libcap.so.2 (0x7f7be512e000)
>   libelf.so.1 => /lib64/libelf.so.1 (0x7f7be5113000)
>   libdw.so.1 => /lib64/libdw.so.1 (0x7f7be50c)
>   libslang.so.2 => /lib64/libslang.so.2 (0x7f7be4de8000)
>   libperl.so.5.28 => /lib64/libperl.so.5.28 (0x7f7be4ac2000)
>   libc.so.6 => /lib64/libc.so.6 (0x7f7be48fa000)
>   libpython2.7.so.1.0 => /lib64/libpython2.7.so.1.0
> (0x7f7be469)
>   libz.so.1 => /lib64/libz.so.1 (0x7f7be4676000)
>   libzstd.so.1 => /lib64/libzstd.so.1 (0x7f7be45d1000)
>   libnuma.so.1 => /lib64/libnuma.so.1 (0x7f7be45c3000)
>   libbabeltrace-ctf.so.1 => /lib64/libbabeltrace-ctf.so.1
> (0x7f7be456d000)
>   libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x7f7be4551000)
>   /lib64/ld-linux-x86-64.so.2 (0x7f7be5331000)
>   libbz2.so.1 => /lib64/libbz2.so.1 (0x7f7be453d000)
>   libcrypt.so.2 => /lib64/libcrypt.so.2 (0x7f7be4502000)
>   libutil.so.1 => /lib64/libutil.so.1 (0x7f7be44fd000)
>   libbabeltrace.so.1 => /lib64/libbabeltrace.so.1
> (0x7f7be44ed000)
>   libpopt.so.0 => /lib64/libpopt.so.0 (0x7f7be44dd000)
>   libuuid.so.1 => /lib64/libuuid.so.1 (0x7f7be44d3000)
>   libgmodule-2.0.so.0 => /lib64/libgmodule-2.0.so.0
> (0x7f7be44cd000)
>   libglib-2.0.so.0 => /lib64/libglib-2.0.so.0 (0x7f7be43a9000)
>   libpcre.so.1 => /lib64/libpcre.so.1 (0x7f7be4335000)
> [acme@quaco perf]$
> 
> ;-)
> 
> So, please check tools/build/feature/ and check how this is done and add a
> test and the warning in tools/perf/Makefile.config so that we get an error
> message stating that libcap-dev or libcap-devel should be installed.

I have just posted v2 of the series 
(https://lkml.kernel.org/lkml/cover.1565146171.git.iluba...@akamai.com).

Instead of making libcap a "hard req", I made it a "soft" one. We can still 
build a useful tool w/o libcap. It will just have to assume that perf is 
running with no capabilities, since we ca

RE: [EXT] Re: [PATCH nvmem 1/1] nvmem: imx: add i.MX8QM platform support

2019-08-06 Thread Andy Duan

From: Srinivas Kandagatla  Sent: Tuesday, 
August 6, 2019 6:04 PM
> On 04/07/2019 15:20, fugang.d...@nxp.com wrote:
> > From: Fugang Duan 
> >
> > i.MX8QM efuse table has some difference with i.MX8QXP platform, so add
> > i.MX8QM platform support.
> >
> > Signed-off-by: Fugang Duan 
> > ---
> >   drivers/nvmem/imx-ocotp-scu.c | 7 +++
> >   1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/nvmem/imx-ocotp-scu.c
> > b/drivers/nvmem/imx-ocotp-scu.c index be2f5f0..0d78ab4 100644
> > --- a/drivers/nvmem/imx-ocotp-scu.c
> > +++ b/drivers/nvmem/imx-ocotp-scu.c
> > @@ -16,6 +16,7 @@
> >
> >   enum ocotp_devtype {
> >   IMX8QXP,
> > + IMX8QM,
> >   };
> >
> >   struct ocotp_devtype_data {
> > @@ -39,6 +40,11 @@ static struct ocotp_devtype_data imx8qxp_data = {
> >   .nregs = 800,
> >   };
> >
> > +static struct ocotp_devtype_data imx8qm_data = {
> > + .devtype = IMX8QM,
> > + .nregs = 800,
> > +};
> > +
> >   static int imx_sc_misc_otp_fuse_read(struct imx_sc_ipc *ipc, u32 word,
> >u32 *val)
> >   {
> > @@ -118,6 +124,7 @@ static struct nvmem_config
> > imx_scu_ocotp_nvmem_config = {
> >
> >   static const struct of_device_id imx_scu_ocotp_dt_ids[] = {
> >   { .compatible = "fsl,imx8qxp-scu-ocotp", (void *)&imx8qxp_data
> > },
> > + { .compatible = "fsl,imx8qm-scu-ocotp", (void *)&imx8qm_data },
> >   { },
> 
> Looks like you forgot to add this new compatible to device tree bindings
> at ./Documentation/devicetree/bindings/nvmem/imx-ocotp.txt or forgot to
> add me to CC.
> 
> Please resend the patch with it, I can not apply this as it is.
> 
> Thanks,
> srini

There is no separate binding documentation for the imx-ocotp-scu.c driver.
It is reasonable to add the new compatible string to the binding file below, 
"fsl,scu.txt":
Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt

> 
> >   };
> >   MODULE_DEVICE_TABLE(of, imx_scu_ocotp_dt_ids);
> >

Re: [PATCH RFC tip/core/rcu 02/14] rcu/nocb: Add bypass callback queueing

2019-08-06 Thread Paul E. McKenney

On Tue, Aug 06, 2019 at 09:24:15PM -0400, Steven Rostedt wrote:
> On Tue, 6 Aug 2019 18:17:07 -0700
> "Paul E. McKenney"  wrote:
> 
> > On Tue, Aug 06, 2019 at 08:40:55PM -0400, Steven Rostedt wrote:
> > > On Tue, 6 Aug 2019 17:35:01 -0700
> > > "Paul E. McKenney"  wrote:
> > >   
> > > > > > +   // Don't use ->nocb_bypass during early boot.
> > > > > 
> > > > > Very minor nit: comment style should be /* */
> > > > 
> > > > I thought that Linus said that "//" was now OK.  Am I confused?  
> > > 
> > > Have a link?  
> > 
> > https://lkml.org/lkml/2016/7/8/625
> 
>   The (c) form is particularly good for things like enum or structure
>   member comments at the end of code, where you might want to align
>   things up, but the ending comment marker ends up being visually pretty
>   distracting (and lining _that_ up is too much make-believe work).
> 
> I think it's still for special occasions, and the above example doesn't
> look like one of them ;-)

It does say "particularly good for", not "only good for".  ;-)

> I basically avoid the '//' comment, as it just adds inconsistency.

It saves me two whacks on the shift key and three whacks on other
keys.  ;-)

Thanx, Paul

Re: Slowness forming TIPC cluster with explicit node addresses

2019-08-06 Thread Chris Packham

Hi Jon,

On Wed, 2019-08-07 at 02:55 +, Jon Maloy wrote:
> 
> > 
> > -Original Message-
> > From: Chris Packham 
> > Sent: 4-Aug-19 19:05
> > To: Jon Maloy ; tipc-
> > discuss...@lists.sourceforge.net
> > Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: Slowness forming TIPC cluster with explicit node
> > addresses
> > 
> > On Sun, 2019-08-04 at 21:53 +, Jon Maloy wrote:
> > > 
> > > 
> > > > 
> > > > 
> > > > -Original Message-
> > > > From: netdev-ow...@vger.kernel.org  > > > g>
> > On
> > > 
> > > > 
> > > > Behalf Of Chris Packham
> > > > Sent: 2-Aug-19 01:11
> > > > To: Jon Maloy ; tipc-
> > > > discuss...@lists.sourceforge.net
> > > > Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: Re: Slowness forming TIPC cluster with explicit node
> > > > addresses
> > > > 
> > > > On Mon, 2019-07-29 at 09:04 +1200, Chris Packham wrote:
> > > > > 
> > > > > 
> > > > > On Fri, 2019-07-26 at 13:31 +, Jon Maloy wrote:
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > -Original Message-
> > > > > > > From: netdev-ow...@vger.kernel.org  > > > ow...@vger.kernel.org>
> > > > > 
> > > > > 
> > > > > > 
> > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > On Behalf Of Chris Packham
> > > > > > > Sent: 25-Jul-19 19:37
> > > > > > > To: tipc-discuss...@lists.sourceforge.net
> > > > > > > Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > > Subject: Slowness forming TIPC cluster with explicit node
> > > > > > > addresses
> > > > > > > 
> > > > > > > Hi,
> > > > > > > 
> > > > > > > I'm having problems forming a TIPC cluster between 2
> > > > > > > nodes.
> > > > > > > 
> > > > > > > This is the basic steps I'm going through on each node.
> > > > > > > 
> > > > > > > modprobe tipc
> > > > > > > ip link set eth2 up
> > > > > > > tipc node set addr 1.1.5 # or 1.1.6 tipc bearer enable
> > > > > > > media
> > > > > > > eth dev eth0
> > > > > > eth2, I assume...
> > > > > > 
> > > > > > Yes sorry I keep switching between Ethernet ports for
> > > > > testing
> > > > > so I hand edited the email.
> > > > > 
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > Then to confirm if the cluster is formed I use tipc link
> > > > > > > list
> > > > > > > 
> > > > > > > [root@node-5 ~]# tipc link list
> > > > > > > broadcast-link: up
> > > > > > > ...
> > > > > > > 
> > > > > > > Looking at tcpdump the two nodes are sending packets
> > > > > > > 
> > > > > > > 22:30:05.782320 TIPC v2.0 1.1.5 > 0.0.0, headerlength 60
> > > > > > > bytes,
> > > > > > > MessageSize
> > > > > > > 76 bytes, Neighbor Detection Protocol internal,
> > > > > > > messageType
> > > > > > > Link
> > > > > > > request
> > > > > > > 22:30:05.863555 TIPC v2.0 1.1.6 > 0.0.0, headerlength 60
> > > > > > > bytes,
> > > > > > > MessageSize
> > > > > > > 76 bytes, Neighbor Detection Protocol internal,
> > > > > > > messageType
> > > > > > > Link
> > > > > > > request
> > > > > > > 
> > > > > > > Eventually (after a few minutes) the link does come up
> > > > > > > 
> > > > > > > [root@node-6 ~]# tipc link list
> > > > > > > broadcast-link: up
> > > > > > > 1001006:eth2-1001005:eth2: up
> > > > > > > 
> > > > > > > [root@node-5 ~]# tipc link list
> > > > > > > broadcast-link: up
> > > > > > > 1001005:eth2-1001006:eth2: up
> > > > > > > 
> > > > > > > When I remove the "tipc node set addr" things seem to
> > > > > > > kick
> > > > > > > into
> > > > > > > life straight away
> > > > > > > 
> > > > > > > [root@node-5 ~]# tipc link list
> > > > > > > broadcast-link: up
> > > > > > > 0050b61bd2aa:eth2-0050b61e6dfa:eth2: up
> > > > > > > 
> > > > > > > So there appears to be some difference in behaviour
> > > > > > > between
> > > > > > > having
> > > > > > > an explicit node address and using the default.
> > > > > > > Unfortunately
> > > > > > > our
> > > > > > > application relies on setting the node addresses.
> > > > > > I do this many times a day, without any problems. If there
> > > > > > would be
> > > > > > any time difference, I would expect the 'auto configurable'
> > > > > > version
> > > > > > to be slower, because it involves a DAD step.
> > > > > > Are you sure you don't have any other nodes running in your
> > > > > > system?
> > > > > > 
> > > > > > ///jon
> > > > > > 
> > > > > Nope the two nodes are connected back to back. Does the
> > > > > number of
> > > > > Ethernet interfaces make a difference? As you can see I've
> > > > > got 3
> > > > > on
> > > > > each node. One is completely disconnected, one is for booting
> > > > > over
> > > > > TFTP
> > > > >  (only used by U-boot) and the other is the USB Ethernet I'm
> > > > > using for
> > > > > testing.
> > > > > 
> > > > So I can still reproduce this on nodes that only have one
> > > > network
> > > > interface and
> > > > are the only things connected.

Re: [PATCH v7 01/20] pinctrl: tegra: Add suspend and resume support

2019-08-06 Thread Sowjanya Komatineni




On 8/6/19 2:51 PM, Sowjanya Komatineni wrote:


On 8/5/19 2:20 AM, Linus Walleij wrote:

On Wed, Jul 31, 2019 at 11:11 PM Sowjanya Komatineni
 wrote:


This patch adds support for Tegra pinctrl driver suspend and resume.

During suspend, context of all pinctrl registers are stored and
on resume they are all restored to have all the pinmux and pad
configuration for normal operation.

Acked-by: Thierry Reding 
Reviewed-by: Dmitry Osipenko 
Signed-off-by: Sowjanya Komatineni 

Patch applied to the pinctrl tree.

This patch seems finished.

Also if the rest don't get merged for v5.4 then at least this is so
your patch stack gets more shallow.

I hope it's fine to merge this separately, else tell me and I'll
pull it out.

Yours,
Linus Walleij


Yes, this patch can be merged separately. But, there's latest feedback 
from Dmitry to add barrier after writes to make sure pinmux register 
writes happen.


So will update this patch to add barrier in v8. So, need to wait for v8.

Thanks

Sowjanya

I see it merged. So will exclude suspend/resume patch and will add patch 
for necessary write barrier fix in v8 version.


Thanks

Sowjanya

[PATCH v2 4/4] perf: Use CAP_SYS_ADMIN instead of euid==0 with ftrace

2019-08-06 Thread Igor Lubashev

Kernel requires CAP_SYS_ADMIN instead of euid==0 to mount debugfs for ftrace.
Make perf do the same.

Signed-off-by: Igor Lubashev 
---
 tools/perf/builtin-ftrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index ae1466aa3b26..d09eac8a6d57 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "debug.h"
 #include 
@@ -21,6 +22,7 @@
 #include "target.h"
 #include "cpumap.h"
 #include "thread_map.h"
+#include "util/cap.h"
 #include "util/config.h"
 
 
@@ -281,7 +283,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int 
argc, const char **argv)
.events = POLLIN,
};
 
-   if (geteuid() != 0) {
+   if (!perf_cap__capable(CAP_SYS_ADMIN)) {
pr_err("ftrace only works for root!\n");
return -1;
}
-- 
2.7.4

[PATCH v2 0/4] perf: Use capabilities instead of uid and euid

2019-08-06 Thread Igor Lubashev

Series v1: 
https://lkml.kernel.org/lkml/1562112605-6235-1-git-send-email-iluba...@akamai.com


Kernel is using capabilities instead of uid and euid to restrict access to
kernel pointers and tracing facilities.  This patch series updates perf to
better match the security model used by the kernel.

This series enables instructions in Documentation/admin-guide/perf-security.rst
to actually work, even when kernel.perf_event_paranoid=2 and
kernel.kptr_restrict=1.

The series consists of four patches:

  01: perf: Add capability-related utilities
Add utility functions to check capabilities and perf_event_paranoid checks,
if libcap-dev[el] is available. (Otherwise, assume no capabilities.)

  02: perf: Use CAP_SYS_ADMIN with perf_event_paranoid checks
Replace the use of euid==0 with a check for CAP_SYS_ADMIN whenever
perf_event_paranoid level is verified.

  03: perf: Use CAP_SYSLOG with kptr_restrict checks
Replace the use of uid and euid with a check for CAP_SYSLOG when
kptr_restrict is verified (similar to kernel/kallsyms.c and lib/vsprintf.c).
Consult perf_event_paranoid when kptr_restrict==0 (see kernel/kallsyms.c).

  04: perf: Use CAP_SYS_ADMIN instead of euid==0 with ftrace
Replace the use of euid==0 with a check for CAP_SYS_ADMIN before mounting
debugfs for ftrace.

I tested this by following Documentation/admin-guide/perf-security.rst
guidelines and setting sysctls:

   kernel.perf_event_paranoid=2
   kernel.kptr_restrict=1

As an unprivileged user who is in perf_users group (set up via instructions
above), I executed:
   perf record -a -- sleep 1

Without the patch, perf record did not capture any kernel functions.
With the patch, perf included all kernel functions.


Changelog:
v2:  * Added a build feature check for libcap-dev[el] as suggested by Arnaldo


Igor Lubashev (4):
  perf: Add capability-related utilities
  perf: Use CAP_SYS_ADMIN with perf_event_paranoid checks
  perf: Use CAP_SYSLOG with kptr_restrict checks
  perf: Use CAP_SYS_ADMIN instead of euid==0 with ftrace

 tools/build/Makefile.feature |  2 ++
 tools/build/feature/Makefile |  4 
 tools/build/feature/test-libcap.c| 20 
 tools/perf/Makefile.config   | 11 +++
 tools/perf/Makefile.perf |  2 ++
 tools/perf/arch/arm/util/cs-etm.c|  3 ++-
 tools/perf/arch/arm64/util/arm-spe.c |  4 ++--
 tools/perf/arch/x86/util/intel-bts.c |  3 ++-
 tools/perf/arch/x86/util/intel-pt.c  |  2 +-
 tools/perf/builtin-ftrace.c  |  4 +++-
 tools/perf/util/Build|  2 ++
 tools/perf/util/cap.c| 29 +
 tools/perf/util/cap.h| 24 
 tools/perf/util/event.h  |  1 +
 tools/perf/util/evsel.c  |  2 +-
 tools/perf/util/python-ext-sources   |  1 +
 tools/perf/util/symbol.c | 15 +++
 tools/perf/util/util.c   |  9 +
 18 files changed, 127 insertions(+), 11 deletions(-)
 create mode 100644 tools/build/feature/test-libcap.c
 create mode 100644 tools/perf/util/cap.c
 create mode 100644 tools/perf/util/cap.h

-- 
2.7.4

[PATCH v2 2/4] perf: Use CAP_SYS_ADMIN with perf_event_paranoid checks

2019-08-06 Thread Igor Lubashev

The kernel is using CAP_SYS_ADMIN instead of euid==0 to override
perf_event_paranoid check. Make perf do the same.

Signed-off-by: Igor Lubashev 
---
 tools/perf/arch/arm/util/cs-etm.c| 3 ++-
 tools/perf/arch/arm64/util/arm-spe.c | 4 ++--
 tools/perf/arch/x86/util/intel-bts.c | 3 ++-
 tools/perf/arch/x86/util/intel-pt.c  | 2 +-
 tools/perf/util/evsel.c  | 2 +-
 5 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/tools/perf/arch/arm/util/cs-etm.c 
b/tools/perf/arch/arm/util/cs-etm.c
index 5cb07e8cb296..b87a1ca2968f 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -18,6 +18,7 @@
 #include "../../perf.h"
 #include "../../util/auxtrace.h"
 #include "../../util/cpumap.h"
+#include "../../util/event.h"
 #include "../../util/evlist.h"
 #include "../../util/evsel.h"
 #include "../../util/pmu.h"
@@ -254,7 +255,7 @@ static int cs_etm_recording_options(struct auxtrace_record 
*itr,
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
struct evsel *evsel, *cs_etm_evsel = NULL;
struct perf_cpu_map *cpus = evlist->core.cpus;
-   bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+   bool privileged = perf_event_paranoid_check(-1);
int err = 0;
 
ptr->evlist = evlist;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c 
b/tools/perf/arch/arm64/util/arm-spe.c
index 00915b8fd05b..200bc973371b 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -12,6 +12,7 @@
 #include 
 
 #include "../../util/cpumap.h"
+#include "../../util/event.h"
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
 #include "../../util/session.h"
@@ -65,8 +66,7 @@ static int arm_spe_recording_options(struct auxtrace_record 
*itr,
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
-   struct evsel *evsel, *arm_spe_evsel = NULL;
-   bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+   bool privileged = perf_event_paranoid_check(-1);
struct evsel *tracking_evsel;
int err;
 
diff --git a/tools/perf/arch/x86/util/intel-bts.c 
b/tools/perf/arch/x86/util/intel-bts.c
index 7b23318ebd7b..56a76142e9fd 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -12,6 +12,7 @@
 #include 
 
 #include "../../util/cpumap.h"
+#include "../../util/event.h"
 #include "../../util/evsel.h"
 #include "../../util/evlist.h"
 #include "../../util/session.h"
@@ -107,7 +108,7 @@ static int intel_bts_recording_options(struct 
auxtrace_record *itr,
struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
struct evsel *evsel, *intel_bts_evsel = NULL;
const struct perf_cpu_map *cpus = evlist->core.cpus;
-   bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+   bool privileged = perf_event_paranoid_check(-1);
 
btsr->evlist = evlist;
btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
diff --git a/tools/perf/arch/x86/util/intel-pt.c 
b/tools/perf/arch/x86/util/intel-pt.c
index 218a4e694618..43d5088ee824 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -558,7 +558,7 @@ static int intel_pt_recording_options(struct 
auxtrace_record *itr,
bool have_timing_info, need_immediate = false;
struct evsel *evsel, *intel_pt_evsel = NULL;
const struct perf_cpu_map *cpus = evlist->core.cpus;
-   bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+   bool privileged = perf_event_paranoid_check(-1);
u64 tsc_bit;
int err;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 64bc32ed6dfa..eafc134bf17c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -279,7 +279,7 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr 
*attr, int idx)
 
 static bool perf_event_can_profile_kernel(void)
 {
-   return geteuid() == 0 || perf_event_paranoid() == -1;
+   return perf_event_paranoid_check(-1);
 }
 
 struct evsel *perf_evsel__new_cycles(bool precise)
-- 
2.7.4

[PATCH v2 1/4] perf: Add capability-related utilities

2019-08-06 Thread Igor Lubashev

Add utilities to help check the capabilities of the running process.
Make perf link with libcap, if it is available. If no libcap-dev[el],
assume no capabilities.

Signed-off-by: Igor Lubashev 
---
 tools/build/Makefile.feature   |  2 ++
 tools/build/feature/Makefile   |  4 
 tools/build/feature/test-libcap.c  | 20 
 tools/perf/Makefile.config | 11 +++
 tools/perf/Makefile.perf   |  2 ++
 tools/perf/util/Build  |  2 ++
 tools/perf/util/cap.c  | 29 +
 tools/perf/util/cap.h  | 24 
 tools/perf/util/event.h|  1 +
 tools/perf/util/python-ext-sources |  1 +
 tools/perf/util/util.c |  9 +
 11 files changed, 105 insertions(+)
 create mode 100644 tools/build/feature/test-libcap.c
 create mode 100644 tools/perf/util/cap.c
 create mode 100644 tools/perf/util/cap.h

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 86b793dffbc4..8a19753cc26a 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -42,6 +42,7 @@ FEATURE_TESTS_BASIC :=  \
 gtk2-infobar\
 libaudit\
 libbfd  \
+libcap  \
 libelf  \
 libelf-getphdrnum   \
 libelf-gelf_getnote \
@@ -110,6 +111,7 @@ FEATURE_DISPLAY ?=  \
  gtk2   \
  libaudit   \
  libbfd \
+ libcap \
  libelf \
  libnuma\
  numa_num_possible_cpus \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0658b8cd0e53..8499385365c0 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -20,6 +20,7 @@ FILES=  \
  test-libbfd-liberty.bin\
  test-libbfd-liberty-z.bin  \
  test-cplus-demangle.bin\
+ test-libcap.bin   \
  test-libelf.bin\
  test-libelf-getphdrnum.bin \
  test-libelf-gelf_getnote.bin   \
@@ -105,6 +106,9 @@ $(OUTPUT)test-fortify-source.bin:
 $(OUTPUT)test-bionic.bin:
$(BUILD)
 
+$(OUTPUT)test-libcap.bin:
+   $(BUILD) -lcap
+
 $(OUTPUT)test-libelf.bin:
$(BUILD) -lelf
 
diff --git a/tools/build/feature/test-libcap.c 
b/tools/build/feature/test-libcap.c
new file mode 100644
index ..d2a2e152195f
--- /dev/null
+++ b/tools/build/feature/test-libcap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+
+int main(void)
+{
+   cap_flag_value_t val;
+   cap_t caps = cap_get_proc();
+
+   if (!caps)
+   return 1;
+
+   if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &val) != 0)
+   return 1;
+
+   if (cap_free(caps) != 0)
+   return 1;
+
+   return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e4988f49ea79..9a06787fedc6 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -824,6 +824,17 @@ ifndef NO_LIBZSTD
   endif
 endif
 
+ifndef NO_LIBCAP
+  ifeq ($(feature-libcap), 1)
+CFLAGS += -DHAVE_LIBCAP_SUPPORT
+EXTLIBS += -lcap
+$(call detected,CONFIG_LIBCAP)
+  else
+msg := $(warning No libcap found, disables capability support, please 
install libcap-devel/libcap-dev);
+NO_LIBCAP := 1
+  endif
+endif
+
 ifndef NO_BACKTRACE
   ifeq ($(feature-backtrace), 1)
 CFLAGS += -DHAVE_BACKTRACE_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 67512a12276b..f9807d8c005b 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -88,6 +88,8 @@ include ../scripts/utilities.mak
 #
 # Define NO_LIBBPF if you do not want BPF support
 #
+# Define NO_LIBCAP if you do not want process capabilities considered by perf
+#
 # Define NO_SDT if you do not want to define SDT event in perf tools,
 # note that it doesn't disable SDT scanning support.
 #
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 7abf05131889..7cda749059a9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -148,6 +148,8 @@ perf-$(CONFIG_ZLIB) += zlib.o
 perf-$(CONFIG_LZMA) += lzma.o
 perf-$(CONFIG_ZSTD) += zstd.o
 
+perf-$(CONFIG_LIBCAP) += cap.o
+
 perf-y += demangle-java.o
 perf-y += demangle-rust.o
 
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
new file mode 100644
index ..c3ba841bbf37
--- /dev/null
+++ b/tools/perf/util/cap.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Capability utilities
+ */
+
+#ifdef HAVE_LIBCAP_SUPPORT
+
+#include "cap.h"
+#include 
+#include 
+
+bool perf_cap__capable(cap_va

[PATCH v2 3/4] perf: Use CAP_SYSLOG with kptr_restrict checks

2019-08-06 Thread Igor Lubashev

Kernel is using CAP_SYSLOG capability instead of uid==0 and euid==0 when
checking kptr_restrict. Make perf do the same.

Also, the kernel is more restrictive than "no restrictions" in case of
kptr_restrict==0, so add the same logic to perf.

Signed-off-by: Igor Lubashev 
---
 tools/perf/util/symbol.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 173f3378aaa0..046271103499 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -4,6 +4,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -15,8 +16,10 @@
 #include 
 #include "annotate.h"
 #include "build-id.h"
+#include "cap.h"
 #include "util.h"
 #include "debug.h"
+#include "event.h"
 #include "machine.h"
 #include "map.h"
 #include "symbol.h"
@@ -890,7 +893,11 @@ bool symbol__restricted_filename(const char *filename,
 {
bool restricted = false;
 
-   if (symbol_conf.kptr_restrict) {
+   /* Per kernel/kallsyms.c:
+* we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG
+*/
+   if (symbol_conf.kptr_restrict ||
+   (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG))) {
char *r = realpath(filename, NULL);
 
if (r != NULL) {
@@ -2190,9 +2197,9 @@ static bool symbol__read_kptr_restrict(void)
char line[8];
 
if (fgets(line, sizeof(line), fp) != NULL)
-   value = ((geteuid() != 0) || (getuid() != 0)) ?
-   (atoi(line) != 0) :
-   (atoi(line) == 2);
+   value = perf_cap__capable(CAP_SYSLOG) ?
+   (atoi(line) >= 2) :
+   (atoi(line) != 0);
 
fclose(fp);
}
-- 
2.7.4

[PATCH] RDMA/hns: remove obsolete Kconfig comment

2019-08-06 Thread YueHaibing

Since commit a07fc0bb483e ("RDMA/hns: Fix build error")
these Kconfig comments are obsolete, so just remove them.

Signed-off-by: YueHaibing 
---
 drivers/infiniband/hw/hns/Kconfig | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/infiniband/hw/hns/Kconfig 
b/drivers/infiniband/hw/hns/Kconfig
index 5478219..d602b69 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -8,8 +8,6 @@ config INFINIBAND_HNS
  is used in Hisilicon Hip06 and more further ICT SoC based on
  platform device.
 
- To compile HIP06 or HIP08 driver as module, choose M here.
-
 config INFINIBAND_HNS_HIP06
tristate "Hisilicon Hip06 Family RoCE support"
depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
@@ -17,15 +15,9 @@ config INFINIBAND_HNS_HIP06
  RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
  Hip07 SoC. These RoCE engines are platform devices.
 
- To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
- module will be called hns-roce-hw-v1
-
 config INFINIBAND_HNS_HIP08
tristate "Hisilicon Hip08 Family RoCE support"
depends on INFINIBAND_HNS && PCI && HNS3
---help---
  RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
  The RoCE engine is a PCI device.
-
- To compile this driver, choose Y here: if INFINIBAND_HNS is m, this
- module will be called hns-roce-hw-v2.
-- 
2.7.4

[PATCH] ALSA: usb-midi: fix a memory leak bug

2019-08-06 Thread Wenwen Wang

In __snd_usbmidi_create(), a MIDI streaming interface structure is
allocated through kzalloc() and the pointer is saved to 'umidi'. Later on,
the endpoint structures are created by invoking
snd_usbmidi_create_endpoints_midiman() or snd_usbmidi_create_endpoints(),
depending on the audio quirk type. However, if the creation
fails, the allocated 'umidi' is not deallocated, leading to a memory leak
bug.

To fix the above issue, free 'umidi' before returning the error.

Signed-off-by: Wenwen Wang 
---
 sound/usb/midi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index b737f0e..22db37f 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -2476,7 +2476,7 @@ int __snd_usbmidi_create(struct snd_card *card,
else
err = snd_usbmidi_create_endpoints(umidi, endpoints);
if (err < 0)
-   goto exit;
+   goto free_midi;

usb_autopm_get_interface_no_resume(umidi->iface);

-- 
2.7.4

Re: [PATCH v4 09/10] powerpc/fsl_booke/kaslr: support nokaslr cmdline parameter

2019-08-06 Thread Jason Yan





On 2019/8/6 15:59, Christophe Leroy wrote:



Le 05/08/2019 à 08:43, Jason Yan a écrit :

One may want to disable kaslr at boot, so provide a cmdline parameter
'nokaslr' to support this.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 


Reviewed-by: Christophe Leroy 

Tiny comment below.


---
  arch/powerpc/kernel/kaslr_booke.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c

index 4b3f19a663fc..7c3cb41e7122 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -361,6 +361,18 @@ static unsigned long __init 
kaslr_choose_location(void *dt_ptr, phys_addr_t size

  return kaslr_offset;
  }
+static inline __init bool kaslr_disabled(void)
+{
+    char *str;
+
+    str = strstr(boot_command_line, "nokaslr");
+    if ((str == boot_command_line) ||
+    (str > boot_command_line && *(str - 1) == ' '))
+    return true;


I don't think additional () are needed for the left part 'str == 
boot_command_line'




Agree.


+
+    return false;
+}
+
  /*
   * To see if we need to relocate the kernel to a random offset
   * void *dt_ptr - address of the device tree
@@ -376,6 +388,8 @@ notrace void __init kaslr_early_init(void *dt_ptr, 
phys_addr_t size)

  kernel_sz = (unsigned long)_end - KERNELBASE;
  kaslr_get_cmdline(dt_ptr);
+    if (kaslr_disabled())
+    return;
  offset = kaslr_choose_location(dt_ptr, size, kernel_sz);



.

Re: [PATCH] arch/microblaze: add support for get_user() of size 8 bytes

2019-08-06 Thread Leon Romanovsky

On Tue, Aug 06, 2019 at 03:36:37PM -0700, Randy Dunlap wrote:
> From: Randy Dunlap 
>
> arch/microblaze/ is missing support for get_user() of size 8 bytes,
> so add it by using __copy_from_user().
>
> Fixes these build errors:
>drivers/infiniband/core/uverbs_main.o: In function `ib_uverbs_write':
>drivers/infiniband/core/.tmp_gl_uverbs_main.o:(.text+0x13a4): undefined 
> reference to `__user_bad'
>drivers/android/binder.o: In function `binder_thread_write':
>drivers/android/.tmp_gl_binder.o:(.text+0xda6c): undefined reference to 
> `__user_bad'
>drivers/android/.tmp_gl_binder.o:(.text+0xda98): undefined reference to 
> `__user_bad'
>drivers/android/.tmp_gl_binder.o:(.text+0xdf10): undefined reference to 
> `__user_bad'
>drivers/android/.tmp_gl_binder.o:(.text+0xe498): undefined reference to 
> `__user_bad'
>drivers/android/binder.o:drivers/android/.tmp_gl_binder.o:(.text+0xea78): 
> more undefined references to `__user_bad' follow
>
> 'make allmodconfig' now builds successfully for arch/microblaze/.
>
> Fixes: 538722ca3b76 ("microblaze: fix get_user/put_user side-effects")
> Reported-by: kbuild test robot 
> Signed-off-by: Randy Dunlap 
> Cc: Al Viro 
> Cc: Steven J. Magnani 
> Cc: Michal Simek 
> Cc: Jason Gunthorpe 
> Cc: Leon Romanovsky 
> Cc: Andrew Morton 
> ---
>  arch/microblaze/include/asm/uaccess.h |6 ++
>  1 file changed, 6 insertions(+)
>

Thanks, it works for us.
Reviewed-by: Leon Romanovsky

Re: [PATCH net v2] net: dsa: Check existence of .port_mdb_add callback before calling it

2019-08-06 Thread Chen-Yu Tsai

On Wed, Aug 7, 2019 at 4:34 AM Vivien Didelot  wrote:
>
> Hi Chen-Yu,
>
> On Wed, 7 Aug 2019 01:49:37 +0800, Chen-Yu Tsai  wrote:
> > On Wed, Aug 7, 2019 at 1:15 AM Vivien Didelot  
> > wrote:
> > >
> > > Hi Chen-Yu,
> > >
> > > On Tue,  6 Aug 2019 15:53:25 +0800, Chen-Yu Tsai  wrote:
> > > > From: Chen-Yu Tsai 
> > > >
> > > > With the recent addition of commit 75dad2520fc3 ("net: dsa: b53: Disable
> > > > all ports on setup"), users of b53 (BCM53125 on Lamobo R1 in my case)
> > > > are forced to use the dsa subsystem to enable the switch, instead of
> > > > having it in the default transparent "forward-to-all" mode.
> > > >
> > > > The b53 driver does not support mdb bitmap functions. However the dsa
> > > > layer does not check for the existence of the .port_mdb_add callback
> > > > before actually using it. This results in a NULL pointer dereference,
> > > > as shown in the kernel oops below.
> > > >
> > > > The other functions seem to be properly guarded. Do the same for
> > > > .port_mdb_add in dsa_switch_mdb_add_bitmap() as well.
> > > >
> > > > b53 is not the only driver that doesn't support mdb bitmap functions.
> > > > Others include bcm_sf2, dsa_loop, lantiq_gswip, mt7530, mv88e6060,
> > > > qca8k, realtek-smi, and vitesse-vsc73xx.
> > >
> > > I don't know what you mean by that, there's no "mdb bitmap function"
> > > support for drivers, only the port_mdb_{prepare,add,del} callbacks...
> >
> > The term was coined from commit e6db98db8a95 ("net: dsa: add switch mdb
> > bitmap functions"). But yeah, .port_mdb_* ops/callbacks would be more
> > appropriate.
> >
> > > > 8<--- cut here ---
> > > > Unable to handle kernel NULL pointer dereference at virtual address 
> > > > 
> > > > pgd = (ptrval)
> > > > [] *pgd=
> > > > Internal error: Oops: 8005 [#1] SMP ARM
> > > > Modules linked in: rtl8xxxu rtl8192cu rtl_usb rtl8192c_common 
> > > > rtlwifi mac80211 cfg80211
> > > > CPU: 1 PID: 134 Comm: kworker/1:2 Not tainted 
> > > > 5.3.0-rc1-00247-gd3519030752a #1
> > > > Hardware name: Allwinner sun7i (A20) Family
> > > > Workqueue: events switchdev_deferred_process_work
> > > > PC is at 0x0
> > > > LR is at dsa_switch_event+0x570/0x620
> > > > pc : [<>]lr : []psr: 80070013
> > > > sp : ee871db8  ip :   fp : ee98d0a4
> > > > r10: 000c  r9 : 0008  r8 : ee89f710
> > > > r7 : ee98d040  r6 : ee98d088  r5 : c0f04c48  r4 : ee98d04c
> > > > r3 :   r2 : ee89f710  r1 : 0008  r0 : ee98d040
> > > > Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > > > Control: 10c5387d  Table: 6deb406a  DAC: 0051
> > > > Process kworker/1:2 (pid: 134, stack limit = 0x(ptrval))
> > > > Stack: (0xee871db8 to 0xee872000)
> > > > 1da0:   
> > > > ee871e14 103ace2d
> > > > 1dc0:    ee871e14 0005  
> > > > c08524a0 
> > > > 1de0: e000 c014bdfc c0f04c48 ee871e98 c0f04c48 ee9e5000 
> > > > c0851120 c014bef0
> > > > 1e00:  b643aea2 ee9b4068 c08509a8 ee2bf940 ee89f710 
> > > > ee871ecb 
> > > > 1e20: 0008 103ace2d  c087e248 ee29c868 103ace2d 
> > > > 0001 
> > > > 1e40:  ee871e98 0006  c0fb2a50 c087e2d0 
> > > >  c08523c4
> > > > 1e60:  c014bdfc 0006 c0fad2d0 ee871e98 ee89f710 
> > > >  c014c500
> > > > 1e80:  ee89f3c0 c0f04c48  ee9e5000 c087dfb4 
> > > > ee9e5000 
> > > > 1ea0: ee89f710 ee871ecb 0001 103ace2d  c0f04c48 
> > > >  c087e0a8
> > > > 1ec0:  efd9a3e0 0089f3c0 103ace2d ee89f700 ee89f710 
> > > > ee9e5000 0122
> > > > 1ee0: 0100 c087e130 ee89f700 c0fad2c8 c1003ef0 c087de4c 
> > > > 2e928000 c0fad2ec
> > > > 1f00: c0fad2ec ee839580 ef7a62c0 ef7a9400  c087def8 
> > > > c0fad2ec c01447dc
> > > > 1f20: ef315640 ef7a62c0 0008 ee839580 ee839594 ef7a62c0 
> > > > 0008 c0f03d00
> > > > 1f40: ef7a62d8 ef7a62c0 e000 c0145b84 e000 c0fb2420 
> > > > c0bfaa8c 
> > > > 1f60: e000 ee84b600 ee84b5c0  ee87 ee839580 
> > > > c0145b40 ef0e5ea4
> > > > 1f80: ee84b61c c014a6f8 0001 ee84b5c0 c014a5b0  
> > > >  
> > > > 1fa0:    c01010e8   
> > > >  
> > > > 1fc0:       
> > > >  
> > > > 1fe0:     0013  
> > > >  
> > > > [] (dsa_switch_event) from [] 
> > > > (notifier_call_chain+0x48/0x84)
> > > > [] (notifier_call_chain) from [] 
> > > > (raw_notifier_call_chain+0x18/0x20)
> > > > [] (raw_notifier_call_chain) from [] 
> > > > (dsa_port_mdb_add+0x48/0x74)
> > > > [] (dsa_port_mdb_add) from [] 
>

WARNING in cgroup_rstat_updated

2019-08-06 Thread syzbot


Hello,

syzbot found the following crash on:

HEAD commit:31cc088a Merge tag 'drm-next-2019-07-19' of git://anongit...
git tree:   net-next
console output: https://syzkaller.appspot.com/x/log.txt?x=102db48c60
kernel config:  https://syzkaller.appspot.com/x/.config?x=4dba67bf8b8c9ad7
dashboard link: https://syzkaller.appspot.com/bug?extid=370e4739fa489334a4ef
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=16dd57dc60

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+370e4739fa489334a...@syzkaller.appspotmail.com

8021q: adding VLAN 0 to HW filter on device batadv0
WARNING: CPU: 1 PID: 9095 at mm/page_counter.c:62 page_counter_cancel  
mm/page_counter.c:62 [inline]
WARNING: CPU: 1 PID: 9095 at mm/page_counter.c:62  
page_counter_cancel+0x5a/0x70 mm/page_counter.c:55

Kernel panic - not syncing: panic_on_warn set ...
Shutting down cpus with NMI
Kernel Offset: disabled

==
WARNING: possible circular locking dependency detected
5.2.0+ #67 Not tainted
--
syz-executor.2/9306 is trying to acquire lock:
e4252251 ((console_sem).lock){-.-.}, at: down_trylock+0x13/0x70  
kernel/locking/semaphore.c:135


but task is already holding lock:
0fdb8781 (per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu)){-...}, at:  
cgroup_rstat_updated+0x115/0x2f0 kernel/cgroup/rstat.c:49


which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #3 (per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu)){-...}:
   __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
   _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:159
   cgroup_rstat_updated+0x115/0x2f0 kernel/cgroup/rstat.c:49
   cgroup_base_stat_cputime_account_end.isra.0+0x1d/0x60  
kernel/cgroup/rstat.c:361

   __cgroup_account_cputime+0x9e/0xd0 kernel/cgroup/rstat.c:371
   cgroup_account_cputime include/linux/cgroup.h:782 [inline]
   update_curr+0x3c8/0x8d0 kernel/sched/fair.c:862
   dequeue_entity+0x1e/0x1100 kernel/sched/fair.c:4014
   dequeue_task_fair+0x65/0x870 kernel/sched/fair.c:5306
   dequeue_task+0x77/0x2e0 kernel/sched/core.c:1195
   sched_move_task+0x1fb/0x350 kernel/sched/core.c:6847
   cpu_cgroup_attach+0x6d/0xb0 kernel/sched/core.c:6970
   cgroup_migrate_execute+0xc56/0x1350 kernel/cgroup/cgroup.c:2524
   cgroup_migrate+0x14f/0x1f0 kernel/cgroup/cgroup.c:2780
   cgroup_attach_task+0x57f/0x860 kernel/cgroup/cgroup.c:2817
   cgroup_procs_write+0x340/0x400 kernel/cgroup/cgroup.c:4777
   cgroup_file_write+0x241/0x790 kernel/cgroup/cgroup.c:3754
   kernfs_fop_write+0x2b8/0x480 fs/kernfs/file.c:315
   __vfs_write+0x8a/0x110 fs/read_write.c:494
   vfs_write+0x268/0x5d0 fs/read_write.c:558
   ksys_write+0x14f/0x290 fs/read_write.c:611
   __do_sys_write fs/read_write.c:623 [inline]
   __se_sys_write fs/read_write.c:620 [inline]
   __x64_sys_write+0x73/0xb0 fs/read_write.c:620
   do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> #2 (&rq->lock){-.-.}:
   __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
   _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:151
   rq_lock kernel/sched/sched.h:1207 [inline]
   task_fork_fair+0x6a/0x520 kernel/sched/fair.c:9940
   sched_fork+0x3af/0x900 kernel/sched/core.c:2783
   copy_process+0x1b04/0x6b00 kernel/fork.c:1987
   _do_fork+0x146/0xfa0 kernel/fork.c:2369
   kernel_thread+0xbb/0xf0 kernel/fork.c:2456
   rest_init+0x28/0x37b init/main.c:417
   arch_call_rest_init+0xe/0x1b
   start_kernel+0x912/0x951 init/main.c:785
   x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:472
   x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:453
   secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243

-> #1 (&p->pi_lock){-.-.}:
   __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
   _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:159
   try_to_wake_up+0xb0/0x1aa0 kernel/sched/core.c:2432
   wake_up_process+0x10/0x20 kernel/sched/core.c:2548
   __up.isra.0+0x136/0x1a0 kernel/locking/semaphore.c:261
   up+0x9c/0xe0 kernel/locking/semaphore.c:186
   __up_console_sem+0xb7/0x1c0 kernel/printk/printk.c:244
   console_unlock+0x695/0xf10 kernel/printk/printk.c:2481
   vprintk_emit+0x2a0/0x700 kernel/printk/printk.c:1986
   vprintk_default+0x28/0x30 kernel/printk/printk.c:2013
   vprintk_func+0x7e/0x189 kernel/printk/printk_safe.c:386
   printk+0xba/0xed kernel/printk/printk.c:2046
   check_stack_usage kernel/exit.c:765 [inline]
   do_exit.cold+0x18b/0x314 kernel/exit.c:927
   do_group_exit+0x135/0x360 kernel/exit.c:981
   __do_sys_

Re: [PATCH v3] mlx5: Use refcount_t for refcount

2019-08-06 Thread Leon Romanovsky

On Tue, Aug 06, 2019 at 08:40:11PM +, Saeed Mahameed wrote:
> On Tue, 2019-08-06 at 09:59 +0800, Chuhong Yuan wrote:
> > Reference counters are preferred to use refcount_t instead of
> > atomic_t.
> > This is because the implementation of refcount_t can prevent
> > overflows and detect possible use-after-free.
> > So convert atomic_t ref counters to refcount_t.
> >
> > Signed-off-by: Chuhong Yuan 
> > ---
> > Changes in v3:
> >   - Merge v2 patches together.
> >
> >  drivers/infiniband/hw/mlx5/srq_cmd.c | 6 +++---
> >  drivers/net/ethernet/mellanox/mlx5/core/qp.c | 6 +++---
> >  include/linux/mlx5/driver.h  | 3 ++-
> >  3 files changed, 8 insertions(+), 7 deletions(-)
> >
>
> LGTM, Leon, let me know if you are happy with this version,
> this should go to mlx5-next.

Thanks,
Acked-by: Leon Romanovsky

Re: [PATCH v4 07/10] powerpc/fsl_booke/32: randomize the kernel image offset

2019-08-06 Thread Jason Yan





On 2019/8/6 15:56, Christophe Leroy wrote:



Le 05/08/2019 à 08:43, Jason Yan a écrit :

Now that we have basic support for relocating the kernel to some
appropriate place, we can start to randomize the offset.

Entropy is derived from the banner and timer, which will change every
build and boot. This is not very safe, so additionally the bootloader may
pass entropy via the /chosen/kaslr-seed node in device tree.

We will use the first 512M of the low memory to randomize the kernel
image. The memory will be split in 64M zones. We will use the lower 8
bits of the entropy to decide the index of the 64M zone. Then we choose a
16K aligned offset inside the 64M zone to put the kernel in.

                               KERNELBASE

        |-->   64M   <--|
        |               |
        +---------------+    +----------------+---------------+
        |               |....|    |kernel|    |               |
        +---------------+    +----------------+---------------+
        |                         |
        |----->   offset    <-----|

                              kimage_vaddr

We also check if we will overlap with some areas like the dtb area, the
initrd area or the crashkernel area. If we cannot find a proper area,
kaslr will be disabled and boot from the original kernel.

Signed-off-by: Jason Yan 
Cc: Diana Craciun 
Cc: Michael Ellerman 
Cc: Christophe Leroy 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Nicholas Piggin 
Cc: Kees Cook 
Reviewed-by: Diana Craciun 
Tested-by: Diana Craciun 


Reviewed-by: Christophe Leroy 



Thanks for your help,


One small comment below


---
  arch/powerpc/kernel/kaslr_booke.c | 322 +-
  1 file changed, 320 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kaslr_booke.c 
b/arch/powerpc/kernel/kaslr_booke.c

index 30f84c0321b2..97250cad71de 100644
--- a/arch/powerpc/kernel/kaslr_booke.c
+++ b/arch/powerpc/kernel/kaslr_booke.c
@@ -23,6 +23,8 @@
  #include 
  #include 
  #include 
+#include 
+#include 
  #include 
  #include 
  #include 
@@ -34,15 +36,329 @@
  #include 
  #include 
  #include 
+#include 
  #include 
+#include 
+#include 
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_ERR fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+struct regions {
+    unsigned long pa_start;
+    unsigned long pa_end;
+    unsigned long kernel_size;
+    unsigned long dtb_start;
+    unsigned long dtb_end;
+    unsigned long initrd_start;
+    unsigned long initrd_end;
+    unsigned long crash_start;
+    unsigned long crash_end;
+    int reserved_mem;
+    int reserved_mem_addr_cells;
+    int reserved_mem_size_cells;
+};
  extern int is_second_reloc;
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+    LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+    int node = fdt_path_offset(fdt, "/chosen");
+
+    early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void 
*area,

+   size_t size)
+{
+    size_t i;
+    unsigned long *ptr = (unsigned long *)area;


As area is a void *, this cast shouldn't be necessary. Or maybe it is 
necessary because it discards the const ?




It's true the cast is not necessary. The ptr can be made const and then 
remove the cast.



Christophe

[PATCH] team: Add vlan tx offload to hw_enc_features

2019-08-06 Thread YueHaibing

We should also enable team's vlan tx offload in hw_enc_features,
pass the vlan packets to the slave devices with vlan tci, and let them
handle vlan tunneling offload implementation.

Fixes: 3268e5cb494d ("team: Advertise tunneling offload features")
Signed-off-by: YueHaibing 
---
 drivers/net/team/team.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index abfa0da..e8089de 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1004,6 +1004,8 @@ static void __team_compute_features(struct team *team)
 
team->dev->vlan_features = vlan_features;
team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+NETIF_F_HW_VLAN_CTAG_TX |
+NETIF_F_HW_VLAN_STAG_TX |
 NETIF_F_GSO_UDP_L4;
team->dev->hard_header_len = max_hard_header_len;
 
-- 
2.7.4

Re: [PATCH 1/2] KEYS: Replace uid/gid/perm permissions checking with an ACL

2019-08-06 Thread Eric Biggers

On Tue, Jul 30, 2019 at 06:16:14PM -0700, Eric Biggers wrote:
> On Mon, Jul 29, 2019 at 08:49:56PM -0700, Eric Biggers wrote:
> > Hi David,
> > 
> > On Tue, Jul 09, 2019 at 06:16:01PM -0700, Eric Biggers wrote:
> > > On Thu, May 23, 2019 at 04:58:27PM +0100, David Howells wrote:
> > > > Replace the uid/gid/perm permissions checking on a key with an ACL to 
> > > > allow
> > > > the SETATTR and SEARCH permissions to be split.  This will also allow a
> > > > greater range of subjects to be represented.
> > > > 
> > > 
> > > This patch broke 'keyctl new_session', and hence broke all the fscrypt 
> > > tests:
> > > 
> > > $ keyctl new_session
> > > keyctl_session_to_parent: Permission denied
> > > 
> > > Output of 'keyctl show' is
> > > 
> > > $ keyctl show
> > > Session Keyring
> > >  605894913 --alswrv  0 0  keyring: _ses
> > >  189223103 s-rv  0 0   \_ user: invocation_id
> > > 
> > > - Eric
> > 
> > This bug is still present in next-20190729.
> > 
> > - Eric
> 
> This fixes it:
> 
> diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
> index aa3bfcadbc660..519c94f1cc3c2 100644
> --- a/security/keys/process_keys.c
> +++ b/security/keys/process_keys.c
> @@ -58,7 +58,7 @@ static struct key_acl session_keyring_acl = {
>   .possessor_viewable = true,
>   .nr_ace = 2,
>   .aces = {
> - KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
> + KEY_POSSESSOR_ACE(KEY_ACE__PERMS),
>   KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
>   }
>  };
> 
> 
> The old permissions were KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, so
> I'm not sure why JOIN permission was removed?
> 
> - Eric

Ping.  This is still broken in linux-next.

- Eric

RE: Slowness forming TIPC cluster with explicit node addresses

2019-08-06 Thread Jon Maloy



> -Original Message-
> From: Chris Packham 
> Sent: 4-Aug-19 19:05
> To: Jon Maloy ; tipc-
> discuss...@lists.sourceforge.net
> Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: Slowness forming TIPC cluster with explicit node addresses
> 
> On Sun, 2019-08-04 at 21:53 +, Jon Maloy wrote:
> >
> > >
> > > -Original Message-
> > > From: netdev-ow...@vger.kernel.org 
> On
> > > Behalf Of Chris Packham
> > > Sent: 2-Aug-19 01:11
> > > To: Jon Maloy ; tipc-
> > > discuss...@lists.sourceforge.net
> > > Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: Re: Slowness forming TIPC cluster with explicit node
> > > addresses
> > >
> > > On Mon, 2019-07-29 at 09:04 +1200, Chris Packham wrote:
> > > >
> > > > On Fri, 2019-07-26 at 13:31 +, Jon Maloy wrote:
> > > > >
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > -Original Message-
> > > > > > From: netdev-ow...@vger.kernel.org  > > ow...@vger.kernel.org>
> > > >
> > > > >
> > > > > >
> > > > > > On Behalf Of Chris Packham
> > > > > > Sent: 25-Jul-19 19:37
> > > > > > To: tipc-discuss...@lists.sourceforge.net
> > > > > > Cc: net...@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > Subject: Slowness forming TIPC cluster with explicit node
> > > > > > addresses
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > I'm having problems forming a TIPC cluster between 2 nodes.
> > > > > >
> > > > > > This is the basic steps I'm going through on each node.
> > > > > >
> > > > > > modprobe tipc
> > > > > > ip link set eth2 up
> > > > > > tipc node set addr 1.1.5 # or 1.1.6 tipc bearer enable media
> > > > > > eth dev eth0
> > > > > eth2, I assume...
> > > > >
> > > > Yes sorry I keep switching between Ethernet ports for
> > > > testing
> > > > so I hand edited the email.
> > > >
> > > > >
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > Then to confirm if the cluster is formed I use tipc link list
> > > > > >
> > > > > > [root@node-5 ~]# tipc link list
> > > > > > broadcast-link: up
> > > > > > ...
> > > > > >
> > > > > > Looking at tcpdump the two nodes are sending packets
> > > > > >
> > > > > > 22:30:05.782320 TIPC v2.0 1.1.5 > 0.0.0, headerlength 60
> > > > > > bytes,
> > > > > > MessageSize
> > > > > > 76 bytes, Neighbor Detection Protocol internal, messageType
> > > > > > Link
> > > > > > request
> > > > > > 22:30:05.863555 TIPC v2.0 1.1.6 > 0.0.0, headerlength 60
> > > > > > bytes,
> > > > > > MessageSize
> > > > > > 76 bytes, Neighbor Detection Protocol internal, messageType
> > > > > > Link
> > > > > > request
> > > > > >
> > > > > > Eventually (after a few minutes) the link does come up
> > > > > >
> > > > > > [root@node-6 ~]# tipc link list
> > > > > > broadcast-link: up
> > > > > > 1001006:eth2-1001005:eth2: up
> > > > > >
> > > > > > [root@node-5 ~]# tipc link list
> > > > > > broadcast-link: up
> > > > > > 1001005:eth2-1001006:eth2: up
> > > > > >
> > > > > > When I remove the "tipc node set addr" things seem to kick
> > > > > > into
> > > > > > life straight away
> > > > > >
> > > > > > [root@node-5 ~]# tipc link list
> > > > > > broadcast-link: up
> > > > > > 0050b61bd2aa:eth2-0050b61e6dfa:eth2: up
> > > > > >
> > > > > > So there appears to be some difference in behaviour between
> > > > > > having
> > > > > > an explicit node address and using the default. Unfortunately
> > > > > > our
> > > > > > application relies on setting the node addresses.
> > > > > I do this many times a day, without any problems. If there
> > > > > would be
> > > > > any time difference, I would expect the 'auto configurable'
> > > > > version
> > > > > to be slower, because it involves a DAD step.
> > > > > Are you sure you don't have any other nodes running in your
> > > > > system?
> > > > >
> > > > > ///jon
> > > > >
> > > > Nope the two nodes are connected back to back. Does the number of
> > > > Ethernet interfaces make a difference? As you can see I've got 3
> > > > on
> > > > each node. One is completely disconnected, one is for booting
> > > > over
> > > > TFTP
> > > >  (only used by U-boot) and the other is the USB Ethernet I'm
> > > > using for
> > > > testing.
> > > >
> > > So I can still reproduce this on nodes that only have one network
> > > interface and
> > > are the only things connected.
> > >
> > > I did find one thing that helps
> > >
> > > diff --git a/net/tipc/discover.c b/net/tipc/discover.c index
> > > c138d68e8a69..49921dad404a 100644
> > > --- a/net/tipc/discover.c
> > > +++ b/net/tipc/discover.c
> > > @@ -358,10 +358,10 @@ int tipc_disc_create(struct net *net, struct
> > > tipc_bearer *b,
> > > tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
> > >
> > > /* Do we need an address trial period first ? */
> > > -   if (!tipc_own_addr(net)) {
> > > +// if (!tipc_own_addr(net)) {
> > > tn->addr_trial_end = jiffies +
> > > msecs_to_jiffies(1000);
> > > msg_set_type(buf_msg(d->s

[PATCH] xilinx_sdfec: Convert to IDA

2019-08-06 Thread Matthew Wilcox

From: "Matthew Wilcox (Oracle)" 

This driver does not use the lookup abilities of the IDR, so convert it
to the more space-efficient IDA.

Signed-off-by: Matthew Wilcox (Oracle) 
---
 drivers/misc/xilinx_sdfec.c | 25 ++---
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index f257d3812110..071b26a8c6a9 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -22,8 +22,7 @@
 
 #define DEV_NAME_LEN 12
 
-static struct idr dev_idr;
-static struct mutex dev_idr_lock;
+static DEFINE_IDA(dev_nrs);
 
 /**
  * struct xsdfec_clks - For managing SD-FEC clocks
@@ -227,13 +226,6 @@ static void xsdfec_disable_all_clks(struct xsdfec_clks 
*clks)
clk_disable_unprepare(clks->axi_clk);
 }
 
-static void xsdfec_idr_remove(struct xsdfec_dev *xsdfec)
-{
-   mutex_lock(&dev_idr_lock);
-   idr_remove(&dev_idr, xsdfec->dev_id);
-   mutex_unlock(&dev_idr_lock);
-}
-
 static int xsdfec_probe(struct platform_device *pdev)
 {
struct xsdfec_dev *xsdfec;
@@ -263,9 +255,7 @@ static int xsdfec_probe(struct platform_device *pdev)
/* Save driver private data */
platform_set_drvdata(pdev, xsdfec);
 
-   mutex_lock(&dev_idr_lock);
-   err = idr_alloc(&dev_idr, xsdfec->dev_name, 0, 0, GFP_KERNEL);
-   mutex_unlock(&dev_idr_lock);
+   err = ida_alloc(&dev_nrs, GFP_KERNEL);
if (err < 0)
goto err_xsdfec_dev;
xsdfec->dev_id = err;
@@ -278,12 +268,12 @@ static int xsdfec_probe(struct platform_device *pdev)
err = misc_register(&xsdfec->miscdev);
if (err) {
dev_err(dev, "error:%d. Unable to register device", err);
-   goto err_xsdfec_idr;
+   goto err_xsdfec_ida;
}
return 0;
 
-err_xsdfec_idr:
-   xsdfec_idr_remove(xsdfec);
+err_xsdfec_ida:
+   ida_free(&dev_nrs, xsdfec->dev_id);
 err_xsdfec_dev:
xsdfec_disable_all_clks(&xsdfec->clks);
return err;
@@ -295,7 +285,7 @@ static int xsdfec_remove(struct platform_device *pdev)
 
xsdfec = platform_get_drvdata(pdev);
misc_deregister(&xsdfec->miscdev);
-   xsdfec_idr_remove(xsdfec);
+   ida_free(&dev_nrs, xsdfec->dev_id);
xsdfec_disable_all_clks(&xsdfec->clks);
return 0;
 }
@@ -321,8 +311,6 @@ static int __init xsdfec_init(void)
 {
int err;
 
-   mutex_init(&dev_idr_lock);
-   idr_init(&dev_idr);
err = platform_driver_register(&xsdfec_driver);
if (err < 0) {
pr_err("%s Unabled to register SDFEC driver", __func__);
@@ -334,7 +322,6 @@ static int __init xsdfec_init(void)
 static void __exit xsdfec_exit(void)
 {
platform_driver_unregister(&xsdfec_driver);
-   idr_destroy(&dev_idr);
 }
 
 module_init(xsdfec_init);
-- 
2.20.1

[PATCH v2 1/1] ixgbe: sync the first fragment unconditionally

2019-08-06 Thread Firo Yang

In a Xen environment, if Xen-swiotlb is enabled, the ixgbe driver
could possibly allocate a page (a DMA memory buffer) for the first
fragment which is not suitable for Xen-swiotlb to do DMA operations.
Xen-swiotlb has to internally allocate another page for doing DMA
operations. It requires syncing between those two pages. However,
since commit f3213d932173 ("ixgbe: Update driver to make use of DMA
attributes in Rx path"), the unmap operation is performed with
DMA_ATTR_SKIP_CPU_SYNC. As a result, the sync is not performed.

To fix this problem, always sync before possibly performing a page
unmap operation.

Fixes: f3213d932173 ("ixgbe: Update driver to make use of DMA
attributes in Rx path")
Reviewed-by: Alexander Duyck 
Signed-off-by: Firo Yang 
---

Changes from v1:
 * Improved the patch description.
 * Added Reviewed-by: and Fixes: as suggested by Alexander Duyck

 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cbaf712d6529..200de9838096 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1825,13 +1825,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
 static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
struct sk_buff *skb)
 {
-   /* if the page was released unmap it, else just sync our portion */
-   if (unlikely(IXGBE_CB(skb)->page_released)) {
-   dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
-ixgbe_rx_pg_size(rx_ring),
-DMA_FROM_DEVICE,
-IXGBE_RX_DMA_ATTR);
-   } else if (ring_uses_build_skb(rx_ring)) {
+   if (ring_uses_build_skb(rx_ring)) {
unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
 
dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -1848,6 +1842,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring 
*rx_ring,
  skb_frag_size(frag),
  DMA_FROM_DEVICE);
}
+
+   /* If the page was released, just unmap it. */
+   if (unlikely(IXGBE_CB(skb)->page_released)) {
+   dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
+ixgbe_rx_pg_size(rx_ring),
+DMA_FROM_DEVICE,
+IXGBE_RX_DMA_ATTR);
+   }
 }
 
 /**
-- 
2.16.4

[PATCH] libata-sff: use spin_lock_irqsave instead of spin_lock_irq in IRQ context.

2019-08-06 Thread Fuqian Huang

Function ata_sff_flush_pio_task uses spin_lock_irq/spin_unlock_irq
to protect shared data.
spin_unlock_irq will enable interrupts.

In the interrupt handler nv_swncq_interrupt (./drivers/ata/sata_nv.c),
when ap->link.sactive is true, nv_swncq_host_interrupt is called.
nv_swncq_hotplug is called when NV_SWNCQ_IRQ_HOTPLUG is set.
Then it will follow this chain:
nv_swncq_hotplug -> sata_scr_read (./drivers/ata/libata-core.c)
 -> sata_pmp_scr_read (./drivers/ata/libata-pmp.c)
 -> sata_pmp_read -> ata_exec_internal 
 -> ata_exec_internal_sg -> ata_sff_flush_pio_task

As a result, interrupts are re-enabled inside the interrupt handler.
Use spin_lock_irqsave instead of spin_lock_irq to avoid this.

Signed-off-by: Fuqian Huang 
---
 drivers/ata/libata-sff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 10aa27882142..d3143e7e6ec0 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1241,6 +1241,7 @@ EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task);
 
 void ata_sff_flush_pio_task(struct ata_port *ap)
 {
+   unsigned long flags;
DPRINTK("ENTER\n");
 
cancel_delayed_work_sync(&ap->sff_pio_task);
@@ -1253,9 +1254,9 @@ void ata_sff_flush_pio_task(struct ata_port *ap)
 * __ata_sff_port_intr() checks for HSM_ST_IDLE and before it calls
 * ata_sff_hsm_move() causing ata_sff_hsm_move() to BUG().
 */
-   spin_lock_irq(ap->lock);
+   spin_lock_irqsave(ap->lock, flags);
ap->hsm_task_state = HSM_ST_IDLE;
-   spin_unlock_irq(ap->lock);
+   spin_unlock_irqrestore(ap->lock, flags);
 
ap->sff_pio_task_link = NULL;
 
-- 
2.11.0

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1002 matches

Mail list logo