[PATCH] md: Fix missing unused status line of /proc/mdstat

2021-03-17 Thread Jan Glauber
Reading /proc/mdstat with a read buffer size that would not
fit the unused status line in the first read will skip this
line from the output.

So 'dd if=/proc/mdstat bs=64 2>/dev/null' will not print something
like: unused devices: 

Don't return NULL immediately in start() for v=2 but call
show() once to print the status line also for multiple reads.

Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and 
interface")
Signed-off-by: Jan Glauber 
---
 drivers/md/md.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 21da0c48f6c2..cb19d50fa672 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8153,7 +8153,11 @@ static void *md_seq_start(struct seq_file *seq, loff_t 
*pos)
loff_t l = *pos;
struct mddev *mddev;
 
-   if (l >= 0x1)
+   if (l == 0x1) {
+   ++*pos;
+   return (void *)2;
+   }
+   if (l > 0x1)
return NULL;
if (!l--)
/* header */
-- 
2.17.1



[RFC 1/1] edac: Add a counter parameter for edac_device_handle_ue/ce()

2019-07-17 Thread Jan Glauber
On Mon, Jul 15, 2019 at 01:53:07PM +0300, Hanna Hawa wrote:
> Add a counter parameter in order to avoid losing errors count for edac
> device, the error count reports the number of errors reported by an edac
> device similar to the way MC_EDAC do.
> 
> Signed-off-by: Hanna Hawa 
> ---
>  drivers/edac/altera_edac.c  | 20 
>  drivers/edac/amd8111_edac.c |  6 +++---
>  drivers/edac/cpc925_edac.c  |  4 ++--
>  drivers/edac/edac_device.c  | 18 ++
>  drivers/edac/edac_device.h  |  8 ++--
>  drivers/edac/highbank_l2_edac.c |  4 ++--
>  drivers/edac/mpc85xx_edac.c |  4 ++--
>  drivers/edac/mv64x60_edac.c |  4 ++--
>  drivers/edac/octeon_edac-l2c.c  | 20 ++--
>  drivers/edac/octeon_edac-pc.c   |  6 +++---
>  drivers/edac/qcom_edac.c|  8 
>  drivers/edac/thunderx_edac.c| 10 +-
>  drivers/edac/xgene_edac.c   | 26 +-
>  13 files changed, 74 insertions(+), 64 deletions(-)

Hi Hanna,

I'm probably missing something but this patch looks like while it adds
the error_count parameter the passed values all seem to be 1. So is the
new parameter used otherwise, maybe in another patch?

thanks,
Jan

> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
> index 8816f74..747dd43 100644
> --- a/drivers/edac/altera_edac.c
> +++ b/drivers/edac/altera_edac.c
> @@ -616,12 +616,12 @@ static irqreturn_t altr_edac_device_handler(int irq, 
> void *dev_id)
>   if (irq == drvdata->sb_irq) {
>   if (priv->ce_clear_mask)
>   writel(priv->ce_clear_mask, drvdata->base);
> - edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
> + edac_device_handle_ce(dci, 1, 0, 0, drvdata->edac_dev_name);
>   ret_value = IRQ_HANDLED;
>   } else if (irq == drvdata->db_irq) {
>   if (priv->ue_clear_mask)
>   writel(priv->ue_clear_mask, drvdata->base);
> - edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
> + edac_device_handle_ue(dci, 1, 0, 0, drvdata->edac_dev_name);
>   panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>   ret_value = IRQ_HANDLED;
>   } else {
> @@ -919,13 +919,15 @@ static irqreturn_t __maybe_unused 
> altr_edac_a10_ecc_irq(int irq, void *dev_id)
>   if (irq == dci->sb_irq) {
>   writel(ALTR_A10_ECC_SERRPENA,
>  base + ALTR_A10_ECC_INTSTAT_OFST);
> - edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + edac_device_handle_ce(dci->edac_dev, 1, 0, 0,
> +   dci->edac_dev_name);
>  
>   return IRQ_HANDLED;
>   } else if (irq == dci->db_irq) {
>   writel(ALTR_A10_ECC_DERRPENA,
>  base + ALTR_A10_ECC_INTSTAT_OFST);
> - edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + edac_device_handle_ue(dci->edac_dev, 1, 0, 0,
> +   dci->edac_dev_name);
>   if (dci->data->panic)
>   panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>  
> @@ -1308,14 +1310,16 @@ static irqreturn_t altr_edac_a10_l2_irq(int irq, void 
> *dev_id)
>   regmap_write(dci->edac->ecc_mgr_map,
>A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
>A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
> - edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + edac_device_handle_ce(dci->edac_dev, 1, 0, 0,
> +   dci->edac_dev_name);
>  
>   return IRQ_HANDLED;
>   } else if (irq == dci->db_irq) {
>   regmap_write(dci->edac->ecc_mgr_map,
>A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
>A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
> - edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + edac_device_handle_ue(dci->edac_dev, 1, 0, 0,
> +   dci->edac_dev_name);
>   panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>  
>   return IRQ_HANDLED;
> @@ -1652,12 +1656,12 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int 
> irq, void *dev_id)
>   if (irq == ad->sb_irq) {
>   writel(priv->ce_clear_mask,
>  base + ALTR_A10_ECC_INTSTAT_OFST);
> - edac_device_handle_ce(ad->edac_dev, 0, 0, ad->edac_dev_name);
> + edac_device_handle_ce(ad->edac_dev, 1, 0, 0, ad->edac_dev_name);
>   return IRQ_HANDLED;
>   } else if (irq == ad->db_irq) {
>   writel(priv->ue_clear_mask,
>  base + ALTR_A10_ECC_INTSTAT_OFST);
> - edac_device_handle_ue(ad->edac_dev, 0, 0, ad->edac_dev_name);
> + edac_device_handle_ue(ad->edac_dev, 1, 0, 0, ad->edac_dev_name);
>

Re: [PATCH v2 0/5] Add NUMA-awareness to qspinlock

2019-07-03 Thread Jan Glauber
Hi Alex,
I've tried this series on arm64 (ThunderX2 with up to SMT=4  and 224 CPUs)
with the borderline testcase of accessing a single file from all
threads. With that
testcase the qspinlock slowpath is the top spot in the kernel.

The results look really promising:

CPUsnormalnuma-qspinlocks
-
56149.41  73.90
224  576.95  290.31

Also frontend-stalls are reduced to 50% and interconnect traffic is
greatly reduced.
Tested-by: Jan Glauber 

--Jan

Am Fr., 29. März 2019 um 16:23 Uhr schrieb Alex Kogan :
>
> This version addresses feedback from Peter and Waiman. In particular,
> the CNA functionality has been moved to a separate file, and is controlled
> by a config option (enabled by default if NUMA is enabled).
> An optimization has been introduced to reduce the overhead of shuffling
> threads between waiting queues when the lock is only lightly contended.
>
> Summary
> ---
>
> Lock throughput can be increased by handing a lock to a waiter on the
> same NUMA node as the lock holder, provided care is taken to avoid
> starvation of waiters on other NUMA nodes. This patch introduces CNA
> (compact NUMA-aware lock) as the slow path for qspinlock. It can be
> enabled through a configuration option (NUMA_AWARE_SPINLOCKS).
>
> CNA is a NUMA-aware version of the MCS spin-lock. Spinning threads are
> organized in two queues, a main queue for threads running on the same
> node as the current lock holder, and a secondary queue for threads
> running on other nodes. Threads store the ID of the node on which
> they are running in their queue nodes. At the unlock time, the lock
> holder scans the main queue looking for a thread running on the same
> node. If found (call it thread T), all threads in the main queue
> between the current lock holder and T are moved to the end of the
> secondary queue, and the lock is passed to T. If such T is not found, the
> lock is passed to the first node in the secondary queue. Finally, if the
> secondary queue is empty, the lock is passed to the next thread in the
> main queue. To avoid starvation of threads in the secondary queue,
> those threads are moved back to the head of the main queue
> after a certain expected number of intra-node lock hand-offs.
>
> More details are available at https://arxiv.org/abs/1810.05600.
>
> We have done some performance evaluation with the locktorture module
> as well as with several benchmarks from the will-it-scale repo.
> The following locktorture results are from an Oracle X5-4 server
> (four Intel Xeon E7-8895 v3 @ 2.60GHz sockets with 18 hyperthreaded
> cores each). Each number represents an average (over 25 runs) of the
> total number of ops (x10^7) reported at the end of each run. The
> standard deviation is also reported in (), and in general, with a few
> exceptions, is about 3%. The 'stock' kernel is v5.0-rc8,
> commit 28d49e282665 ("locking/lockdep: Shrink struct lock_class_key"),
> compiled in the default configuration. 'patch' is the modified
> kernel compiled with NUMA_AWARE_SPINLOCKS not set; it is included to show
> that any performance changes to the existing qspinlock implementation are
> essentially noise. 'patch-CNA' is the modified kernel with
> NUMA_AWARE_SPINLOCKS set; the speedup is calculated dividing
> 'patch-CNA' by 'stock'.
>
> #thr stock  patchpatch-CNA   speedup (patch-CNA/stock)
>   1  2.731 (0.102)  2.732 (0.093)   2.716 (0.082)  0.995
>   2  3.071 (0.124)  3.084 (0.109)   3.079 (0.113)  1.003
>   4  4.221 (0.138)  4.229 (0.087)   4.408 (0.103)  1.044
>   8  5.366 (0.154)  5.274 (0.094)   6.958 (0.233)  1.297
>  16  6.673 (0.164)  6.689 (0.095)   8.547 (0.145)  1.281
>  32  7.365 (0.177)  7.353 (0.183)   9.305 (0.202)  1.263
>  36  7.473 (0.198)  7.422 (0.181)   9.441 (0.196)  1.263
>  72  6.805 (0.182)  6.699 (0.170)  10.020 (0.218)  1.472
> 108  6.509 (0.082)  6.480 (0.115)  10.027 (0.194)  1.540
> 142  6.223 (0.109)  6.294 (0.100)   9.874 (0.183)  1.587
>
> The following tables contain throughput results (ops/us) from the same
> setup for will-it-scale/open1_threads:
>
> #thr stock  patchpatch-CNA   speedup (patch-CNA/stock)
>   1  0.565 (0.004)  0.567 (0.001)  0.565 (0.003)  0.999
>   2  0.892 (0.021)  0.899 (0.022)  0.900 (0.018)  1.009
>   4  1.503 (0.031)  1.527 (0.038)  1.481 (0.025)  0.985
>   8  1.755 (0.105)  1.714 (0.079)  1.683 (0.106)  0.959
>  16  1.740 (0.095)  1.752 (0.087)  1.693 (0.098)  0.973
>  32  0.884 (0.080)  0.908 (0.090)  1.686 (0.092)  1.906
>  36  0.907 (0.095)  0.894 (0.088)  1.709 (0.081)  1.885
>  72  0.856 (0.041)  0.858 (0.043)  1.707 (0.082)  1.994
> 108  0.858 (0.039)  0.869 (0.037)  1.732 (0.076)  2.020
> 142  0.809 (0.044)  0.

Re: [PATCH] lockref: Limit number of cmpxchg loop retries

2019-06-07 Thread Jan Glauber
On Thu, Jun 06, 2019 at 10:28:12AM +, Jan Glauber wrote:
> On Thu, Jun 06, 2019 at 10:41:54AM +0100, Will Deacon wrote:
> > On Thu, Jun 06, 2019 at 08:03:27AM +0000, Jan Glauber wrote:
> > > On Wed, Jun 05, 2019 at 01:16:46PM -0700, Linus Torvalds wrote:
> > > > On Wed, Jun 5, 2019 at 6:49 AM Jan Glauber  wrote:
> > > > >
> > > > > Add an upper bound to the loop to force the fallback to spinlocks
> > > > > after some time. A retry value of 100 should not impact any hardware
> > > > > that does not have this issue.
> > > > >
> > > > > With the retry limit the performance of an open-close testcase
> > > > > improved between 60-70% on ThunderX2.
> > > > 
> > > > Btw, did you do any kind of performance analysis across different
> > > > retry limit values?
> > > 
> > > I tried 15/50/100/200/500, results were largely identical up to 100.
> > > For SMT=4 a higher retry value might be better, but unless we can add a
> > > sysctl value 100 looked like a good compromise to me.
> > 
> > Perhaps I'm just getting confused pre-morning-coffee, but I thought the
> > original complaint (and the reason for this patch even existing) was that
> > when many CPUs were hammering the lockref then performance tanked? In which
> > case, increasing the threshold as the number of CPUs increases seems
> > counter-intuitive to me because it suggests that the larger the system,
> > the harder we should try to make the cmpxchg work.
> 
> For SMT=4 the top hit I see is queued_spin_lock_slowpath(). Maybe this is more
> costly with more threads, so trying harder to use lockref-cmpxchg makes
> the microbenchmark faster in that case?

To clarify, with 224 threads & CPUs queued_spin_lock_slowpath is the top hit
even without a retry limit in lockref. This could be unrelated to the lockref
fallback, it looks like it's coming from the spinlock in:
do_sys_open -> get_unused_fd_flags -> __alloc_fd

--Jan


Re: [PATCH] lockref: Limit number of cmpxchg loop retries

2019-06-06 Thread Jan Glauber
On Thu, Jun 06, 2019 at 10:41:54AM +0100, Will Deacon wrote:
> On Thu, Jun 06, 2019 at 08:03:27AM +0000, Jan Glauber wrote:
> > On Wed, Jun 05, 2019 at 01:16:46PM -0700, Linus Torvalds wrote:
> > > On Wed, Jun 5, 2019 at 6:49 AM Jan Glauber  wrote:
> > > >
> > > > Add an upper bound to the loop to force the fallback to spinlocks
> > > > after some time. A retry value of 100 should not impact any hardware
> > > > that does not have this issue.
> > > >
> > > > With the retry limit the performance of an open-close testcase
> > > > improved between 60-70% on ThunderX2.
> > > 
> > > Btw, did you do any kind of performance analysis across different
> > > retry limit values?
> > 
> > I tried 15/50/100/200/500, results were largely identical up to 100.
> > For SMT=4 a higher retry value might be better, but unless we can add a
> > sysctl value 100 looked like a good compromise to me.
> 
> Perhaps I'm just getting confused pre-morning-coffee, but I thought the
> original complaint (and the reason for this patch even existing) was that
> when many CPUs were hammering the lockref then performance tanked? In which
> case, increasing the threshold as the number of CPUs increases seems
> counter-intuitive to me because it suggests that the larger the system,
> the harder we should try to make the cmpxchg work.

For SMT=4 the top hit I see is queued_spin_lock_slowpath(). Maybe this is more
costly with more threads, so trying harder to use lockref-cmpxchg makes
the microbenchmark faster in that case?

--Jan


Re: [PATCH] lockref: Limit number of cmpxchg loop retries

2019-06-06 Thread Jan Glauber
On Wed, Jun 05, 2019 at 01:16:46PM -0700, Linus Torvalds wrote:
> On Wed, Jun 5, 2019 at 6:49 AM Jan Glauber  wrote:
> >
> > Add an upper bound to the loop to force the fallback to spinlocks
> > after some time. A retry value of 100 should not impact any hardware
> > that does not have this issue.
> >
> > With the retry limit the performance of an open-close testcase
> > improved between 60-70% on ThunderX2.
> 
> Btw, did you do any kind of performance analysis across different
> retry limit values?

I tried 15/50/100/200/500, results were largely identical up to 100.
For SMT=4 a higher retry value might be better, but unless we can add a
sysctl value 100 looked like a good compromise to me.

--Jan

> I'm perfectly happy to just pick a random number and '100' looks fine
> to me, so this is mainly out of curiosity.
> 
>Linus


[PATCH] lockref: Limit number of cmpxchg loop retries

2019-06-05 Thread Jan Glauber
The lockref cmpxchg loop is unbound as long as the spinlock is not
taken. Depending on the hardware implementation of compare-and-swap
a high number of loop retries might happen.

Add an upper bound to the loop to force the fallback to spinlocks
after some time. A retry value of 100 should not impact any hardware
that does not have this issue.

With the retry limit the performance of an open-close testcase
improved between 60-70% on ThunderX2.

Suggested-by: Linus Torvalds 
Signed-off-by: Jan Glauber 
---
 lib/lockref.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/lockref.c b/lib/lockref.c
index 3d468b53d4c9..5b34bbd3eba8 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -9,6 +9,7 @@
  * failure case.
  */
 #define CMPXCHG_LOOP(CODE, SUCCESS) do {   
\
+   int retry = 100;
\
struct lockref old; 
\
BUILD_BUG_ON(sizeof(old) != 8); 
\
old.lock_count = READ_ONCE(lockref->lock_count);
\
@@ -21,6 +22,8 @@
if (likely(old.lock_count == prev.lock_count)) {
\
SUCCESS;
\
}   
\
+   if (!--retry)   
\
+   break;  
\
cpu_relax();
\
}   
\
 } while (0)
-- 
2.21.0



Re: [RFC] Disable lockref on arm64

2019-05-02 Thread Jan Glauber
On Wed, May 01, 2019 at 05:01:40PM +0100, Will Deacon wrote:
> Hi Jan,
> 
> [+Peter and Linus, since they enjoy this stuff]
> 
> On Mon, Apr 29, 2019 at 02:52:11PM +0000, Jan Glauber wrote:
> > I've been looking into performance issues that were reported for several
> > test-cases, for instance an nginx benchmark.
> 
> Could you share enough specifics here so that we can reproduce the issue
> locally, please? That would help us in our attempts to develop a fix without
> simply disabling the option for everybody else.

I can send my test-case which is a trivial open-read-close loop with one
thread per CPU and increasing read sizes.

> > It turned out the issue we have on ThunderX2 is the file open-close sequence
> > with small read sizes. If the used files are opened read-only the
> > lockref code (enabled by ARCH_USE_CMPXCHG_LOCKREF) is used.
> > 
> > The lockref CMPXCHG_LOOP uses an unbound (as long as the associated
> > spinlock isn't taken) while loop to change the lock count. This behaves
> > badly under heavy contention (~25x retries for one cmpxchg to succeed
> > with 28 threads operating on the same file). In case of a NUMA system
> > it also behaves badly as the access from the other socket is much slower.
> 
> It's surprising that this hasn't been reported on x86. I suspect their
> implementation of cmpxchg is a little more forgiving under contention.
> 
> > The fact that on ThunderX2 cpu_relax() turns only into one NOP
> > instruction doesn't help either. On Intel pause seems to block the thread
> > much longer, avoiding the heavy contention thereby.
> 
> NOPing out the yield instruction seems like a poor choice for an SMT CPU
> such as TX2. That said, the yield was originally added to cpu_relax() as
> a scheduling hint for QEMU.

The issue is not limited to SMT, it also shows without SMT.

> > With the queued spinlocks implementation I can see a major improvement
> > when I disable lockref. A trivial open-close test-case improves by
> > factor 2 while system time is decreasing also 2x. Looking at kernel compile
> > and dbench numbers didn't show any regression with lockref disabled.
> > 
> > Can we simply disable lockref? Is anyone else seeing this issue? Is there
> > an arm64 platform that actually implements yield?
> 
> There are two issues with disabling lockref like this:
> 
>   1. It's a compile-time thing, so systems that would benefit from the code
>  are unfairly penalised.
> 
>   2. You're optimising for the contended case at the cost of the
>  uncontended case, which should actually be the common case as well.

I completely agree with 2). Nevertheless limiting the retry attempts
like Linus suggested looks like a fair change that should not penalize
anyone and would still help the contented case.

--Jan

> Now, nobody expects contended CAS to scale well, so the middle ground
> probably involves backing off to the lock under contention, a bit like
> an optimistic trylock(). Unfortunately, that will need some tuning, hence
> my initial request for a reproducer.
> 
> Cheers,
> 
> Will


Re: [RFC] Disable lockref on arm64

2019-05-02 Thread Jan Glauber
On Wed, May 01, 2019 at 09:41:08AM -0700, Linus Torvalds wrote:
> On Mon, Apr 29, 2019 at 7:52 AM Jan Glauber  wrote:
> >
> > It turned out the issue we have on ThunderX2 is the file open-close sequence
> > with small read sizes. If the used files are opened read-only the
> > lockref code (enabled by ARCH_USE_CMPXCHG_LOCKREF) is used.
> >
> > The lockref CMPXCHG_LOOP uses an unbound (as long as the associated
> > spinlock isn't taken) while loop to change the lock count. This behaves
> > badly under heavy contention
> 
> Ok, excuse me when I rant a bit.
> 
> Since you're at Marvell, maybe you can forward this rant to the proper
> guilty parties?

Sure :)

> Who was the absolute *GENIUS* who went
> 
>  Step 1: "Oh, we have a middling CPU that isn't world-class on its own"
> 
>  Step 2: "BUT! We can put a lot of them on a die, because that's 'easy'"
> 
>  Step 3: "But let's make sure the interconnect isn't all that special,
> because that would negate the the whole 'easy' part, and really strong
> interconnects are even harder than CPU's and use even more power, so
> that wouldn't work"
> 
>  Step 4: "I wonder why this thing scales badly?"
> 
> Seriously. Why are you guys doing this? Has nobody ever looked at the
> fundamental thought process above and gone "Hmm"?
> 
> If you try to compensate for a not-great core by putting twice the
> number of them in a system, you need a cache system and interconnect
> between them that is more than twice as good as the competition.
> 
> And honestly, from everything that I hear, you don't have it. The
> whole chip is designed for "throughput when there is no contention".
> Is it really a huge surprise that it then falls flat on its face when
> there's something fancy going on?

I'll see how x86 runs the same testcase, I thought that playing
cacheline ping-pong is not the optimal use case for any CPU.

My assumption was that x86 probably doesn't suffer that much because
of cpu_relax() -> pause insn could slow down the retry rate.

> So now you want to penalize everybody else in the ARM community
> because you have a badly balanced system?

Not really, as I intentionally did not include a patch and sent this as
RFC.

> Ok, rant over.
> 
> The good news is that we can easily fix _this_ particular case by just
> limiting the CMPXCHG_LOOP to a maximum number of retries, since the
> loop is already designed to fail quickly if the spin lock value isn't
> unlocked, and all the lockref code is already organized to fall back
> to spinlocks.
> 
> So the attached three-liner patch may just work for you. Once _one_
> thread hits the maximum retry case and goes into the spinlocked case,
> everybody else will also fall back to spinlocks because they now see
> that the lockref is contended. So the "retry" value probably isn't all
> that important, but let's make it big enough that it probably never
> happens on a well-balanced system.

Agreed, your patch would solve the issue for ThunderX2. Limiting the
retry attempts was one of the things I tried beside extending the number
of NOPs in cpu_relax().

> But seriously: the whole "let's just do lots of CPU cores because it's
> easy" needs to stop. It's fine if you have a network processor and
> you're doing independent things, but it's not a GP processor approach.
> 
> Your hardware people need to improve on your CPU core (maybe the
> server version of Cortex A76 is starting to approach being good
> enough?) and your interconnect (seriously!) instead of just slapping
> 32 cores on a die and calling it a day.
> 
> Linus "not a fan of the flock of chickens" Torvalds

>  lib/lockref.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/lib/lockref.c b/lib/lockref.c
> index 3d468b53d4c9..a6762f8f45c9 100644
> --- a/lib/lockref.c
> +++ b/lib/lockref.c
> @@ -9,6 +9,7 @@
>   * failure case.
>   */
>  #define CMPXCHG_LOOP(CODE, SUCCESS) do { 
> \
> + int retry = 15; /* Guaranteed random number */  
> \
>   struct lockref old; 
> \
>   BUILD_BUG_ON(sizeof(old) != 8); 
> \
>   old.lock_count = READ_ONCE(lockref->lock_count);
> \
> @@ -21,6 +22,8 @@
>   if (likely(old.lock_count == prev.lock_count)) {
> \
>   SUCCESS;
> \
>   }   
> \
> + if (!--retry)   
> \
> + break;  
> \
>   cpu_relax();
> \
>   }   
> \
>  } while (0)



Re: dcache_readdir NULL inode oops

2019-04-30 Thread Jan Glauber
Hi Al,

On Fri, Nov 30, 2018 at 04:32:28PM +, Will Deacon wrote:
> On Fri, Nov 30, 2018 at 04:08:52PM +, Al Viro wrote:
> > On Fri, Nov 30, 2018 at 09:16:49AM -0600, Eric W. Biederman wrote:
> > > >> > +   inode_lock(parent->d_inode);
> > > >> > dentry->d_fsdata = NULL;
> > > >> > drop_nlink(dentry->d_inode);
> > > >> > d_delete(dentry);
> > > >> > +   inode_unlock(parent->d_inode);
> > > >> > +
> > > >> > dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
> > > >> >  }
> > > >
> > > > This feels right but getting some feedback from others would be good.
> > >
> > > This is going to be special at least because we are not coming through
> > > the normal unlink path and we are manipulating the dcache.
> > >
> > > This looks plausible.  If this is whats going on then we have had this
> > > bug for a very long time.  I will see if I can make some time.
> > >
> > > It looks like in the general case everything is serialized by the
> > > devpts_mutex.  I wonder if just changing the order of operations
> > > here would be enough.
> > >
> > > AKA: drop_nlink d_delete then dentry->d_fsdata.  Ugh d_fsdata is not
> > > implicated so that won't help here.
> >
> > It certainly won't.  The thing is, this
> > if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
> >   d_inode(next)->i_ino, dt_type(d_inode(next
> > in dcache_readdir() obviously can block, so all we can hold over it is
> > blocking locks.  Which we do - specifically, ->i_rwsem on our directory.
> >
> > It's actually worse than missing inode_lock() - consider the effects
> > of mount --bind /mnt/foo /dev/pts/42.  What happens when that thing
> > goes away?  Right, a lost mount...
> 
> Ha, I hadn't even considered that scenario. Urgh!
> 
> > I'll resurrect the "kernel-internal rm -rf done right" series and
> > post it; devpts is not the only place suffering such problem (binfmt_misc,
> > etc.)

I've not seen anything merged regarding this issue so I guess this is
still open? We see a similar crash (dcache_readdir hitting a NULL inode
ptr) but this time not with devpts.

Debugging is ongoing and we're not even sure which filesystem is having
the issue. Is my assumption correct that we should only see this when
d_delete(dentry) is called?

thanks,
Jan



[RFC] Disable lockref on arm64

2019-04-29 Thread Jan Glauber
Hi Catalin & Will,

I've been looking into performance issues that were reported for several
test-cases, for instance an nginx benchmark.

It turned out the issue we have on ThunderX2 is the file open-close sequence
with small read sizes. If the used files are opened read-only the
lockref code (enabled by ARCH_USE_CMPXCHG_LOCKREF) is used.

The lockref CMPXCHG_LOOP uses an unbound (as long as the associated
spinlock isn't taken) while loop to change the lock count. This behaves
badly under heavy contention (~25x retries for one cmpxchg to succeed
with 28 threads operating on the same file). In case of a NUMA system
it also behaves badly as the access from the other socket is much slower.

The fact that on ThunderX2 cpu_relax() turns only into one NOP
instruction doesn't help either. On Intel pause seems to block the thread
much longer, avoiding the heavy contention thereby.

With the queued spinlocks implementation I can see a major improvement
when I disable lockref. A trivial open-close test-case improves by
factor 2 while system time is decreasing also 2x. Looking at kernel compile
and dbench numbers didn't show any regression with lockref disabled.

Can we simply disable lockref? Is anyone else seeing this issue? Is there
an arm64 platform that actually implements yield?

Thanks,
Jan


Re: [PATCH 4/7] crypto: cavium: zip: no need to check return value of debugfs_create functions

2019-01-23 Thread Jan Glauber
On Tue, Jan 22, 2019 at 04:14:19PM +0100, Greg Kroah-Hartman wrote:
> When calling debugfs functions, there is no need to ever check the
> return value.  The function can work or not, but the code logic should
> never do something different based on this.
> 
> Cc: Herbert Xu 
> Cc: "David S. Miller" 
> Cc: Robert Richter 
> Cc: Jan Glauber 
> Cc: linux-cry...@vger.kernel.org
> Signed-off-by: Greg Kroah-Hartman 
> ---
>  drivers/crypto/cavium/zip/zip_main.c | 52 ++--
>  1 file changed, 11 insertions(+), 41 deletions(-)

Reviewed-by: Jan Glauber 

thanks,
Jan


Re: dcache_readdir NULL inode oops

2018-11-29 Thread Jan Glauber
On Wed, Nov 28, 2018 at 08:08:06PM +, Will Deacon wrote:
> I spent some more time looking at this today...
> 
> On Fri, Nov 23, 2018 at 06:05:25PM +, Will Deacon wrote:
> > Doing some more debugging, it looks like the usual failure case is where
> > one CPU clears the inode field in the dentry via:
> >
> >   devpts_pty_kill()
> >   -> d_delete()   // dentry->d_lockref.count == 1
> >   -> dentry_unlink_inode()
> >
> > whilst another CPU gets a pointer to the dentry via:
> >
> >   sys_getdents64()
> >   -> iterate_dir()
> >   -> dcache_readdir()
> >   -> next_positive()
> >
> > and explodes on the subsequent inode dereference when trying to pass the
> > inode number to dir_emit():
> >
> >   if (!dir_emit(..., d_inode(next)->i_ino, ...))
> >
> > Indeed, the hack below triggers a warning, indicating that the inode
> > is being cleared concurrently.
> >
> > I can't work out whether the getdents64() path should hold a refcount
> > to stop d_delete() in its tracks, or whether devpts_pty_kill() shouldn't
> > be calling d_delete() like this at all.
> 
> So the issue is that opening /dev/pts/ptmx creates a new pty in /dev/pts,
> which disappears when you close /dev/pts/ptmx. Consequently, when we tear
> down the dentry for the magic new file, we have to take the i_node rwsem of
> the *parent* so that concurrent path walkers don't trip over it whilst its
> being freed. I wrote a simple concurrent program to getdents(/dev/pts/) in
> one thread, whilst another opens and closes /dev/pts/ptmx: it crashes the
> kernel in seconds.

I also made a testcase and verified that your fix is fine. I also tried
replacing open-close on /dev/ptmx with mkdir-rmdir but that does not
trigger the error.

> Patch below, but I'd still like somebody else to look at this, please.

I wonder why no inode_lock on parent is needed for devpts_pty_new(), but
I'm obviously not a VFS expert... So your patch looks good to me and
clearly solves the issue.

thanks,
Jan

> Will
> 
> --->8
> 
> diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
> index c53814539070..50ddb95ff84c 100644
> --- a/fs/devpts/inode.c
> +++ b/fs/devpts/inode.c
> @@ -619,11 +619,17 @@ void *devpts_get_priv(struct dentry *dentry)
>   */
>  void devpts_pty_kill(struct dentry *dentry)
>  {
> -   WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
> +   struct super_block *sb = dentry->d_sb;
> +   struct dentry *parent = sb->s_root;
> 
> +   WARN_ON_ONCE(sb->s_magic != DEVPTS_SUPER_MAGIC);
> +
> +   inode_lock(parent->d_inode);
> dentry->d_fsdata = NULL;
> drop_nlink(dentry->d_inode);
> d_delete(dentry);
> +   inode_unlock(parent->d_inode);
> +
> dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
>  }


Re: dcache_readdir NULL inode oops

2018-11-29 Thread Jan Glauber
On Wed, Nov 28, 2018 at 08:08:06PM +, Will Deacon wrote:
> I spent some more time looking at this today...
> 
> On Fri, Nov 23, 2018 at 06:05:25PM +, Will Deacon wrote:
> > Doing some more debugging, it looks like the usual failure case is where
> > one CPU clears the inode field in the dentry via:
> >
> >   devpts_pty_kill()
> >   -> d_delete()   // dentry->d_lockref.count == 1
> >   -> dentry_unlink_inode()
> >
> > whilst another CPU gets a pointer to the dentry via:
> >
> >   sys_getdents64()
> >   -> iterate_dir()
> >   -> dcache_readdir()
> >   -> next_positive()
> >
> > and explodes on the subsequent inode dereference when trying to pass the
> > inode number to dir_emit():
> >
> >   if (!dir_emit(..., d_inode(next)->i_ino, ...))
> >
> > Indeed, the hack below triggers a warning, indicating that the inode
> > is being cleared concurrently.
> >
> > I can't work out whether the getdents64() path should hold a refcount
> > to stop d_delete() in its tracks, or whether devpts_pty_kill() shouldn't
> > be calling d_delete() like this at all.
> 
> So the issue is that opening /dev/pts/ptmx creates a new pty in /dev/pts,
> which disappears when you close /dev/pts/ptmx. Consequently, when we tear
> down the dentry for the magic new file, we have to take the i_node rwsem of
> the *parent* so that concurrent path walkers don't trip over it whilst its
> being freed. I wrote a simple concurrent program to getdents(/dev/pts/) in
> one thread, whilst another opens and closes /dev/pts/ptmx: it crashes the
> kernel in seconds.

I also made a testcase and verified that your fix is fine. I also tried
replacing open-close on /dev/ptmx with mkdir-rmdir but that does not
trigger the error.

> Patch below, but I'd still like somebody else to look at this, please.

I wonder why no inode_lock on parent is needed for devpts_pty_new(), but
I'm obviously not a VFS expert... So your patch looks good to me and
clearly solves the issue.

thanks,
Jan

> Will
> 
> --->8
> 
> diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
> index c53814539070..50ddb95ff84c 100644
> --- a/fs/devpts/inode.c
> +++ b/fs/devpts/inode.c
> @@ -619,11 +619,17 @@ void *devpts_get_priv(struct dentry *dentry)
>   */
>  void devpts_pty_kill(struct dentry *dentry)
>  {
> -   WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
> +   struct super_block *sb = dentry->d_sb;
> +   struct dentry *parent = sb->s_root;
> 
> +   WARN_ON_ONCE(sb->s_magic != DEVPTS_SUPER_MAGIC);
> +
> +   inode_lock(parent->d_inode);
> dentry->d_fsdata = NULL;
> drop_nlink(dentry->d_inode);
> d_delete(dentry);
> +   inode_unlock(parent->d_inode);
> +
> dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
>  }


Re: dcache_readdir NULL inode oops

2018-11-21 Thread Jan Glauber
On Tue, Nov 20, 2018 at 07:03:17PM +, Will Deacon wrote:
> On Tue, Nov 20, 2018 at 06:28:54PM +, Will Deacon wrote:
> > On Sat, Nov 10, 2018 at 11:17:03AM +0000, Jan Glauber wrote:
> > > On Fri, Nov 09, 2018 at 03:58:56PM +, Will Deacon wrote:
> > > > On Fri, Nov 09, 2018 at 02:37:51PM +, Jan Glauber wrote:
> > > > > I'm seeing the following oops reproducible with upstream kernel on 
> > > > > arm64
> > > > > (ThunderX2):
> > > >
> > > > [...]
> > > >
> > > > > It happens after 1-3 hours of running 'stress-ng --dev 128'. This 
> > > > > testcase
> > > > > does a scandir of /dev and then calls random stuff like ioctl, lseek,
> > > > > open/close etc. on the entries. I assume no files are deleted under 
> > > > > /dev
> > > > > during the testcase.
> > > > >
> > > > > The NULL pointer is the inode pointer of next. The next 
> > > > > dentry->d_flags is
> > > > > DCACHE_RCUACCESS when this happens.
> > > > >
> > > > > Any hints on how to further debug this?
> > > >
> > > > Can you reproduce the issue with vanilla -rc1 and do you have a "known 
> > > > good"
> > > > kernel?
> > >
> > > I can try out -rc1, but IIRC this wasn't bisectible as the bug was 
> > > present at
> > > least back to 4.14. I need to double check that as there were other issues
> > > that are resolved now so I may confuse things here. I've defintely seen
> > > the same bug with 4.18.
> > >
> > > Unfortunately I lost access to the machine as our data center seems to be
> > > moving currently so it might take some days until I can try -rc1.
> >
> > Ok, I've just managed to reproduce this in a KVM guest running v4.20-rc3 on
> > both the host and the guest, so if anybody has any ideas of things to try 
> > then
> > I'm happy to give them a shot. In the meantime, I'll try again with a bunch 
> > of
> > debug checks enabled.

Hi Will,

good that you can reproduce the issue. I've verified that the issue is
indeed reproducible with 4.14.

> 
> Weee, I eventually hit a use-after-free from KASAN. See below.

I ran KASAN (and all the other debug stuff) but didn't trigger anything
in the host.

--Jan


Re: dcache_readdir NULL inode oops

2018-11-21 Thread Jan Glauber
On Tue, Nov 20, 2018 at 07:03:17PM +, Will Deacon wrote:
> On Tue, Nov 20, 2018 at 06:28:54PM +, Will Deacon wrote:
> > On Sat, Nov 10, 2018 at 11:17:03AM +0000, Jan Glauber wrote:
> > > On Fri, Nov 09, 2018 at 03:58:56PM +, Will Deacon wrote:
> > > > On Fri, Nov 09, 2018 at 02:37:51PM +, Jan Glauber wrote:
> > > > > I'm seeing the following oops reproducible with upstream kernel on 
> > > > > arm64
> > > > > (ThunderX2):
> > > >
> > > > [...]
> > > >
> > > > > It happens after 1-3 hours of running 'stress-ng --dev 128'. This 
> > > > > testcase
> > > > > does a scandir of /dev and then calls random stuff like ioctl, lseek,
> > > > > open/close etc. on the entries. I assume no files are deleted under 
> > > > > /dev
> > > > > during the testcase.
> > > > >
> > > > > The NULL pointer is the inode pointer of next. The next 
> > > > > dentry->d_flags is
> > > > > DCACHE_RCUACCESS when this happens.
> > > > >
> > > > > Any hints on how to further debug this?
> > > >
> > > > Can you reproduce the issue with vanilla -rc1 and do you have a "known 
> > > > good"
> > > > kernel?
> > >
> > > I can try out -rc1, but IIRC this wasn't bisectible as the bug was 
> > > present at
> > > least back to 4.14. I need to double check that as there were other issues
> > > that are resolved now so I may confuse things here. I've defintely seen
> > > the same bug with 4.18.
> > >
> > > Unfortunately I lost access to the machine as our data center seems to be
> > > moving currently so it might take some days until I can try -rc1.
> >
> > Ok, I've just managed to reproduce this in a KVM guest running v4.20-rc3 on
> > both the host and the guest, so if anybody has any ideas of things to try 
> > then
> > I'm happy to give them a shot. In the meantime, I'll try again with a bunch 
> > of
> > debug checks enabled.

Hi Will,

good that you can reproduce the issue. I've verified that the issue is
indeed reproducible with 4.14.

> 
> Weee, I eventually hit a use-after-free from KASAN. See below.

I ran KASAN (and all the other debug stuff) but didn't trigger anything
in the host.

--Jan


WARN_ON after gic_reserve_range

2018-11-20 Thread Jan Glauber
Hi Marc,

with 4.20-rc3 I see two WARN_ON's firing on a ThunderX2 system that come from
commit 3fb68faee867 (irqchip/gic-v3-its: Register LPI tables with EFI config 
table).

Global efi_memreserve_root is NULL as it will only be set when early initcalls 
are
running, but from the backtrace the WARN_ON's are running even earlier 
(init_IRQ).

Am I the only one seeing this?

[0.00] WARNING: CPU: 0 PID: 0 at drivers/irqchip/irq-gic-v3-its.c:1696 
its_init+0x36c/0x608
[0.00] Modules linked in:
[0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.20.0-rc3-jang+ #69
[0.00] pstate: 6089 (nZCv daIf -PAN -UAO)
[0.00] pc : its_init+0x36c/0x608
[0.00] lr : its_init+0x368/0x608
[0.00] sp : 08e53c60
[0.00] x29: 08e53c60 x28: 7dfffe6807a4 
[0.00] x27: 0001 x26: 80267b4c2100 
[0.00] x25: 08ffe930 x24: 08e5d9c8 
[0.00] x23: 80267bc10300 x22: 08e5d9c8 
[0.00] x21: 08e59000 x20: 08f12000 
[0.00] x19: 08ffe000 x18:  
[0.00] x17: 0009 x16:  
[0.00] x15: 08e59648 x14: 88fa616f 
[0.00] x13: 08fa617d x12: 08e7e000 
[0.00] x11: 08e53900 x10: 08e59eb0 
[0.00] x9 : 08e3e018 x8 : 0875ac58 
[0.00] x7 : 206f6e203a746e65 x6 : 018c 
[0.00] x5 : 0001 x4 :  
[0.00] x3 : 0001 x2 : 4f419eae5e0eb800 
[0.00] x1 :  x0 : ffed 
[0.00] Call trace:
[0.00]  its_init+0x36c/0x608
[0.00]  gic_init_bases+0x288/0x300
[0.00]  gic_acpi_init+0x124/0x248
[0.00]  acpi_match_madt+0x4c/0x88
[0.00]  acpi_table_parse_entries_array+0x134/0x220
[0.00]  acpi_table_parse_entries+0x70/0x98
[0.00]  acpi_table_parse_madt+0x40/0x50
[0.00]  __acpi_probe_device_table+0x88/0xe4
[0.00]  irqchip_init+0x38/0x40
[0.00]  init_IRQ+0xcc/0x100
[0.00]  start_kernel+0x2fc/0x490
[0.00] ---[ end trace 41868d15bb5cf8f6 ]---

--Jan


WARN_ON after gic_reserve_range

2018-11-20 Thread Jan Glauber
Hi Marc,

with 4.20-rc3 I see two WARN_ON's firing on a ThunderX2 system that come from
commit 3fb68faee867 (irqchip/gic-v3-its: Register LPI tables with EFI config 
table).

Global efi_memreserve_root is NULL as it will only be set when early initcalls 
are
running, but from the backtrace the WARN_ON's are running even earlier 
(init_IRQ).

Am I the only one seeing this?

[0.00] WARNING: CPU: 0 PID: 0 at drivers/irqchip/irq-gic-v3-its.c:1696 
its_init+0x36c/0x608
[0.00] Modules linked in:
[0.00] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.20.0-rc3-jang+ #69
[0.00] pstate: 6089 (nZCv daIf -PAN -UAO)
[0.00] pc : its_init+0x36c/0x608
[0.00] lr : its_init+0x368/0x608
[0.00] sp : 08e53c60
[0.00] x29: 08e53c60 x28: 7dfffe6807a4 
[0.00] x27: 0001 x26: 80267b4c2100 
[0.00] x25: 08ffe930 x24: 08e5d9c8 
[0.00] x23: 80267bc10300 x22: 08e5d9c8 
[0.00] x21: 08e59000 x20: 08f12000 
[0.00] x19: 08ffe000 x18:  
[0.00] x17: 0009 x16:  
[0.00] x15: 08e59648 x14: 88fa616f 
[0.00] x13: 08fa617d x12: 08e7e000 
[0.00] x11: 08e53900 x10: 08e59eb0 
[0.00] x9 : 08e3e018 x8 : 0875ac58 
[0.00] x7 : 206f6e203a746e65 x6 : 018c 
[0.00] x5 : 0001 x4 :  
[0.00] x3 : 0001 x2 : 4f419eae5e0eb800 
[0.00] x1 :  x0 : ffed 
[0.00] Call trace:
[0.00]  its_init+0x36c/0x608
[0.00]  gic_init_bases+0x288/0x300
[0.00]  gic_acpi_init+0x124/0x248
[0.00]  acpi_match_madt+0x4c/0x88
[0.00]  acpi_table_parse_entries_array+0x134/0x220
[0.00]  acpi_table_parse_entries+0x70/0x98
[0.00]  acpi_table_parse_madt+0x40/0x50
[0.00]  __acpi_probe_device_table+0x88/0xe4
[0.00]  irqchip_init+0x38/0x40
[0.00]  init_IRQ+0xcc/0x100
[0.00]  start_kernel+0x2fc/0x490
[0.00] ---[ end trace 41868d15bb5cf8f6 ]---

--Jan


Re: dcache_readdir NULL inode oops

2018-11-10 Thread Jan Glauber
On Fri, Nov 09, 2018 at 03:58:56PM +, Will Deacon wrote:
> On Fri, Nov 09, 2018 at 02:37:51PM +0000, Jan Glauber wrote:
> > I'm seeing the following oops reproducible with upstream kernel on arm64
> > (ThunderX2):
> 
> [...]
> 
> > It happens after 1-3 hours of running 'stress-ng --dev 128'. This testcase
> > does a scandir of /dev and then calls random stuff like ioctl, lseek,
> > open/close etc. on the entries. I assume no files are deleted under /dev
> > during the testcase.
> >
> > The NULL pointer is the inode pointer of next. The next dentry->d_flags is
> > DCACHE_RCUACCESS when this happens.
> >
> > Any hints on how to further debug this?
> 
> Can you reproduce the issue with vanilla -rc1 and do you have a "known good"
> kernel?

I can try out -rc1, but IIRC this wasn't bisectible as the bug was present at
least back to 4.14. I need to double check that as there were other issues
that are resolved now so I may confuse things here. I've defintely seen
the same bug with 4.18.

Unfortunately I lost access to the machine as our data center seems to be
moving currently so it might take some days until I can try -rc1.

thanks,
Jan


Re: dcache_readdir NULL inode oops

2018-11-10 Thread Jan Glauber
On Fri, Nov 09, 2018 at 03:58:56PM +, Will Deacon wrote:
> On Fri, Nov 09, 2018 at 02:37:51PM +0000, Jan Glauber wrote:
> > I'm seeing the following oops reproducible with upstream kernel on arm64
> > (ThunderX2):
> 
> [...]
> 
> > It happens after 1-3 hours of running 'stress-ng --dev 128'. This testcase
> > does a scandir of /dev and then calls random stuff like ioctl, lseek,
> > open/close etc. on the entries. I assume no files are deleted under /dev
> > during the testcase.
> >
> > The NULL pointer is the inode pointer of next. The next dentry->d_flags is
> > DCACHE_RCUACCESS when this happens.
> >
> > Any hints on how to further debug this?
> 
> Can you reproduce the issue with vanilla -rc1 and do you have a "known good"
> kernel?

I can try out -rc1, but IIRC this wasn't bisectible as the bug was present at
least back to 4.14. I need to double check that as there were other issues
that are resolved now so I may confuse things here. I've defintely seen
the same bug with 4.18.

Unfortunately I lost access to the machine as our data center seems to be
moving currently so it might take some days until I can try -rc1.

thanks,
Jan


dcache_readdir NULL inode oops

2018-11-09 Thread Jan Glauber
Hi Al,

I'm seeing the following oops reproducible with upstream kernel on arm64 
(ThunderX2):

[ 5428.795719] Unable to handle kernel NULL pointer dereference at virtual 
address 0040
[ 5428.813838] Mem abort info:
[ 5428.820721]   ESR = 0x9606
[ 5428.828476]   Exception class = DABT (current EL), IL = 32 bits
[ 5428.841590]   SET = 0, FnV = 0
[ 5428.848939]   EA = 0, S1PTW = 0
[ 5428.855941] Data abort info:
[ 5428.862422]   ISV = 0, ISS = 0x0006
[ 5428.870787]   CM = 0, WnR = 0
[ 5428.877359] user pgtable: 4k pages, 48-bit VAs, pgdp = 52f9e034
[ 5428.891098] [0040] pgd=007ebb0d6003, pud=007ed3073003, 
pmd=
[ 5428.909251] Internal error: Oops: 9606 [#1] SMP
[ 5428.919122] Modules linked in: xt_conntrack nf_conntrack nf_defrag_ipv6 
nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc 
ip6table_filter ip6_tables iptable_filter ipmi_ssif ip_tables x_tables ipv6 
crc32_ce bnx2x crct10dif_ce igb nvme nvme_core i2c_algo_bit mdio gpio_xlp 
i2c_xlp9xx
[ 5428.972724] CPU: 45 PID: 220018 Comm: stress-ng-dev Not tainted 4.19.0-jang+ 
#45
[ 5428.987664] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[ 5429.006819] pstate: 6049 (nZCv daif +PAN -UAO)
[ 5429.016567] pc : dcache_readdir+0xfc/0x1a8
[ 5429.024903] lr : dcache_readdir+0x134/0x1a8
[ 5429.033376] sp : 2d553d70
[ 5429.040101] x29: 2d553d70 x28: 807db4988000
[ 5429.050892] x27:  x26: 
[ 5429.061679] x25: 5600 x24: 8024577106c0
[ 5429.072457] x23:  x22: 80267b92a480
[ 5429.083248] x21: 80267b92a520 x20: 8024575e5e00
[ 5429.094029] x19: 2d553e40 x18: 
[ 5429.104805] x17:  x16: 
[ 5429.115553] x15:  x14: 
[ 5429.126332] x13:  x12: 
[ 5429.137096] x11:  x10: 80266b398228
[ 5429.147849] x9 : 80266b398000 x8 : 7e4e
[ 5429.158580] x7 :  x6 : 0830d190
[ 5429.169362] x5 :  x4 : 0d7506a8
[ 5429.180123] x3 : 0002 x2 : 0002
[ 5429.190890] x1 : 8024575e5e38 x0 : 2d553e40
[ 5429.201715] Process stress-ng-dev (pid: 220018, stack limit = 
0x9437ac28)
[ 5429.216828] Call trace:
[ 5429.221855]  dcache_readdir+0xfc/0x1a8
[ 5429.229459]  iterate_dir+0x8c/0x1a0
[ 5429.236561]  ksys_getdents64+0xa4/0x188
[ 5429.244357]  __arm64_sys_getdents64+0x28/0x38
[ 5429.253201]  el0_svc_handler+0x7c/0x100
[ 5429.260989]  el0_svc+0x8/0xc
[ 5429.266878] Code: a9429681 aa1303e0 b9402682 a9400e66 (f94020a4)
[ 5429.279192] ---[ end trace 5c1e28c07cf016c5 ]---

It happens after 1-3 hours of running 'stress-ng --dev 128'. This testcase does 
a scandir of /dev
and then calls random stuff like ioctl, lseek, open/close etc. on the entries. 
I assume no files are
deleted under /dev during the testcase.

The NULL pointer is the inode pointer of next. The next dentry->d_flags is 
DCACHE_RCUACCESS
when this happens.

Any hints on how to further debug this?

--Jan


dcache_readdir NULL inode oops

2018-11-09 Thread Jan Glauber
Hi Al,

I'm seeing the following oops reproducible with upstream kernel on arm64 
(ThunderX2):

[ 5428.795719] Unable to handle kernel NULL pointer dereference at virtual 
address 0040
[ 5428.813838] Mem abort info:
[ 5428.820721]   ESR = 0x9606
[ 5428.828476]   Exception class = DABT (current EL), IL = 32 bits
[ 5428.841590]   SET = 0, FnV = 0
[ 5428.848939]   EA = 0, S1PTW = 0
[ 5428.855941] Data abort info:
[ 5428.862422]   ISV = 0, ISS = 0x0006
[ 5428.870787]   CM = 0, WnR = 0
[ 5428.877359] user pgtable: 4k pages, 48-bit VAs, pgdp = 52f9e034
[ 5428.891098] [0040] pgd=007ebb0d6003, pud=007ed3073003, 
pmd=
[ 5428.909251] Internal error: Oops: 9606 [#1] SMP
[ 5428.919122] Modules linked in: xt_conntrack nf_conntrack nf_defrag_ipv6 
nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc 
ip6table_filter ip6_tables iptable_filter ipmi_ssif ip_tables x_tables ipv6 
crc32_ce bnx2x crct10dif_ce igb nvme nvme_core i2c_algo_bit mdio gpio_xlp 
i2c_xlp9xx
[ 5428.972724] CPU: 45 PID: 220018 Comm: stress-ng-dev Not tainted 4.19.0-jang+ 
#45
[ 5428.987664] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[ 5429.006819] pstate: 6049 (nZCv daif +PAN -UAO)
[ 5429.016567] pc : dcache_readdir+0xfc/0x1a8
[ 5429.024903] lr : dcache_readdir+0x134/0x1a8
[ 5429.033376] sp : 2d553d70
[ 5429.040101] x29: 2d553d70 x28: 807db4988000
[ 5429.050892] x27:  x26: 
[ 5429.061679] x25: 5600 x24: 8024577106c0
[ 5429.072457] x23:  x22: 80267b92a480
[ 5429.083248] x21: 80267b92a520 x20: 8024575e5e00
[ 5429.094029] x19: 2d553e40 x18: 
[ 5429.104805] x17:  x16: 
[ 5429.115553] x15:  x14: 
[ 5429.126332] x13:  x12: 
[ 5429.137096] x11:  x10: 80266b398228
[ 5429.147849] x9 : 80266b398000 x8 : 7e4e
[ 5429.158580] x7 :  x6 : 0830d190
[ 5429.169362] x5 :  x4 : 0d7506a8
[ 5429.180123] x3 : 0002 x2 : 0002
[ 5429.190890] x1 : 8024575e5e38 x0 : 2d553e40
[ 5429.201715] Process stress-ng-dev (pid: 220018, stack limit = 
0x9437ac28)
[ 5429.216828] Call trace:
[ 5429.221855]  dcache_readdir+0xfc/0x1a8
[ 5429.229459]  iterate_dir+0x8c/0x1a0
[ 5429.236561]  ksys_getdents64+0xa4/0x188
[ 5429.244357]  __arm64_sys_getdents64+0x28/0x38
[ 5429.253201]  el0_svc_handler+0x7c/0x100
[ 5429.260989]  el0_svc+0x8/0xc
[ 5429.266878] Code: a9429681 aa1303e0 b9402682 a9400e66 (f94020a4)
[ 5429.279192] ---[ end trace 5c1e28c07cf016c5 ]---

It happens after 1-3 hours of running 'stress-ng --dev 128'. This testcase does 
a scandir of /dev
and then calls random stuff like ioctl, lseek, open/close etc. on the entries. 
I assume no files are
deleted under /dev during the testcase.

The NULL pointer is the inode pointer of next. The next dentry->d_flags is 
DCACHE_RCUACCESS
when this happens.

Any hints on how to further debug this?

--Jan


Re: KASAN: use-after-scope in ext4_group_desc_csum

2018-10-09 Thread Jan Glauber
On Fri, Oct 05, 2018 at 05:32:07PM +0200, Dmitry Vyukov wrote:
[...]

> This all makes me think that somebody else has left these 0xf8 in
> shadow before ext4_map_blocks started executing.
> Unfortunately debugging garbage in stack shadow is not completely
> trivial and there is no common recipe. I don't have setup to run arm64
> kernel at the moment. I would try to locate that garbage in stack
> shadow earlier, e.g. calling another function before ext4_map_blocks,
> implementing that function in mm/kasan/kasan.c (non-instrumented
> itself) and then try to scan stack and verify presence of 0xf8
> garbage. If this works out, then try to catch garbage earlier and/or
> try to figure out what function left that garbage (that's possible by
> locating 0x41b58ab3 magic:
> https://bugzilla.kernel.org/show_bug.cgi?id=198435).

Thanks a lot for your analysis! I'll try to debug this further but as
you pointed out it might be difficult to catch who writes beforehand to
that location.

--Jan


Re: KASAN: use-after-scope in ext4_group_desc_csum

2018-10-09 Thread Jan Glauber
On Fri, Oct 05, 2018 at 05:32:07PM +0200, Dmitry Vyukov wrote:
[...]

> This all makes me think that somebody else has left these 0xf8 in
> shadow before ext4_map_blocks started executing.
> Unfortunately debugging garbage in stack shadow is not completely
> trivial and there is no common recipe. I don't have setup to run arm64
> kernel at the moment. I would try to locate that garbage in stack
> shadow earlier, e.g. calling another function before ext4_map_blocks,
> implementing that function in mm/kasan/kasan.c (non-instrumented
> itself) and then try to scan stack and verify presence of 0xf8
> garbage. If this works out, then try to catch garbage earlier and/or
> try to figure out what function left that garbage (that's possible by
> locating 0x41b58ab3 magic:
> https://bugzilla.kernel.org/show_bug.cgi?id=198435).

Thanks a lot for your analysis! I'll try to debug this further but as
you pointed out it might be difficult to catch who writes beforehand to
that location.

--Jan


Re: KASAN: use-after-scope in ext4_group_desc_csum

2018-10-05 Thread Jan Glauber
On Fri, Oct 05, 2018 at 01:13:52PM +0200, Dmitry Vyukov wrote:
> On Fri, Oct 5, 2018 at 12:16 PM, Jan Glauber  wrote:
> > Hi,
> >
> > I'm getting below warning when I enable CONFIG_KASAN_EXTRA=y on a arm64 
> > ThunderX2 system.
> > As far as I can tell this is present since KASAN_EXTRA was introduced 
> > (4.16).
> >
> > [   64.547333] 
> > ==
> > [   64.561933] BUG: KASAN: use-after-scope in 
> > ext4_es_lookup_extent+0x130/0x980
> > [   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075
> >
> > [   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
> > [   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
> > O.E.M., BIOS 0ACKL018 03/30/2018
> > [   64.624750] Call trace:
> > [   64.629666]  dump_backtrace+0x0/0x360
> > [   64.637024]  show_stack+0x24/0x30
> > [   64.643687]  dump_stack+0x12c/0x1b4
> > [   64.650699]  print_address_description+0x68/0x2c8
> > [   64.660152]  kasan_report+0x130/0x300
> > [   64.667509]  __asan_store4+0x84/0xa8
> > [   64.674693]  ext4_es_lookup_extent+0x130/0x980
> > [   64.683623]  ext4_map_blocks+0xe0/0x990
> > [   64.691330]  _ext4_get_block+0x130/0x2b8
> > [   64.699211]  ext4_get_block+0x40/0x50
> > [   64.706571]  generic_block_bmap+0x104/0x178
> > [   64.714977]  ext4_bmap+0xc4/0x198
> > [   64.721636]  bmap+0x54/0x70
> > [   64.727250]  jbd2_journal_init_inode+0x2c/0x208
> > [   64.736355]  ext4_fill_super+0x5080/0x5c90
> > [   64.744587]  mount_bdev+0x1e0/0x228
> > [   64.751597]  ext4_mount+0x44/0x58
> > [   64.758255]  mount_fs+0x58/0x1b8
> > [   64.764740]  vfs_kern_mount.part.2+0xc0/0x2a8
> > [   64.773495]  do_mount+0x7a8/0x13e8
> > [   64.780327]  ksys_mount+0x9c/0x110
> > [   64.787160]  __arm64_sys_mount+0x70/0x88
> > [   64.795043]  el0_svc_handler+0xac/0x150
> > [   64.802749]  el0_svc+0x8/0xc
> >
> > [   64.811521] The buggy address belongs to the page:
> > [   64.821149] page:7e0088b607c0 count:0 mapcount:0 
> > mapping: index:0x0
> > [   64.837249] flags: 0x1000()
> > [   64.844959] raw: 1000 7e0088b607c8 7e0088b607c8 
> > 
> > [   64.860527] raw:    
> > 
> > [   64.876093] page dumped because: kasan: bad access detected
> >
> > [   64.890278] Memory state around the buggy address:
> > [   64.899907]  80222d81ef80: f2 f2 f2 f2 00 f2 f2 f2 f2 f2 f2 f2 00 f2 
> > f2 f2
> > [   64.914426]  80222d81f000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
> > f8 f8
> > [   64.928945] >80222d81f080: f8 f8 f8 f8 f8 f8 f1 f1 f1 f1 f8 f8 f8 f8 
> > 00 f2
> > [   64.943463]   ^
> > [   64.956759]  80222d81f100: f2 f2 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
> > f8 f8
> > [   64.971278]  80222d81f180: f8 f8 f8 f8 f1 f1 f1 f1 00 00 00 f2 f8 f8 
> > f8 f8
> > [   64.985795] 
> > ==
> > [   65.000312] Disabling lock debugging due to kernel taint
> > [   65.037509] EXT4-fs (sda2): mounted filesystem with ordered data mode. 
> > Opts: (null)
> >
> > I'm not seeing any issues like filesystem corruption or misbehaviour that 
> > could be related
> > the warning.
> >
> > Is this a false positive? Any thoughts?
> 
> 
> Hi Jan,
> 
> What kernel commit are you using? Kernel config? Compiler?
> Please symbolize the report with scripts/decode_stacktrace.sh in
> kernel tree or 
> https://github.com/google/sanitizers/blob/master/address-sanitizer/tools/kasan_symbolize.py

Hi Dmitry,

I can reproduce this since 4.16, the report above is from 4.19-rc6.
Kernel config:
https://paste.debian.net/1046031/

Compiler is the stock gcc from Ubuntu 18.04.1:
gcc version 7.3.0 (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04)

Here is the decoded stacktrace:
[   64.547333] 
==
[   64.561933] BUG: KASAN: use-after-scope in ext4_es_lookup_extent 
(fs/ext4/extents_status.c:795)
[   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075

[   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
[   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[   64.624750] Call trace:
[   64.629666] dump_backtrace (arch/arm64/kernel/traps.c:102)
[   64.637024] show_stack (arch/arm64/kernel/traps.c:154)
[ 

Re: KASAN: use-after-scope in ext4_group_desc_csum

2018-10-05 Thread Jan Glauber
On Fri, Oct 05, 2018 at 01:13:52PM +0200, Dmitry Vyukov wrote:
> On Fri, Oct 5, 2018 at 12:16 PM, Jan Glauber  wrote:
> > Hi,
> >
> > I'm getting below warning when I enable CONFIG_KASAN_EXTRA=y on a arm64 
> > ThunderX2 system.
> > As far as I can tell this is present since KASAN_EXTRA was introduced 
> > (4.16).
> >
> > [   64.547333] 
> > ==
> > [   64.561933] BUG: KASAN: use-after-scope in 
> > ext4_es_lookup_extent+0x130/0x980
> > [   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075
> >
> > [   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
> > [   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
> > O.E.M., BIOS 0ACKL018 03/30/2018
> > [   64.624750] Call trace:
> > [   64.629666]  dump_backtrace+0x0/0x360
> > [   64.637024]  show_stack+0x24/0x30
> > [   64.643687]  dump_stack+0x12c/0x1b4
> > [   64.650699]  print_address_description+0x68/0x2c8
> > [   64.660152]  kasan_report+0x130/0x300
> > [   64.667509]  __asan_store4+0x84/0xa8
> > [   64.674693]  ext4_es_lookup_extent+0x130/0x980
> > [   64.683623]  ext4_map_blocks+0xe0/0x990
> > [   64.691330]  _ext4_get_block+0x130/0x2b8
> > [   64.699211]  ext4_get_block+0x40/0x50
> > [   64.706571]  generic_block_bmap+0x104/0x178
> > [   64.714977]  ext4_bmap+0xc4/0x198
> > [   64.721636]  bmap+0x54/0x70
> > [   64.727250]  jbd2_journal_init_inode+0x2c/0x208
> > [   64.736355]  ext4_fill_super+0x5080/0x5c90
> > [   64.744587]  mount_bdev+0x1e0/0x228
> > [   64.751597]  ext4_mount+0x44/0x58
> > [   64.758255]  mount_fs+0x58/0x1b8
> > [   64.764740]  vfs_kern_mount.part.2+0xc0/0x2a8
> > [   64.773495]  do_mount+0x7a8/0x13e8
> > [   64.780327]  ksys_mount+0x9c/0x110
> > [   64.787160]  __arm64_sys_mount+0x70/0x88
> > [   64.795043]  el0_svc_handler+0xac/0x150
> > [   64.802749]  el0_svc+0x8/0xc
> >
> > [   64.811521] The buggy address belongs to the page:
> > [   64.821149] page:7e0088b607c0 count:0 mapcount:0 
> > mapping: index:0x0
> > [   64.837249] flags: 0x1000()
> > [   64.844959] raw: 1000 7e0088b607c8 7e0088b607c8 
> > 
> > [   64.860527] raw:    
> > 
> > [   64.876093] page dumped because: kasan: bad access detected
> >
> > [   64.890278] Memory state around the buggy address:
> > [   64.899907]  80222d81ef80: f2 f2 f2 f2 00 f2 f2 f2 f2 f2 f2 f2 00 f2 
> > f2 f2
> > [   64.914426]  80222d81f000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
> > f8 f8
> > [   64.928945] >80222d81f080: f8 f8 f8 f8 f8 f8 f1 f1 f1 f1 f8 f8 f8 f8 
> > 00 f2
> > [   64.943463]   ^
> > [   64.956759]  80222d81f100: f2 f2 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
> > f8 f8
> > [   64.971278]  80222d81f180: f8 f8 f8 f8 f1 f1 f1 f1 00 00 00 f2 f8 f8 
> > f8 f8
> > [   64.985795] 
> > ==
> > [   65.000312] Disabling lock debugging due to kernel taint
> > [   65.037509] EXT4-fs (sda2): mounted filesystem with ordered data mode. 
> > Opts: (null)
> >
> > I'm not seeing any issues like filesystem corruption or misbehaviour that 
> > could be related
> > the warning.
> >
> > Is this a false positive? Any thoughts?
> 
> 
> Hi Jan,
> 
> What kernel commit are you using? Kernel config? Compiler?
> Please symbolize the report with scripts/decode_stacktrace.sh in
> kernel tree or 
> https://github.com/google/sanitizers/blob/master/address-sanitizer/tools/kasan_symbolize.py

Hi Dmitry,

I can reproduce this since 4.16, the report above is from 4.19-rc6.
Kernel config:
https://paste.debian.net/1046031/

Compiler is the stock gcc from Ubuntu 18.04.1:
gcc version 7.3.0 (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04)

Here is the decoded stacktrace:
[   64.547333] 
==
[   64.561933] BUG: KASAN: use-after-scope in ext4_es_lookup_extent 
(fs/ext4/extents_status.c:795)
[   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075

[   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
[   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[   64.624750] Call trace:
[   64.629666] dump_backtrace (arch/arm64/kernel/traps.c:102)
[   64.637024] show_stack (arch/arm64/kernel/traps.c:154)
[ 

KASAN: use-after-scope in ext4_group_desc_csum

2018-10-05 Thread Jan Glauber
Hi,

I'm getting below warning when I enable CONFIG_KASAN_EXTRA=y on a arm64 
ThunderX2 system.
As far as I can tell this is present since KASAN_EXTRA was introduced (4.16).

[   64.547333] 
==
[   64.561933] BUG: KASAN: use-after-scope in ext4_es_lookup_extent+0x130/0x980
[   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075

[   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
[   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[   64.624750] Call trace:
[   64.629666]  dump_backtrace+0x0/0x360
[   64.637024]  show_stack+0x24/0x30
[   64.643687]  dump_stack+0x12c/0x1b4
[   64.650699]  print_address_description+0x68/0x2c8
[   64.660152]  kasan_report+0x130/0x300
[   64.667509]  __asan_store4+0x84/0xa8
[   64.674693]  ext4_es_lookup_extent+0x130/0x980
[   64.683623]  ext4_map_blocks+0xe0/0x990
[   64.691330]  _ext4_get_block+0x130/0x2b8
[   64.699211]  ext4_get_block+0x40/0x50
[   64.706571]  generic_block_bmap+0x104/0x178
[   64.714977]  ext4_bmap+0xc4/0x198
[   64.721636]  bmap+0x54/0x70
[   64.727250]  jbd2_journal_init_inode+0x2c/0x208
[   64.736355]  ext4_fill_super+0x5080/0x5c90
[   64.744587]  mount_bdev+0x1e0/0x228
[   64.751597]  ext4_mount+0x44/0x58
[   64.758255]  mount_fs+0x58/0x1b8
[   64.764740]  vfs_kern_mount.part.2+0xc0/0x2a8
[   64.773495]  do_mount+0x7a8/0x13e8
[   64.780327]  ksys_mount+0x9c/0x110
[   64.787160]  __arm64_sys_mount+0x70/0x88
[   64.795043]  el0_svc_handler+0xac/0x150
[   64.802749]  el0_svc+0x8/0xc

[   64.811521] The buggy address belongs to the page:
[   64.821149] page:7e0088b607c0 count:0 mapcount:0 
mapping: index:0x0
[   64.837249] flags: 0x1000()
[   64.844959] raw: 1000 7e0088b607c8 7e0088b607c8 

[   64.860527] raw:    

[   64.876093] page dumped because: kasan: bad access detected

[   64.890278] Memory state around the buggy address:
[   64.899907]  80222d81ef80: f2 f2 f2 f2 00 f2 f2 f2 f2 f2 f2 f2 00 f2 f2 
f2
[   64.914426]  80222d81f000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
f8
[   64.928945] >80222d81f080: f8 f8 f8 f8 f8 f8 f1 f1 f1 f1 f8 f8 f8 f8 00 
f2
[   64.943463]   ^
[   64.956759]  80222d81f100: f2 f2 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
f8
[   64.971278]  80222d81f180: f8 f8 f8 f8 f1 f1 f1 f1 00 00 00 f2 f8 f8 f8 
f8
[   64.985795] 
==
[   65.000312] Disabling lock debugging due to kernel taint
[   65.037509] EXT4-fs (sda2): mounted filesystem with ordered data mode. Opts: 
(null)

I'm not seeing any issues like filesystem corruption or misbehaviour that could 
be related
the warning.

Is this a false positive? Any thoughts?

--Jan


KASAN: use-after-scope in ext4_group_desc_csum

2018-10-05 Thread Jan Glauber
Hi,

I'm getting below warning when I enable CONFIG_KASAN_EXTRA=y on a arm64 
ThunderX2 system.
As far as I can tell this is present since KASAN_EXTRA was introduced (4.16).

[   64.547333] 
==
[   64.561933] BUG: KASAN: use-after-scope in ext4_es_lookup_extent+0x130/0x980
[   64.576105] Write of size 4 at addr 80222d81f0ec by task exe/4075

[   64.592044] CPU: 102 PID: 4075 Comm: exe Not tainted 4.19.0-rc6-jang+ #29
[   64.605690] Hardware name: To be filled by O.E.M. Saber/To be filled by 
O.E.M., BIOS 0ACKL018 03/30/2018
[   64.624750] Call trace:
[   64.629666]  dump_backtrace+0x0/0x360
[   64.637024]  show_stack+0x24/0x30
[   64.643687]  dump_stack+0x12c/0x1b4
[   64.650699]  print_address_description+0x68/0x2c8
[   64.660152]  kasan_report+0x130/0x300
[   64.667509]  __asan_store4+0x84/0xa8
[   64.674693]  ext4_es_lookup_extent+0x130/0x980
[   64.683623]  ext4_map_blocks+0xe0/0x990
[   64.691330]  _ext4_get_block+0x130/0x2b8
[   64.699211]  ext4_get_block+0x40/0x50
[   64.706571]  generic_block_bmap+0x104/0x178
[   64.714977]  ext4_bmap+0xc4/0x198
[   64.721636]  bmap+0x54/0x70
[   64.727250]  jbd2_journal_init_inode+0x2c/0x208
[   64.736355]  ext4_fill_super+0x5080/0x5c90
[   64.744587]  mount_bdev+0x1e0/0x228
[   64.751597]  ext4_mount+0x44/0x58
[   64.758255]  mount_fs+0x58/0x1b8
[   64.764740]  vfs_kern_mount.part.2+0xc0/0x2a8
[   64.773495]  do_mount+0x7a8/0x13e8
[   64.780327]  ksys_mount+0x9c/0x110
[   64.787160]  __arm64_sys_mount+0x70/0x88
[   64.795043]  el0_svc_handler+0xac/0x150
[   64.802749]  el0_svc+0x8/0xc

[   64.811521] The buggy address belongs to the page:
[   64.821149] page:7e0088b607c0 count:0 mapcount:0 
mapping: index:0x0
[   64.837249] flags: 0x1000()
[   64.844959] raw: 1000 7e0088b607c8 7e0088b607c8 

[   64.860527] raw:    

[   64.876093] page dumped because: kasan: bad access detected

[   64.890278] Memory state around the buggy address:
[   64.899907]  80222d81ef80: f2 f2 f2 f2 00 f2 f2 f2 f2 f2 f2 f2 00 f2 f2 
f2
[   64.914426]  80222d81f000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
f8
[   64.928945] >80222d81f080: f8 f8 f8 f8 f8 f8 f1 f1 f1 f1 f8 f8 f8 f8 00 
f2
[   64.943463]   ^
[   64.956759]  80222d81f100: f2 f2 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 
f8
[   64.971278]  80222d81f180: f8 f8 f8 f8 f1 f1 f1 f1 00 00 00 f2 f8 f8 f8 
f8
[   64.985795] 
==
[   65.000312] Disabling lock debugging due to kernel taint
[   65.037509] EXT4-fs (sda2): mounted filesystem with ordered data mode. Opts: 
(null)

I'm not seeing any issues like filesystem corruption or misbehaviour that could 
be related
the warning.

Is this a false positive? Any thoughts?

--Jan


Re: [PATCH] EDAC, thunderx: Remove VLA usage

2018-07-06 Thread Jan Glauber
On Fri, Jun 29, 2018 at 11:48:50AM -0700, Kees Cook wrote:
> In the quest to remove all stack VLA usage from the kernel[1], this
> switches to using a kmalloc-allocated buffer instead of stack space.
> This should be fine since the existing routine is allocating memory
> too.
> 
> [1] 
> https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qpxydaacu1rq...@mail.gmail.com
> 
> Cc: David Daney 
> Cc: Jan Glauber 
> Cc: Borislav Petkov 
> Cc: Mauro Carvalho Chehab 
> Cc: linux-e...@vger.kernel.org
> Signed-off-by: Kees Cook 

Acked-by: Jan Glauber 

thanks,
Jan

> ---
>  drivers/edac/thunderx_edac.c | 14 +-
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
> index 4803c6468bab..c009d94f40c5 100644
> --- a/drivers/edac/thunderx_edac.c
> +++ b/drivers/edac/thunderx_edac.c
> @@ -408,26 +408,29 @@ static ssize_t thunderx_lmc_inject_ecc_write(struct 
> file *file,
>size_t count, loff_t *ppos)
>  {
>   struct thunderx_lmc *lmc = file->private_data;
> -
>   unsigned int cline_size = cache_line_size();
> -
> - u8 tmp[cline_size];
> + u8 *tmp;
>   void __iomem *addr;
>   unsigned int offs, timeout = 10;
>  
>   atomic_set(>ecc_int, 0);
>  
>   lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
> -
>   if (!lmc->mem)
>   return -ENOMEM;
>  
> + tmp = kmalloc(cline_size, GFP_KERNEL);
> + if (!tmp) {
> + __free_pages(lmc->mem, 0);
> + return -ENOMEM;
> + }
> +
>   addr = page_address(lmc->mem);
>  
>   while (!atomic_read(>ecc_int) && timeout--) {
>   stop_machine(inject_ecc_fn, lmc, NULL);
>  
> - for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) {
> + for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
>   /*
>* Do a load from the previously rigged location
>* This should generate an error interrupt.
> @@ -437,6 +440,7 @@ static ssize_t thunderx_lmc_inject_ecc_write(struct file 
> *file,
>   }
>   }
>  
> + kfree(tmp);
>   __free_pages(lmc->mem, 0);
>  
>   return count;
> -- 
> 2.17.1
> 
> 
> -- 
> Kees Cook
> Pixel Security


Re: [PATCH] EDAC, thunderx: Remove VLA usage

2018-07-06 Thread Jan Glauber
On Fri, Jun 29, 2018 at 11:48:50AM -0700, Kees Cook wrote:
> In the quest to remove all stack VLA usage from the kernel[1], this
> switches to using a kmalloc-allocated buffer instead of stack space.
> This should be fine since the existing routine is allocating memory
> too.
> 
> [1] 
> https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qpxydaacu1rq...@mail.gmail.com
> 
> Cc: David Daney 
> Cc: Jan Glauber 
> Cc: Borislav Petkov 
> Cc: Mauro Carvalho Chehab 
> Cc: linux-e...@vger.kernel.org
> Signed-off-by: Kees Cook 

Acked-by: Jan Glauber 

thanks,
Jan

> ---
>  drivers/edac/thunderx_edac.c | 14 +-
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
> index 4803c6468bab..c009d94f40c5 100644
> --- a/drivers/edac/thunderx_edac.c
> +++ b/drivers/edac/thunderx_edac.c
> @@ -408,26 +408,29 @@ static ssize_t thunderx_lmc_inject_ecc_write(struct 
> file *file,
>size_t count, loff_t *ppos)
>  {
>   struct thunderx_lmc *lmc = file->private_data;
> -
>   unsigned int cline_size = cache_line_size();
> -
> - u8 tmp[cline_size];
> + u8 *tmp;
>   void __iomem *addr;
>   unsigned int offs, timeout = 10;
>  
>   atomic_set(>ecc_int, 0);
>  
>   lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
> -
>   if (!lmc->mem)
>   return -ENOMEM;
>  
> + tmp = kmalloc(cline_size, GFP_KERNEL);
> + if (!tmp) {
> + __free_pages(lmc->mem, 0);
> + return -ENOMEM;
> + }
> +
>   addr = page_address(lmc->mem);
>  
>   while (!atomic_read(>ecc_int) && timeout--) {
>   stop_machine(inject_ecc_fn, lmc, NULL);
>  
> - for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) {
> + for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
>   /*
>* Do a load from the previously rigged location
>* This should generate an error interrupt.
> @@ -437,6 +440,7 @@ static ssize_t thunderx_lmc_inject_ecc_write(struct file 
> *file,
>   }
>   }
>  
> + kfree(tmp);
>   __free_pages(lmc->mem, 0);
>  
>   return count;
> -- 
> 2.17.1
> 
> 
> -- 
> Kees Cook
> Pixel Security


Re: [PATCH 4/4] i2c: xlp9xx: Add MAINTAINERS entry

2018-05-22 Thread Jan Glauber
On Tue, May 22, 2018 at 01:54:14PM +0200, Wolfram Sang wrote:
> On Wed, May 16, 2018 at 12:00:19AM -0700, George Cherian wrote:
> > The i2c XLP9xx driver is maintained by Cavium.
> > Add George Cherian and Jan Glauber as the Maintainers.
> > 
> > Signed-off-by: George Cherian <george.cher...@cavium.com>
> > ---
> >  MAINTAINERS | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index df6e9bb..68da265 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -15509,6 +15509,14 @@ L: linux-kernel@vger.kernel.org
> >  S: Supported
> >  F: drivers/char/xillybus/
> >  
> > +XLP9XX I2C DRIVER
> > +M: George Cherian <george.cher...@cavium.com>
> > +M: Jan Glauber <jglau...@cavium.com>
> 
> Jan, I need an explicit ACK from you for this change.
> 

Sure:
Acked-by: Jan Glauber <jglau...@cavium.com>

Cheers,
Jan


Re: [PATCH 4/4] i2c: xlp9xx: Add MAINTAINERS entry

2018-05-22 Thread Jan Glauber
On Tue, May 22, 2018 at 01:54:14PM +0200, Wolfram Sang wrote:
> On Wed, May 16, 2018 at 12:00:19AM -0700, George Cherian wrote:
> > The i2c XLP9xx driver is maintained by Cavium.
> > Add George Cherian and Jan Glauber as the Maintainers.
> > 
> > Signed-off-by: George Cherian 
> > ---
> >  MAINTAINERS | 8 
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index df6e9bb..68da265 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -15509,6 +15509,14 @@ L: linux-kernel@vger.kernel.org
> >  S: Supported
> >  F: drivers/char/xillybus/
> >  
> > +XLP9XX I2C DRIVER
> > +M: George Cherian 
> > +M: Jan Glauber 
> 
> Jan, I need an explicit ACK from you for this change.
> 

Sure:
Acked-by: Jan Glauber 

Cheers,
Jan


Re: [PATCH 1/2] arm64: defconfig: enable THUNDER_NIC_VF

2018-04-30 Thread Jan Glauber
On Fri, Mar 02, 2018 at 03:37:36PM +0100, Jan Glauber wrote:
> Without this option the NIC on ThunderX1 is not coming up
> so enable it to get a working network interface.
> 
> Signed-off-by: Jan Glauber <jglau...@cavium.com>
> ---
>  arch/arm64/configs/defconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index 634b373785c4..3594aefa496f 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -197,6 +197,7 @@ CONFIG_AMD_XGBE=y
>  CONFIG_NET_XGENE=y
>  CONFIG_MACB=y
>  CONFIG_THUNDER_NIC_PF=y
> +CONFIG_THUNDER_NIC_VF=y
>  CONFIG_HNS_DSAF=y
>  CONFIG_HNS_ENET=y
>  CONFIG_E1000E=y
> -- 
> 2.7.4

Hi Arnd,

could you take this one?

thanks,
Jan



Re: [PATCH 1/2] arm64: defconfig: enable THUNDER_NIC_VF

2018-04-30 Thread Jan Glauber
On Fri, Mar 02, 2018 at 03:37:36PM +0100, Jan Glauber wrote:
> Without this option the NIC on ThunderX1 is not coming up
> so enable it to get a working network interface.
> 
> Signed-off-by: Jan Glauber 
> ---
>  arch/arm64/configs/defconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index 634b373785c4..3594aefa496f 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -197,6 +197,7 @@ CONFIG_AMD_XGBE=y
>  CONFIG_NET_XGENE=y
>  CONFIG_MACB=y
>  CONFIG_THUNDER_NIC_PF=y
> +CONFIG_THUNDER_NIC_VF=y
>  CONFIG_HNS_DSAF=y
>  CONFIG_HNS_ENET=y
>  CONFIG_E1000E=y
> -- 
> 2.7.4

Hi Arnd,

could you take this one?

thanks,
Jan



Re: [PATCH] crypto: testmgr: Allow different compression results

2018-04-19 Thread Jan Glauber
On Thu, Apr 19, 2018 at 11:42:11AM +0800, Herbert Xu wrote:
> On Wed, Apr 11, 2018 at 08:28:32PM +0200, Jan Glauber wrote:
> >
> > @@ -1362,7 +1373,17 @@ static int test_comp(struct crypto_comp *tfm,
> > goto out;
> > }
> >  
> > -   if (dlen != ctemplate[i].outlen) {
> > +   ilen = dlen;
> > +   dlen = COMP_BUF_SIZE;
> > +   ret = crypto_comp_decompress(tfm, output,
> > +ilen, decomp_output, );
> > +   if (ret) {
> > +   pr_err("alg: comp: compression failed: decompress: on 
> > test %d for %s failed: ret=%d\n",
> > +  i + 1, algo, -ret);
> > +   goto out;
> > +   }
> > +
> > +   if (dlen != ctemplate[i].inlen) {
> > printk(KERN_ERR "alg: comp: Compression test %d "
> >"failed for %s: output len = %d\n", i + 1, algo,
> >dlen);
> 
> Your patch is fine as it is.
> 
> But I just thought I'd mention that if anyone wants to we should
> really change this to use a different tfm, e.g., always use the
> generic algorithm to perform the decompression.  This way if there
> were multiple implementations we can at least test them against
> the generic one.
> 
> Otherwise you could end up with a buggy implementation that works
> against itself but still generates incorrect output.

Nice idea. Would a crypto_alloc_cipher("deflate", ...) pick the generic
implementation or how can we select it?

--Jan


Re: [PATCH] crypto: testmgr: Allow different compression results

2018-04-19 Thread Jan Glauber
On Thu, Apr 19, 2018 at 11:42:11AM +0800, Herbert Xu wrote:
> On Wed, Apr 11, 2018 at 08:28:32PM +0200, Jan Glauber wrote:
> >
> > @@ -1362,7 +1373,17 @@ static int test_comp(struct crypto_comp *tfm,
> > goto out;
> > }
> >  
> > -   if (dlen != ctemplate[i].outlen) {
> > +   ilen = dlen;
> > +   dlen = COMP_BUF_SIZE;
> > +   ret = crypto_comp_decompress(tfm, output,
> > +ilen, decomp_output, );
> > +   if (ret) {
> > +   pr_err("alg: comp: compression failed: decompress: on 
> > test %d for %s failed: ret=%d\n",
> > +  i + 1, algo, -ret);
> > +   goto out;
> > +   }
> > +
> > +   if (dlen != ctemplate[i].inlen) {
> > printk(KERN_ERR "alg: comp: Compression test %d "
> >"failed for %s: output len = %d\n", i + 1, algo,
> >dlen);
> 
> Your patch is fine as it is.
> 
> But I just thought I'd mention that if anyone wants to we should
> really change this to use a different tfm, e.g., always use the
> generic algorithm to perform the decompression.  This way if there
> were multiple implementations we can at least test them against
> the generic one.
> 
> Otherwise you could end up with a buggy implementation that works
> against itself but still generates incorrect output.

Nice idea. Would a crypto_alloc_cipher("deflate", ...) pick the generic
implementation or how can we select it?

--Jan


[PATCH] crypto: testmgr: Allow different compression results

2018-04-11 Thread Jan Glauber
From: Mahipal Challa <mcha...@cavium.com>

The following error is triggered by the ThunderX ZIP driver
if the testmanager is enabled:

[  199.069437] ThunderX-ZIP :03:00.0: Found ZIP device 0 177d:a01a on Node 0
[  199.073573] alg: comp: Compression test 1 failed for deflate-generic: output 
len = 37

The reason for this error is the verification of the compression
results. Verifying the compression result only works if all
algorithm parameters are identical, in this case to the software
implementation.

Different compression engines like the ThunderX ZIP coprocessor
might yield different compression results by tuning the
algorithm parameters. In our case the compressed result is
shorter than the test vector.

We should not forbid different compression results but only
check that compression -> decompression yields the same
result. This is done already in the acomp test. Do something
similar for test_comp().

Signed-off-by: Mahipal Challa <mcha...@cavium.com>
Signed-off-by: Balakrishna Bhamidipati <bbhamidip...@cavium.com>
[jglau...@cavium.com: removed unrelated printk changes, rewrote commit msg,
 fixed whitespace and unneeded initialization]
Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 crypto/testmgr.c | 50 +-
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index af4a01c..627e82e 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1342,19 +1342,30 @@ static int test_comp(struct crypto_comp *tfm,
 int ctcount, int dtcount)
 {
const char *algo = crypto_tfm_alg_driver_name(crypto_comp_tfm(tfm));
+   char *output, *decomp_output;
unsigned int i;
-   char result[COMP_BUF_SIZE];
int ret;
 
+   output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+   if (!output)
+   return -ENOMEM;
+
+   decomp_output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+   if (!decomp_output) {
+   kfree(output);
+   return -ENOMEM;
+   }
+
for (i = 0; i < ctcount; i++) {
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
 
-   memset(result, 0, sizeof (result));
+   memset(output, 0, sizeof(COMP_BUF_SIZE));
+   memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
 
ilen = ctemplate[i].inlen;
ret = crypto_comp_compress(tfm, ctemplate[i].input,
-  ilen, result, );
+  ilen, output, );
if (ret) {
printk(KERN_ERR "alg: comp: compression failed "
   "on test %d for %s: ret=%d\n", i + 1, algo,
@@ -1362,7 +1373,17 @@ static int test_comp(struct crypto_comp *tfm,
goto out;
}
 
-   if (dlen != ctemplate[i].outlen) {
+   ilen = dlen;
+   dlen = COMP_BUF_SIZE;
+   ret = crypto_comp_decompress(tfm, output,
+ilen, decomp_output, );
+   if (ret) {
+   pr_err("alg: comp: compression failed: decompress: on 
test %d for %s failed: ret=%d\n",
+  i + 1, algo, -ret);
+   goto out;
+   }
+
+   if (dlen != ctemplate[i].inlen) {
printk(KERN_ERR "alg: comp: Compression test %d "
   "failed for %s: output len = %d\n", i + 1, algo,
   dlen);
@@ -1370,10 +1391,11 @@ static int test_comp(struct crypto_comp *tfm,
goto out;
}
 
-   if (memcmp(result, ctemplate[i].output, dlen)) {
-   printk(KERN_ERR "alg: comp: Compression test %d "
-  "failed for %s\n", i + 1, algo);
-   hexdump(result, dlen);
+   if (memcmp(decomp_output, ctemplate[i].input,
+  ctemplate[i].inlen)) {
+   pr_err("alg: comp: compression failed: output differs: 
on test %d for %s\n",
+  i + 1, algo);
+   hexdump(decomp_output, dlen);
ret = -EINVAL;
goto out;
}
@@ -1383,11 +1405,11 @@ static int test_comp(struct crypto_comp *tfm,
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
 
-   memset(result, 0, sizeof (result));
+   memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
 
ilen = dtemplate[i].inlen;
ret = crypto_comp_decompress(tfm, dtemplate[i].input,
-ilen, result, );
+ilen, 

[PATCH] crypto: testmgr: Allow different compression results

2018-04-11 Thread Jan Glauber
From: Mahipal Challa 

The following error is triggered by the ThunderX ZIP driver
if the testmanager is enabled:

[  199.069437] ThunderX-ZIP :03:00.0: Found ZIP device 0 177d:a01a on Node 0
[  199.073573] alg: comp: Compression test 1 failed for deflate-generic: output 
len = 37

The reason for this error is the verification of the compression
results. Verifying the compression result only works if all
algorithm parameters are identical, in this case to the software
implementation.

Different compression engines like the ThunderX ZIP coprocessor
might yield different compression results by tuning the
algorithm parameters. In our case the compressed result is
shorter than the test vector.

We should not forbid different compression results but only
check that compression -> decompression yields the same
result. This is done already in the acomp test. Do something
similar for test_comp().

Signed-off-by: Mahipal Challa 
Signed-off-by: Balakrishna Bhamidipati 
[jglau...@cavium.com: removed unrelated printk changes, rewrote commit msg,
 fixed whitespace and unneeded initialization]
Signed-off-by: Jan Glauber 
---
 crypto/testmgr.c | 50 +-
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index af4a01c..627e82e 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1342,19 +1342,30 @@ static int test_comp(struct crypto_comp *tfm,
 int ctcount, int dtcount)
 {
const char *algo = crypto_tfm_alg_driver_name(crypto_comp_tfm(tfm));
+   char *output, *decomp_output;
unsigned int i;
-   char result[COMP_BUF_SIZE];
int ret;
 
+   output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+   if (!output)
+   return -ENOMEM;
+
+   decomp_output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+   if (!decomp_output) {
+   kfree(output);
+   return -ENOMEM;
+   }
+
for (i = 0; i < ctcount; i++) {
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
 
-   memset(result, 0, sizeof (result));
+   memset(output, 0, sizeof(COMP_BUF_SIZE));
+   memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
 
ilen = ctemplate[i].inlen;
ret = crypto_comp_compress(tfm, ctemplate[i].input,
-  ilen, result, );
+  ilen, output, );
if (ret) {
printk(KERN_ERR "alg: comp: compression failed "
   "on test %d for %s: ret=%d\n", i + 1, algo,
@@ -1362,7 +1373,17 @@ static int test_comp(struct crypto_comp *tfm,
goto out;
}
 
-   if (dlen != ctemplate[i].outlen) {
+   ilen = dlen;
+   dlen = COMP_BUF_SIZE;
+   ret = crypto_comp_decompress(tfm, output,
+ilen, decomp_output, );
+   if (ret) {
+   pr_err("alg: comp: compression failed: decompress: on 
test %d for %s failed: ret=%d\n",
+  i + 1, algo, -ret);
+   goto out;
+   }
+
+   if (dlen != ctemplate[i].inlen) {
printk(KERN_ERR "alg: comp: Compression test %d "
   "failed for %s: output len = %d\n", i + 1, algo,
   dlen);
@@ -1370,10 +1391,11 @@ static int test_comp(struct crypto_comp *tfm,
goto out;
}
 
-   if (memcmp(result, ctemplate[i].output, dlen)) {
-   printk(KERN_ERR "alg: comp: Compression test %d "
-  "failed for %s\n", i + 1, algo);
-   hexdump(result, dlen);
+   if (memcmp(decomp_output, ctemplate[i].input,
+  ctemplate[i].inlen)) {
+   pr_err("alg: comp: compression failed: output differs: 
on test %d for %s\n",
+  i + 1, algo);
+   hexdump(decomp_output, dlen);
ret = -EINVAL;
goto out;
}
@@ -1383,11 +1405,11 @@ static int test_comp(struct crypto_comp *tfm,
int ilen;
unsigned int dlen = COMP_BUF_SIZE;
 
-   memset(result, 0, sizeof (result));
+   memset(decomp_output, 0, sizeof(COMP_BUF_SIZE));
 
ilen = dtemplate[i].inlen;
ret = crypto_comp_decompress(tfm, dtemplate[i].input,
-ilen, result, );
+ilen, decomp_output, );
if (ret) {
printk(KERN_ERR "alg: comp: decom

[PATCH v2 1/5] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-04-09 Thread Jan Glauber
Enabling virtual mapped kernel stacks breaks the thunderx_zip
driver. On compression or decompression the executing CPU hangs
in an endless loop. The reason for this is the usage of __pa
by the driver which does no longer work for an address that is
not part of the 1:1 mapping.

The zip driver allocates a result struct on the stack and needs
to tell the hardware the physical address within this struct
that is used to signal the completion of the request.

As the hardware gets the wrong address after the broken __pa
conversion it writes to an arbitrary address. The zip driver then
waits forever for the completion byte to contain a non-zero value.

Allocating the result struct from 1:1 mapped memory resolves this
bug.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
Cc: stable <sta...@vger.kernel.org> # 4.14
---
 drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26cf9d4..b92b6e7e100f 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
 struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_comp;
 
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
 
-   ret = zip_deflate(zip_ops, _state, zip);
+   ret = zip_deflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
@@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
   struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
 
@@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
 
-   ret = zip_inflate(zip_ops, _state, zip);
+   ret = zip_inflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
-- 
2.16.2



[PATCH v2 1/5] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-04-09 Thread Jan Glauber
Enabling virtual mapped kernel stacks breaks the thunderx_zip
driver. On compression or decompression the executing CPU hangs
in an endless loop. The reason for this is the usage of __pa
by the driver which does no longer work for an address that is
not part of the 1:1 mapping.

The zip driver allocates a result struct on the stack and needs
to tell the hardware the physical address within this struct
that is used to signal the completion of the request.

As the hardware gets the wrong address after the broken __pa
conversion it writes to an arbitrary address. The zip driver then
waits forever for the completion byte to contain a non-zero value.

Allocating the result struct from 1:1 mapped memory resolves this
bug.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
Cc: stable  # 4.14
---
 drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26cf9d4..b92b6e7e100f 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
 struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_comp;
 
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
 
-   ret = zip_deflate(zip_ops, _state, zip);
+   ret = zip_deflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
@@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
   struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
 
@@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
 
-   ret = zip_inflate(zip_ops, _state, zip);
+   ret = zip_inflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
-- 
2.16.2



[PATCH v2 5/5] crypto: thunderx_zip: Fix smp_processor_id() warnings

2018-04-09 Thread Jan Glauber
Switch to raw_smp_processor_id() to prevent a number of
warnings from kernel debugging. We do not care about
preemption here, as the CPU number is only used as a
poor mans load balancing or device selection. If preemption
happens during a compress/decompress operation a small performance
hit will occur but everything will continue to work, so just
ignore it.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
---
 drivers/crypto/cavium/zip/zip_device.c | 4 ++--
 drivers/crypto/cavium/zip/zip_main.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_device.c 
b/drivers/crypto/cavium/zip/zip_device.c
index ccf21fb91513..f174ec29ed69 100644
--- a/drivers/crypto/cavium/zip/zip_device.c
+++ b/drivers/crypto/cavium/zip/zip_device.c
@@ -87,12 +87,12 @@ u32 zip_load_instr(union zip_inst_s *instr,
 * Distribute the instructions between the enabled queues based on
 * the CPU id.
 */
-   if (smp_processor_id() % 2 == 0)
+   if (raw_smp_processor_id() % 2 == 0)
queue = 0;
else
queue = 1;
 
-   zip_dbg("CPU Core: %d Queue number:%d", smp_processor_id(), queue);
+   zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
 
/* Take cmd buffer lock */
spin_lock(_dev->iq[queue].lock);
diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index ae5b20c695ca..be055b9547f6 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -113,7 +113,7 @@ struct zip_device *zip_get_device(int node)
  */
 int zip_get_node_id(void)
 {
-   return cpu_to_node(smp_processor_id());
+   return cpu_to_node(raw_smp_processor_id());
 }
 
 /* Initializes the ZIP h/w sub-system */
-- 
2.16.2



[PATCH v2 5/5] crypto: thunderx_zip: Fix smp_processor_id() warnings

2018-04-09 Thread Jan Glauber
Switch to raw_smp_processor_id() to prevent a number of
warnings from kernel debugging. We do not care about
preemption here, as the CPU number is only used as a
poor mans load balancing or device selection. If preemption
happens during a compress/decompress operation a small performance
hit will occur but everything will continue to work, so just
ignore it.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
---
 drivers/crypto/cavium/zip/zip_device.c | 4 ++--
 drivers/crypto/cavium/zip/zip_main.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_device.c 
b/drivers/crypto/cavium/zip/zip_device.c
index ccf21fb91513..f174ec29ed69 100644
--- a/drivers/crypto/cavium/zip/zip_device.c
+++ b/drivers/crypto/cavium/zip/zip_device.c
@@ -87,12 +87,12 @@ u32 zip_load_instr(union zip_inst_s *instr,
 * Distribute the instructions between the enabled queues based on
 * the CPU id.
 */
-   if (smp_processor_id() % 2 == 0)
+   if (raw_smp_processor_id() % 2 == 0)
queue = 0;
else
queue = 1;
 
-   zip_dbg("CPU Core: %d Queue number:%d", smp_processor_id(), queue);
+   zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
 
/* Take cmd buffer lock */
spin_lock(_dev->iq[queue].lock);
diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index ae5b20c695ca..be055b9547f6 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -113,7 +113,7 @@ struct zip_device *zip_get_device(int node)
  */
 int zip_get_node_id(void)
 {
-   return cpu_to_node(smp_processor_id());
+   return cpu_to_node(raw_smp_processor_id());
 }
 
 /* Initializes the ZIP h/w sub-system */
-- 
2.16.2



[PATCH v2 4/5] crypto: thunderx_zip: Fix statistics pending request value

2018-04-09 Thread Jan Glauber
The pending request counter was read from the wrong register. While
at it, there is no need to use an atomic for it as it is only read
localy in a loop.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
---
 drivers/crypto/cavium/zip/zip_main.c | 13 +
 drivers/crypto/cavium/zip/zip_main.h |  1 -
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index 79b449e0f955..ae5b20c695ca 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -469,6 +469,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
struct zip_stats  *st;
 
for (index = 0; index < MAX_ZIP_DEVICES; index++) {
+   u64 pending = 0;
+
if (zip_dev[index]) {
zip = zip_dev[index];
st  = >stats;
@@ -476,10 +478,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
/* Get all the pending requests */
for (q = 0; q < ZIP_NUM_QUEUES; q++) {
val = zip_reg_read((zip->reg_base +
-   ZIP_DBG_COREX_STA(q)));
-   val = (val >> 32);
-   val = val & 0xff;
-   atomic64_add(val, >pending_req);
+   ZIP_DBG_QUEX_STA(q)));
+   pending += val >> 32 & 0xff;
}
 
val = atomic64_read(>comp_req_complete);
@@ -514,10 +514,7 @@ static int zip_show_stats(struct seq_file *s, void *unused)
   (u64)atomic64_read(>decomp_in_bytes),
   
(u64)atomic64_read(>decomp_out_bytes),
   (u64)atomic64_read(>decomp_bad_reqs),
-  (u64)atomic64_read(>pending_req));
-
-   /* Reset pending requests  count */
-   atomic64_set(>pending_req, 0);
+  pending);
}
}
return 0;
diff --git a/drivers/crypto/cavium/zip/zip_main.h 
b/drivers/crypto/cavium/zip/zip_main.h
index 64e051f60784..e1e4fa92ce80 100644
--- a/drivers/crypto/cavium/zip/zip_main.h
+++ b/drivers/crypto/cavium/zip/zip_main.h
@@ -74,7 +74,6 @@ struct zip_stats {
atomic64_tcomp_req_complete;
atomic64_tdecomp_req_submit;
atomic64_tdecomp_req_complete;
-   atomic64_tpending_req;
atomic64_tcomp_in_bytes;
atomic64_tcomp_out_bytes;
atomic64_tdecomp_in_bytes;
-- 
2.16.2



[PATCH v2 3/5] crypto: thunderx_zip: Prevent division by zero

2018-04-09 Thread Jan Glauber
Avoid two potential divisions by zero when calculating average
values for the zip statistics.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
---
 drivers/crypto/cavium/zip/zip_main.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index 1cd8aa488185..79b449e0f955 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -482,10 +482,11 @@ static int zip_show_stats(struct seq_file *s, void 
*unused)
atomic64_add(val, >pending_req);
}
 
-   avg_chunk = (atomic64_read(>comp_in_bytes) /
-atomic64_read(>comp_req_complete));
-   avg_cr = (atomic64_read(>comp_in_bytes) /
- atomic64_read(>comp_out_bytes));
+   val = atomic64_read(>comp_req_complete);
+   avg_chunk = (val) ? atomic64_read(>comp_in_bytes) / 
val : 0;
+
+   val = atomic64_read(>comp_out_bytes);
+   avg_cr = (val) ? atomic64_read(>comp_in_bytes) / 
val : 0;
seq_printf(s, "ZIP Device %d Stats\n"
  "---\n"
  "Comp Req Submitted: \t%lld\n"
-- 
2.16.2



[PATCH v2 4/5] crypto: thunderx_zip: Fix statistics pending request value

2018-04-09 Thread Jan Glauber
The pending request counter was read from the wrong register. While
at it, there is no need to use an atomic for it as it is only read
localy in a loop.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
---
 drivers/crypto/cavium/zip/zip_main.c | 13 +
 drivers/crypto/cavium/zip/zip_main.h |  1 -
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index 79b449e0f955..ae5b20c695ca 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -469,6 +469,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
struct zip_stats  *st;
 
for (index = 0; index < MAX_ZIP_DEVICES; index++) {
+   u64 pending = 0;
+
if (zip_dev[index]) {
zip = zip_dev[index];
st  = >stats;
@@ -476,10 +478,8 @@ static int zip_show_stats(struct seq_file *s, void *unused)
/* Get all the pending requests */
for (q = 0; q < ZIP_NUM_QUEUES; q++) {
val = zip_reg_read((zip->reg_base +
-   ZIP_DBG_COREX_STA(q)));
-   val = (val >> 32);
-   val = val & 0xff;
-   atomic64_add(val, >pending_req);
+   ZIP_DBG_QUEX_STA(q)));
+   pending += val >> 32 & 0xff;
}
 
val = atomic64_read(>comp_req_complete);
@@ -514,10 +514,7 @@ static int zip_show_stats(struct seq_file *s, void *unused)
   (u64)atomic64_read(>decomp_in_bytes),
   
(u64)atomic64_read(>decomp_out_bytes),
   (u64)atomic64_read(>decomp_bad_reqs),
-  (u64)atomic64_read(>pending_req));
-
-   /* Reset pending requests  count */
-   atomic64_set(>pending_req, 0);
+  pending);
}
}
return 0;
diff --git a/drivers/crypto/cavium/zip/zip_main.h 
b/drivers/crypto/cavium/zip/zip_main.h
index 64e051f60784..e1e4fa92ce80 100644
--- a/drivers/crypto/cavium/zip/zip_main.h
+++ b/drivers/crypto/cavium/zip/zip_main.h
@@ -74,7 +74,6 @@ struct zip_stats {
atomic64_tcomp_req_complete;
atomic64_tdecomp_req_submit;
atomic64_tdecomp_req_complete;
-   atomic64_tpending_req;
atomic64_tcomp_in_bytes;
atomic64_tcomp_out_bytes;
atomic64_tdecomp_in_bytes;
-- 
2.16.2



[PATCH v2 3/5] crypto: thunderx_zip: Prevent division by zero

2018-04-09 Thread Jan Glauber
Avoid two potential divisions by zero when calculating average
values for the zip statistics.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
---
 drivers/crypto/cavium/zip/zip_main.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_main.c 
b/drivers/crypto/cavium/zip/zip_main.c
index 1cd8aa488185..79b449e0f955 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -482,10 +482,11 @@ static int zip_show_stats(struct seq_file *s, void 
*unused)
atomic64_add(val, >pending_req);
}
 
-   avg_chunk = (atomic64_read(>comp_in_bytes) /
-atomic64_read(>comp_req_complete));
-   avg_cr = (atomic64_read(>comp_in_bytes) /
- atomic64_read(>comp_out_bytes));
+   val = atomic64_read(>comp_req_complete);
+   avg_chunk = (val) ? atomic64_read(>comp_in_bytes) / 
val : 0;
+
+   val = atomic64_read(>comp_out_bytes);
+   avg_cr = (val) ? atomic64_read(>comp_in_bytes) / 
val : 0;
seq_printf(s, "ZIP Device %d Stats\n"
  "---\n"
  "Comp Req Submitted: \t%lld\n"
-- 
2.16.2



[PATCH v2 2/5] crypto: thunderx_zip: Limit result reading attempts

2018-04-09 Thread Jan Glauber
After issuing a request an endless loop was used to read the
completion state from memory which is asynchronously updated
by the ZIP coprocessor.

Add an upper bound to the retry attempts to prevent a CPU getting stuck
forever in case of an error. Additionally, add a read memory barrier
and a small delay between the reading attempts.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
Cc: stable <sta...@vger.kernel.org> # 4.14
---
 drivers/crypto/cavium/zip/common.h  | 21 +
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/common.h 
b/drivers/crypto/cavium/zip/common.h
index dc451e0a43c5..58fb3ed6e644 100644
--- a/drivers/crypto/cavium/zip/common.h
+++ b/drivers/crypto/cavium/zip/common.h
@@ -46,8 +46,10 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -149,6 +151,25 @@ struct zip_operation {
u32   sizeofzops;
 };
 
+static inline int zip_poll_result(union zip_zres_s *result)
+{
+   int retries = 1000;
+
+   while (!result->s.compcode) {
+   if (!--retries) {
+   pr_err("ZIP ERR: request timed out");
+   return -ETIMEDOUT;
+   }
+   udelay(10);
+   /*
+* Force re-reading of compcode which is updated
+* by the ZIP coprocessor.
+*/
+   rmb();
+   }
+   return 0;
+}
+
 /* error messages */
 #define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
  fmt "\n", __func__, __LINE__, ## args)
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c 
b/drivers/crypto/cavium/zip/zip_deflate.c
index 9a944b8c1e29..d7133f857d67 100644
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ b/drivers/crypto/cavium/zip/zip_deflate.c
@@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Stats update for compression requests submitted */
atomic64_inc(_dev->stats.comp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Stats update for compression requests completed */
atomic64_inc(_dev->stats.comp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c 
b/drivers/crypto/cavium/zip/zip_inflate.c
index 50cbdd83dbf2..7e0d73e2f89e 100644
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ b/drivers/crypto/cavium/zip/zip_inflate.c
@@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Decompression requests submitted stats update */
atomic64_inc(_dev->stats.decomp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Decompression requests completed stats update */
atomic64_inc(_dev->stats.decomp_req_complete);
-- 
2.16.2



[PATCH v2 2/5] crypto: thunderx_zip: Limit result reading attempts

2018-04-09 Thread Jan Glauber
After issuing a request an endless loop was used to read the
completion state from memory which is asynchronously updated
by the ZIP coprocessor.

Add an upper bound to the retry attempts to prevent a CPU getting stuck
forever in case of an error. Additionally, add a read memory barrier
and a small delay between the reading attempts.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
Cc: stable  # 4.14
---
 drivers/crypto/cavium/zip/common.h  | 21 +
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/common.h 
b/drivers/crypto/cavium/zip/common.h
index dc451e0a43c5..58fb3ed6e644 100644
--- a/drivers/crypto/cavium/zip/common.h
+++ b/drivers/crypto/cavium/zip/common.h
@@ -46,8 +46,10 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -149,6 +151,25 @@ struct zip_operation {
u32   sizeofzops;
 };
 
+static inline int zip_poll_result(union zip_zres_s *result)
+{
+   int retries = 1000;
+
+   while (!result->s.compcode) {
+   if (!--retries) {
+   pr_err("ZIP ERR: request timed out");
+   return -ETIMEDOUT;
+   }
+   udelay(10);
+   /*
+* Force re-reading of compcode which is updated
+* by the ZIP coprocessor.
+*/
+   rmb();
+   }
+   return 0;
+}
+
 /* error messages */
 #define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
  fmt "\n", __func__, __LINE__, ## args)
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c 
b/drivers/crypto/cavium/zip/zip_deflate.c
index 9a944b8c1e29..d7133f857d67 100644
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ b/drivers/crypto/cavium/zip/zip_deflate.c
@@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Stats update for compression requests submitted */
atomic64_inc(_dev->stats.comp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Stats update for compression requests completed */
atomic64_inc(_dev->stats.comp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c 
b/drivers/crypto/cavium/zip/zip_inflate.c
index 50cbdd83dbf2..7e0d73e2f89e 100644
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ b/drivers/crypto/cavium/zip/zip_inflate.c
@@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Decompression requests submitted stats update */
atomic64_inc(_dev->stats.decomp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Decompression requests completed stats update */
atomic64_inc(_dev->stats.decomp_req_complete);
-- 
2.16.2



[PATCH v2 0/5] ThunderX ZIP driver bug fixes

2018-04-09 Thread Jan Glauber
Some bug fixes for this driver after it stopped working with virtual mapped
stacks. I think the first two patches qualify for stable.

Jan Glauber (5):
  crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK
  crypto: thunderx_zip: Limit result reading attempts
  crypto: thunderx_zip: Prevent division by zero
  crypto: thunderx_zip: Fix statistics pending request value
  crypto: thunderx_zip: Fix smp_processor_id() warnings

 drivers/crypto/cavium/zip/common.h  | 21 +
 drivers/crypto/cavium/zip/zip_crypto.c  | 22 ++
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_device.c  |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_main.c| 24 +++-
 drivers/crypto/cavium/zip/zip_main.h|  1 -
 7 files changed, 52 insertions(+), 28 deletions(-)

-- 
2.16.2



[PATCH v2 0/5] ThunderX ZIP driver bug fixes

2018-04-09 Thread Jan Glauber
Some bug fixes for this driver after it stopped working with virtual mapped
stacks. I think the first two patches qualify for stable.

Jan Glauber (5):
  crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK
  crypto: thunderx_zip: Limit result reading attempts
  crypto: thunderx_zip: Prevent division by zero
  crypto: thunderx_zip: Fix statistics pending request value
  crypto: thunderx_zip: Fix smp_processor_id() warnings

 drivers/crypto/cavium/zip/common.h  | 21 +
 drivers/crypto/cavium/zip/zip_crypto.c  | 22 ++
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_device.c  |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_main.c| 24 +++-
 drivers/crypto/cavium/zip/zip_main.h|  1 -
 7 files changed, 52 insertions(+), 28 deletions(-)

-- 
2.16.2



Re: [PATCH 1/2] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-04-05 Thread Jan Glauber
On Wed, Mar 28, 2018 at 03:05:56PM +0200, Jan Glauber wrote:
> Enabling virtual mapped kernel stacks breaks the thunderx_zip
> driver. On compression or decompression the executing CPU hangs
> in an endless loop. The reason for this is the usage of __pa
> by the driver which does no longer work for an address that is
> not part of the 1:1 mapping.
> 
> The zip driver allocates a result struct on the stack and needs
> to tell the hardware the physical address within this struct
> that is used to signal the completion of the request.
> 
> As the hardware gets the wrong address after the broken __pa
> conversion it writes to an arbitrary address. The zip driver then
> waits forever for the completion byte to contain a non-zero value.
> 
> Allocating the result struct from 1:1 mapped memory resolves this
> bug.

Hi Herbert,

Just realized that we might sleep in this path, so GFP_KERNEL wont
work here. Same with usleep in the second patch.

I'll respin the patches.

--Jan


> Signed-off-by: Jan Glauber <jglau...@cavium.com>
> Reviewed-by: Robert Richter <rrich...@cavium.com>
> Cc: stable <sta...@vger.kernel.org> # 4.14
> ---
>  drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
>  1 file changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
> b/drivers/crypto/cavium/zip/zip_crypto.c
> index 8df4d26..2fc9b03 100644
> --- a/drivers/crypto/cavium/zip/zip_crypto.c
> +++ b/drivers/crypto/cavium/zip/zip_crypto.c
> @@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
>  struct zip_kernel_ctx *zip_ctx)
>  {
> struct zip_operation  *zip_ops   = NULL;
> -   struct zip_state  zip_state;
> +   struct zip_state  *zip_state;
> struct zip_device *zip = NULL;
> int ret;
> 
> @@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
> if (!zip)
> return -ENODEV;
> 
> -   memset(_state, 0, sizeof(struct zip_state));
> +   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
> +   if (!zip_state)
> +   return -ENOMEM;
> +
> zip_ops = _ctx->zip_comp;
> 
> zip_ops->input_len  = slen;
> zip_ops->output_len = *dlen;
> memcpy(zip_ops->input, src, slen);
> 
> -   ret = zip_deflate(zip_ops, _state, zip);
> +   ret = zip_deflate(zip_ops, zip_state, zip);
> 
> if (!ret) {
> *dlen = zip_ops->output_len;
> memcpy(dst, zip_ops->output, *dlen);
> }
> -
> +   kfree(zip_state);
> return ret;
>  }
> 
> @@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
>struct zip_kernel_ctx *zip_ctx)
>  {
> struct zip_operation  *zip_ops   = NULL;
> -   struct zip_state  zip_state;
> +   struct zip_state  *zip_state;
> struct zip_device *zip = NULL;
> int ret;
> 
> @@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
> if (!zip)
> return -ENODEV;
> 
> -   memset(_state, 0, sizeof(struct zip_state));
> +   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
> +   if (!zip_state)
> +   return -ENOMEM;
> +
> zip_ops = _ctx->zip_decomp;
> memcpy(zip_ops->input, src, slen);
> 
> @@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
> zip_ops->input_len  = slen;
> zip_ops->output_len = *dlen;
> 
> -   ret = zip_inflate(zip_ops, _state, zip);
> +   ret = zip_inflate(zip_ops, zip_state, zip);
> 
> if (!ret) {
> *dlen = zip_ops->output_len;
> memcpy(dst, zip_ops->output, *dlen);
> }
> -
> +   kfree(zip_state);
> return ret;
>  }
> 
> --
> 2.7.4


Re: [PATCH 1/2] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-04-05 Thread Jan Glauber
On Wed, Mar 28, 2018 at 03:05:56PM +0200, Jan Glauber wrote:
> Enabling virtual mapped kernel stacks breaks the thunderx_zip
> driver. On compression or decompression the executing CPU hangs
> in an endless loop. The reason for this is the usage of __pa
> by the driver which does no longer work for an address that is
> not part of the 1:1 mapping.
> 
> The zip driver allocates a result struct on the stack and needs
> to tell the hardware the physical address within this struct
> that is used to signal the completion of the request.
> 
> As the hardware gets the wrong address after the broken __pa
> conversion it writes to an arbitrary address. The zip driver then
> waits forever for the completion byte to contain a non-zero value.
> 
> Allocating the result struct from 1:1 mapped memory resolves this
> bug.

Hi Herbert,

Just realized that we might sleep in this path, so GFP_KERNEL wont
work here. Same with usleep in the second patch.

I'll respin the patches.

--Jan


> Signed-off-by: Jan Glauber 
> Reviewed-by: Robert Richter 
> Cc: stable  # 4.14
> ---
>  drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
>  1 file changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
> b/drivers/crypto/cavium/zip/zip_crypto.c
> index 8df4d26..2fc9b03 100644
> --- a/drivers/crypto/cavium/zip/zip_crypto.c
> +++ b/drivers/crypto/cavium/zip/zip_crypto.c
> @@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
>  struct zip_kernel_ctx *zip_ctx)
>  {
> struct zip_operation  *zip_ops   = NULL;
> -   struct zip_state  zip_state;
> +   struct zip_state  *zip_state;
> struct zip_device *zip = NULL;
> int ret;
> 
> @@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
> if (!zip)
> return -ENODEV;
> 
> -   memset(_state, 0, sizeof(struct zip_state));
> +   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
> +   if (!zip_state)
> +   return -ENOMEM;
> +
> zip_ops = _ctx->zip_comp;
> 
> zip_ops->input_len  = slen;
> zip_ops->output_len = *dlen;
> memcpy(zip_ops->input, src, slen);
> 
> -   ret = zip_deflate(zip_ops, _state, zip);
> +   ret = zip_deflate(zip_ops, zip_state, zip);
> 
> if (!ret) {
> *dlen = zip_ops->output_len;
> memcpy(dst, zip_ops->output, *dlen);
> }
> -
> +   kfree(zip_state);
> return ret;
>  }
> 
> @@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
>struct zip_kernel_ctx *zip_ctx)
>  {
> struct zip_operation  *zip_ops   = NULL;
> -   struct zip_state  zip_state;
> +   struct zip_state  *zip_state;
> struct zip_device *zip = NULL;
> int ret;
> 
> @@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
> if (!zip)
> return -ENODEV;
> 
> -   memset(_state, 0, sizeof(struct zip_state));
> +   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
> +   if (!zip_state)
> +   return -ENOMEM;
> +
> zip_ops = _ctx->zip_decomp;
> memcpy(zip_ops->input, src, slen);
> 
> @@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
> zip_ops->input_len  = slen;
> zip_ops->output_len = *dlen;
> 
> -   ret = zip_inflate(zip_ops, _state, zip);
> +   ret = zip_inflate(zip_ops, zip_state, zip);
> 
> if (!ret) {
> *dlen = zip_ops->output_len;
> memcpy(dst, zip_ops->output, *dlen);
> }
> -
> +   kfree(zip_state);
> return ret;
>  }
> 
> --
> 2.7.4


[PATCH 1/2] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-03-28 Thread Jan Glauber
Enabling virtual mapped kernel stacks breaks the thunderx_zip
driver. On compression or decompression the executing CPU hangs
in an endless loop. The reason for this is the usage of __pa
by the driver which does no longer work for an address that is
not part of the 1:1 mapping.

The zip driver allocates a result struct on the stack and needs
to tell the hardware the physical address within this struct
that is used to signal the completion of the request.

As the hardware gets the wrong address after the broken __pa
conversion it writes to an arbitrary address. The zip driver then
waits forever for the completion byte to contain a non-zero value.

Allocating the result struct from 1:1 mapped memory resolves this
bug.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
Cc: stable <sta...@vger.kernel.org> # 4.14
---
 drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26..2fc9b03 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
 struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_comp;
 
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
 
-   ret = zip_deflate(zip_ops, _state, zip);
+   ret = zip_deflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
@@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
   struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
 
@@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
 
-   ret = zip_inflate(zip_ops, _state, zip);
+   ret = zip_inflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
-- 
2.7.4



[PATCH 1/2] crypto: thunderx_zip: Fix fallout from CONFIG_VMAP_STACK

2018-03-28 Thread Jan Glauber
Enabling virtual mapped kernel stacks breaks the thunderx_zip
driver. On compression or decompression the executing CPU hangs
in an endless loop. The reason for this is the usage of __pa
by the driver which does no longer work for an address that is
not part of the 1:1 mapping.

The zip driver allocates a result struct on the stack and needs
to tell the hardware the physical address within this struct
that is used to signal the completion of the request.

As the hardware gets the wrong address after the broken __pa
conversion it writes to an arbitrary address. The zip driver then
waits forever for the completion byte to contain a non-zero value.

Allocating the result struct from 1:1 mapped memory resolves this
bug.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
Cc: stable  # 4.14
---
 drivers/crypto/cavium/zip/zip_crypto.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/cavium/zip/zip_crypto.c 
b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26..2fc9b03 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@ int zip_compress(const u8 *src, unsigned int slen,
 struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -135,20 +135,23 @@ int zip_compress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_comp;
 
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
memcpy(zip_ops->input, src, slen);
 
-   ret = zip_deflate(zip_ops, _state, zip);
+   ret = zip_deflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
@@ -157,7 +160,7 @@ int zip_decompress(const u8 *src, unsigned int slen,
   struct zip_kernel_ctx *zip_ctx)
 {
struct zip_operation  *zip_ops   = NULL;
-   struct zip_state  zip_state;
+   struct zip_state  *zip_state;
struct zip_device *zip = NULL;
int ret;
 
@@ -168,7 +171,10 @@ int zip_decompress(const u8 *src, unsigned int slen,
if (!zip)
return -ENODEV;
 
-   memset(_state, 0, sizeof(struct zip_state));
+   zip_state = kzalloc(sizeof(*zip_state), GFP_KERNEL);
+   if (!zip_state)
+   return -ENOMEM;
+
zip_ops = _ctx->zip_decomp;
memcpy(zip_ops->input, src, slen);
 
@@ -179,13 +185,13 @@ int zip_decompress(const u8 *src, unsigned int slen,
zip_ops->input_len  = slen;
zip_ops->output_len = *dlen;
 
-   ret = zip_inflate(zip_ops, _state, zip);
+   ret = zip_inflate(zip_ops, zip_state, zip);
 
if (!ret) {
*dlen = zip_ops->output_len;
memcpy(dst, zip_ops->output, *dlen);
}
-
+   kfree(zip_state);
return ret;
 }
 
-- 
2.7.4



[PATCH 2/2] crypto: thunderx_zip: Limit result reading attempts

2018-03-28 Thread Jan Glauber
After issuing a request an endless loop was used to read the
completion state from memory which is asynchronously updated
by the ZIP coprocessor.

Add an upper bound to the retry attempts to prevent a CPU getting stuck
forever in case of an error. Additionally, add a read memory barrier
and a small delay between the reading attempts.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
Reviewed-by: Robert Richter <rrich...@cavium.com>
Cc: stable <sta...@vger.kernel.org> # 4.14
---
 drivers/crypto/cavium/zip/common.h  | 22 ++
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/common.h 
b/drivers/crypto/cavium/zip/common.h
index dc451e0..9067451 100644
--- a/drivers/crypto/cavium/zip/common.h
+++ b/drivers/crypto/cavium/zip/common.h
@@ -46,8 +46,10 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -149,6 +151,26 @@ struct zip_operation {
u32   sizeofzops;
 };
 
+#define ZIP_POLL_DELAY 20 /* microseconds */
+#define ZIP_POLL_TIMEOUT (msecs_to_jiffies(1000))
+
+static inline int zip_poll_result(union zip_zres_s *result)
+{
+   u64 end = get_jiffies_64() + ZIP_POLL_TIMEOUT;
+
+   while (!result->s.compcode) {
+   /*
+* Force re-reading of compcode which is updated
+* by the ZIP coprocessor.
+*/
+   rmb();
+   if (time_after64(get_jiffies_64(), end))
+   return -ETIMEDOUT;
+   usleep_range(ZIP_POLL_DELAY / 2, ZIP_POLL_DELAY);
+   }
+   return 0;
+}
+
 /* error messages */
 #define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
  fmt "\n", __func__, __LINE__, ## args)
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c 
b/drivers/crypto/cavium/zip/zip_deflate.c
index 9a944b8..d7133f8 100644
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ b/drivers/crypto/cavium/zip/zip_deflate.c
@@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Stats update for compression requests submitted */
atomic64_inc(_dev->stats.comp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Stats update for compression requests completed */
atomic64_inc(_dev->stats.comp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c 
b/drivers/crypto/cavium/zip/zip_inflate.c
index 50cbdd8..7e0d73e 100644
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ b/drivers/crypto/cavium/zip/zip_inflate.c
@@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Decompression requests submitted stats update */
atomic64_inc(_dev->stats.decomp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Decompression requests completed stats update */
atomic64_inc(_dev->stats.decomp_req_complete);
-- 
2.7.4



[PATCH 2/2] crypto: thunderx_zip: Limit result reading attempts

2018-03-28 Thread Jan Glauber
After issuing a request an endless loop was used to read the
completion state from memory which is asynchronously updated
by the ZIP coprocessor.

Add an upper bound to the retry attempts to prevent a CPU getting stuck
forever in case of an error. Additionally, add a read memory barrier
and a small delay between the reading attempts.

Signed-off-by: Jan Glauber 
Reviewed-by: Robert Richter 
Cc: stable  # 4.14
---
 drivers/crypto/cavium/zip/common.h  | 22 ++
 drivers/crypto/cavium/zip/zip_deflate.c |  4 ++--
 drivers/crypto/cavium/zip/zip_inflate.c |  4 ++--
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/cavium/zip/common.h 
b/drivers/crypto/cavium/zip/common.h
index dc451e0..9067451 100644
--- a/drivers/crypto/cavium/zip/common.h
+++ b/drivers/crypto/cavium/zip/common.h
@@ -46,8 +46,10 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -149,6 +151,26 @@ struct zip_operation {
u32   sizeofzops;
 };
 
+#define ZIP_POLL_DELAY 20 /* microseconds */
+#define ZIP_POLL_TIMEOUT (msecs_to_jiffies(1000))
+
+static inline int zip_poll_result(union zip_zres_s *result)
+{
+   u64 end = get_jiffies_64() + ZIP_POLL_TIMEOUT;
+
+   while (!result->s.compcode) {
+   /*
+* Force re-reading of compcode which is updated
+* by the ZIP coprocessor.
+*/
+   rmb();
+   if (time_after64(get_jiffies_64(), end))
+   return -ETIMEDOUT;
+   usleep_range(ZIP_POLL_DELAY / 2, ZIP_POLL_DELAY);
+   }
+   return 0;
+}
+
 /* error messages */
 #define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
  fmt "\n", __func__, __LINE__, ## args)
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c 
b/drivers/crypto/cavium/zip/zip_deflate.c
index 9a944b8..d7133f8 100644
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ b/drivers/crypto/cavium/zip/zip_deflate.c
@@ -129,8 +129,8 @@ int zip_deflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Stats update for compression requests submitted */
atomic64_inc(_dev->stats.comp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Stats update for compression requests completed */
atomic64_inc(_dev->stats.comp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c 
b/drivers/crypto/cavium/zip/zip_inflate.c
index 50cbdd8..7e0d73e 100644
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ b/drivers/crypto/cavium/zip/zip_inflate.c
@@ -143,8 +143,8 @@ int zip_inflate(struct zip_operation *zip_ops, struct 
zip_state *s,
/* Decompression requests submitted stats update */
atomic64_inc(_dev->stats.decomp_req_submit);
 
-   while (!result_ptr->s.compcode)
-   continue;
+   /* Wait for completion or error */
+   zip_poll_result(result_ptr);
 
/* Decompression requests completed stats update */
atomic64_inc(_dev->stats.decomp_req_complete);
-- 
2.7.4



Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-26 Thread Jan Glauber
On Mon, Mar 26, 2018 at 11:28:28AM +0200, Arnd Bergmann wrote:
> On Mon, Mar 26, 2018 at 10:52 AM, Jan Glauber
> <jan.glau...@caviumnetworks.com> wrote:
> > On Tue, Mar 06, 2018 at 03:02:01PM +0100, Jan Glauber wrote:
> >> On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> >> > On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber <jglau...@cavium.com> wrote:
> >> > > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> >> >
> >> > Are you sure about those numbers? From my counting, I would have expected
> >> > twice that number in both cases: 48 cores, 2 chips and 2x SMT for 
> >> > ThunderX
> >> > vs 52 Cores, 2 chips and 4x SMT for ThunderX2.
> >>
> >> That's what I have on those machines. I counted SMT as normal CPUs as it
> >> doesn't make a difference for the config. I've not seen SMT on ThunderX.
> >>
> >> The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
> >> there may be other versions planned that I'm not aware of.
> >>
> >> > > Therefore raise the default number of CPUs from 64 to 256
> >> > > by adding an arm64 specific option to override the generic default.
> >> >
> >> > Regardless of what the correct numbers for your chips are, I'd like
> >> > to hear some other opinions on how high we should raise that default
> >> > limit, both in arch/arm64/Kconfig and in the defconfig file.
> >> >
> >> > As I remember it, there is a noticeable cost for taking the limit beyond
> >> > BITS_PER_LONG, both in terms of memory consumption and also
> >> > runtime performance (copying and comparing CPU masks).
> >>
> >> OK, that explains the default. My unverified assumption is that
> >> increasing the CPU masks wont be a noticable performance hit.
> >>
> >> Also, I don't think that anyone who wants performance will use
> >> defconfig. All server distributions would bump up the NR_CPUS anyway
> >> and really small systems will probably need to tune the config
> >> anyway.
> >>
> >> For me defconfig should produce a usable system, not with every last
> >> driver configured but with all the basics like CPUs, networking, etc.
> >> fully present.
> >>
> >> > I'm sure someone will keep coming up with even larger configurations
> >> > in the future, so we should try to decide how far we can take the
> >> > defaults for the moment without impacting users of the smallest
> >> > systems. Alternatively, you could add some measurements that
> >> > show how much memory and CPU time is used up on a typical
> >> > configuration for a small system (4 cores, no SMT, 512 MB RAM).
> >> > If that's low enough, we could just do it anyway.
> >>
> >> OK, I'll take a look.
> >
> > I've made some measurements on a 4 core board (Cavium 81xx) with
> > NR_CPUS set to 64 or 256:
> >
> > - vmlinux grows by 0.04 % with 256 CPUs
> 
> Ok. Is this both with CONFIG_CPUMASK_OFFSTACK=n?

Yes.

> > - Kernel compile time was a bit faster with 256 CPUS (which does
> >   not make sense, but at least is seems to not suffer from the change).
> 
> Do you mean compiling the same kernel configuration while running
> on a system with less than 64 CPUs on either a CONFIG_NR_CPUS=64
> or CONFIG_NR_PCUS=256 kernel, or do you mean the time to compile
> a kernel with either CONFIG_NR_CPUS=64 or CONFIG_NR_CPUS=256,
> while running on the same host?

The former, compiling everything on a 4-core system using two different
kernels to compile the same thing.

> I assume the former, which is a very interesting result, possibly
> pointing to us doing something wrong in the NR_CPUS=64 case
> that could be optimized.
> 
> If you ran with CONFIG_CPUMASK_OFFSTACK, that may have made
> a significant difference, but I would expect it to be faster without it.
> 
> To get more insight to what is happening, you could rerun the same
> test with 'perf record' and then compare the profiles. How significant
> is the runtime difference compared to the jitter you get between normal
> runs on the same configuration?

I did retry once but the odd case that CONFIG_NR_CPUS=256 was faster
was consistent. The difference was very small though so it may be
completely due to jitter.

> >   Is there a benchmark that will be better suited? Maybe even a
> >   microbenchmark that will suffer from the longer cpumasks?
> 
> Good question.
> 
> > - Available memory decreased by 0.13% (restricted memory to 512 MB),
> >   BSS increased 5.3 %
> 
> 0.13% of a few hundred megabytes is still several hundred kb, right? I'd
> like to hear some other opinions on that, but it seems to be in the
> range of enabling many additional device drivers, which is something
> we don't do lightly.

Agreed, available memory was reduced by 128 KB.

--Jan


Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-26 Thread Jan Glauber
On Mon, Mar 26, 2018 at 11:28:28AM +0200, Arnd Bergmann wrote:
> On Mon, Mar 26, 2018 at 10:52 AM, Jan Glauber
>  wrote:
> > On Tue, Mar 06, 2018 at 03:02:01PM +0100, Jan Glauber wrote:
> >> On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> >> > On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber  wrote:
> >> > > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> >> >
> >> > Are you sure about those numbers? From my counting, I would have expected
> >> > twice that number in both cases: 48 cores, 2 chips and 2x SMT for 
> >> > ThunderX
> >> > vs 52 Cores, 2 chips and 4x SMT for ThunderX2.
> >>
> >> That's what I have on those machines. I counted SMT as normal CPUs as it
> >> doesn't make a difference for the config. I've not seen SMT on ThunderX.
> >>
> >> The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
> >> there may be other versions planned that I'm not aware of.
> >>
> >> > > Therefore raise the default number of CPUs from 64 to 256
> >> > > by adding an arm64 specific option to override the generic default.
> >> >
> >> > Regardless of what the correct numbers for your chips are, I'd like
> >> > to hear some other opinions on how high we should raise that default
> >> > limit, both in arch/arm64/Kconfig and in the defconfig file.
> >> >
> >> > As I remember it, there is a noticeable cost for taking the limit beyond
> >> > BITS_PER_LONG, both in terms of memory consumption and also
> >> > runtime performance (copying and comparing CPU masks).
> >>
> >> OK, that explains the default. My unverified assumption is that
> >> increasing the CPU masks wont be a noticable performance hit.
> >>
> >> Also, I don't think that anyone who wants performance will use
> >> defconfig. All server distributions would bump up the NR_CPUS anyway
> >> and really small systems will probably need to tune the config
> >> anyway.
> >>
> >> For me defconfig should produce a usable system, not with every last
> >> driver configured but with all the basics like CPUs, networking, etc.
> >> fully present.
> >>
> >> > I'm sure someone will keep coming up with even larger configurations
> >> > in the future, so we should try to decide how far we can take the
> >> > defaults for the moment without impacting users of the smallest
> >> > systems. Alternatively, you could add some measurements that
> >> > show how much memory and CPU time is used up on a typical
> >> > configuration for a small system (4 cores, no SMT, 512 MB RAM).
> >> > If that's low enough, we could just do it anyway.
> >>
> >> OK, I'll take a look.
> >
> > I've made some measurements on a 4 core board (Cavium 81xx) with
> > NR_CPUS set to 64 or 256:
> >
> > - vmlinux grows by 0.04 % with 256 CPUs
> 
> Ok. Is this both with CONFIG_CPUMASK_OFFSTACK=n?

Yes.

> > - Kernel compile time was a bit faster with 256 CPUS (which does
> >   not make sense, but at least is seems to not suffer from the change).
> 
> Do you mean compiling the same kernel configuration while running
> on a system with less than 64 CPUs on either a CONFIG_NR_CPUS=64
> or CONFIG_NR_PCUS=256 kernel, or do you mean the time to compile
> a kernel with either CONFIG_NR_CPUS=64 or CONFIG_NR_CPUS=256,
> while running on the same host?

The former, compiling everything on a 4-core system using two different
kernels to compile the same thing.

> I assume the former, which is a very interesting result, possibly
> pointing to us doing something wrong in the NR_CPUS=64 case
> that could be optimized.
> 
> If you ran with CONFIG_CPUMASK_OFFSTACK, that may have made
> a significant difference, but I would expect it to be faster without it.
> 
> To get more insight to what is happening, you could rerun the same
> test with 'perf record' and then compare the profiles. How significant
> is the runtime difference compared to the jitter you get between normal
> runs on the same configuration?

I did retry once but the odd case that CONFIG_NR_CPUS=256 was faster
was consistent. The difference was very small though so it may be
completely due to jitter.

> >   Is there a benchmark that will be better suited? Maybe even a
> >   microbenchmark that will suffer from the longer cpumasks?
> 
> Good question.
> 
> > - Available memory decreased by 0.13% (restricted memory to 512 MB),
> >   BSS increased 5.3 %
> 
> 0.13% of a few hundred megabytes is still several hundred kb, right? I'd
> like to hear some other opinions on that, but it seems to be in the
> range of enabling many additional device drivers, which is something
> we don't do lightly.

Agreed, available memory was reduced by 128 KB.

--Jan


Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-26 Thread Jan Glauber
On Tue, Mar 06, 2018 at 03:02:01PM +0100, Jan Glauber wrote:
> On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> > On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber <jglau...@cavium.com> wrote:
> > > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> > 
> > Are you sure about those numbers? From my counting, I would have expected
> > twice that number in both cases: 48 cores, 2 chips and 2x SMT for ThunderX
> > vs 52 Cores, 2 chips and 4x SMT for ThunderX2.
> 
> That's what I have on those machines. I counted SMT as normal CPUs as it
> doesn't make a difference for the config. I've not seen SMT on ThunderX.
> 
> The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
> there may be other versions planned that I'm not aware of.
> 
> > > Therefore raise the default number of CPUs from 64 to 256
> > > by adding an arm64 specific option to override the generic default.
> > 
> > Regardless of what the correct numbers for your chips are, I'd like
> > to hear some other opinions on how high we should raise that default
> > limit, both in arch/arm64/Kconfig and in the defconfig file.
> > 
> > As I remember it, there is a noticeable cost for taking the limit beyond
> > BITS_PER_LONG, both in terms of memory consumption and also
> > runtime performance (copying and comparing CPU masks).
> 
> OK, that explains the default. My unverified assumption is that
> increasing the CPU masks wont be a noticable performance hit.
> 
> Also, I don't think that anyone who wants performance will use
> defconfig. All server distributions would bump up the NR_CPUS anyway
> and really small systems will probably need to tune the config
> anyway.
> 
> For me defconfig should produce a usable system, not with every last
> driver configured but with all the basics like CPUs, networking, etc.
> fully present.
> 
> > I'm sure someone will keep coming up with even larger configurations
> > in the future, so we should try to decide how far we can take the
> > defaults for the moment without impacting users of the smallest
> > systems. Alternatively, you could add some measurements that
> > show how much memory and CPU time is used up on a typical
> > configuration for a small system (4 cores, no SMT, 512 MB RAM).
> > If that's low enough, we could just do it anyway.
> 
> OK, I'll take a look.

I've made some measurements on a 4 core board (Cavium 81xx) with
NR_CPUS set to 64 or 256:

- vmlinux grows by 0.04 % with 256 CPUs

- Kernel compile time was a bit faster with 256 CPUS (which does
  not make sense, but at least is seems to not suffer from the change).
  Is there a benchmark that will be better suited? Maybe even a
  microbenchmark that will suffer from the longer cpumasks?

- Available memory decreased by 0.13% (restricted memory to 512 MB),
  BSS increased 5.3 %

Cheers,
Jan


Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-26 Thread Jan Glauber
On Tue, Mar 06, 2018 at 03:02:01PM +0100, Jan Glauber wrote:
> On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> > On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber  wrote:
> > > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> > 
> > Are you sure about those numbers? From my counting, I would have expected
> > twice that number in both cases: 48 cores, 2 chips and 2x SMT for ThunderX
> > vs 52 Cores, 2 chips and 4x SMT for ThunderX2.
> 
> That's what I have on those machines. I counted SMT as normal CPUs as it
> doesn't make a difference for the config. I've not seen SMT on ThunderX.
> 
> The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
> there may be other versions planned that I'm not aware of.
> 
> > > Therefore raise the default number of CPUs from 64 to 256
> > > by adding an arm64 specific option to override the generic default.
> > 
> > Regardless of what the correct numbers for your chips are, I'd like
> > to hear some other opinions on how high we should raise that default
> > limit, both in arch/arm64/Kconfig and in the defconfig file.
> > 
> > As I remember it, there is a noticeable cost for taking the limit beyond
> > BITS_PER_LONG, both in terms of memory consumption and also
> > runtime performance (copying and comparing CPU masks).
> 
> OK, that explains the default. My unverified assumption is that
> increasing the CPU masks wont be a noticable performance hit.
> 
> Also, I don't think that anyone who wants performance will use
> defconfig. All server distributions would bump up the NR_CPUS anyway
> and really small systems will probably need to tune the config
> anyway.
> 
> For me defconfig should produce a usable system, not with every last
> driver configured but with all the basics like CPUs, networking, etc.
> fully present.
> 
> > I'm sure someone will keep coming up with even larger configurations
> > in the future, so we should try to decide how far we can take the
> > defaults for the moment without impacting users of the smallest
> > systems. Alternatively, you could add some measurements that
> > show how much memory and CPU time is used up on a typical
> > configuration for a small system (4 cores, no SMT, 512 MB RAM).
> > If that's low enough, we could just do it anyway.
> 
> OK, I'll take a look.

I've made some measurements on a 4 core board (Cavium 81xx) with
NR_CPUS set to 64 or 256:

- vmlinux grows by 0.04 % with 256 CPUs

- Kernel compile time was a bit faster with 256 CPUS (which does
  not make sense, but at least is seems to not suffer from the change).
  Is there a benchmark that will be better suited? Maybe even a
  microbenchmark that will suffer from the longer cpumasks?

- Available memory decreased by 0.13% (restricted memory to 512 MB),
  BSS increased 5.3 %

Cheers,
Jan


Re: [PATCH v2] i2c: xlp9xx: Add support for SMBAlert

2018-03-19 Thread Jan Glauber
On Sat, Mar 17, 2018 at 10:03:00PM +0100, Wolfram Sang wrote:
> On Tue, Mar 06, 2018 at 06:46:34AM -0800, George Cherian wrote:
> > Add support for SMBus alert mechanism to i2c-xlp9xx driver.
> > The second interrupt is parsed to use for SMBus alert.
> > The first interrupt is the i2c controller main interrupt.
> > 
> > Signed-off-by: Kamlakant Patel <kamlakant.pa...@cavium.com>
> > Signed-off-by: George Cherian <george.cher...@cavium.com>
> 
> Are the previous reviewers happy now?

Reviewed-by: Jan Glauber <jglau...@cavium.com>

> > ---
> >  drivers/i2c/busses/i2c-xlp9xx.c | 24 
> >  1 file changed, 24 insertions(+)
> > 
> > diff --git a/drivers/i2c/busses/i2c-xlp9xx.c 
> > b/drivers/i2c/busses/i2c-xlp9xx.c
> > index eb8913e..d5cadb6 100644
> > --- a/drivers/i2c/busses/i2c-xlp9xx.c
> > +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> > @@ -10,6 +10,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -84,6 +85,8 @@ struct xlp9xx_i2c_dev {
> > struct device *dev;
> > struct i2c_adapter adapter;
> > struct completion msg_complete;
> > +   struct i2c_smbus_alert_setup alert_data;
> > +   struct i2c_client *ara;
> > int irq;
> > bool msg_read;
> > bool len_recv;
> > @@ -447,6 +450,19 @@ static int xlp9xx_i2c_get_frequency(struct 
> > platform_device *pdev,
> > return 0;
> >  }
> >  
> > +static int xlp9xx_i2c_smbus_setup(struct xlp9xx_i2c_dev *priv,
> > + struct platform_device *pdev)
> > +{
> > +   if (!priv->alert_data.irq)
> > +   return -EINVAL;
> > +
> > +   priv->ara = i2c_setup_smbus_alert(>adapter, >alert_data);
> > +   if (!priv->ara)
> > +   return -ENODEV;
> > +
> > +   return 0;
> > +}
> > +
> >  static int xlp9xx_i2c_probe(struct platform_device *pdev)
> >  {
> > struct xlp9xx_i2c_dev *priv;
> > @@ -467,6 +483,10 @@ static int xlp9xx_i2c_probe(struct platform_device 
> > *pdev)
> > dev_err(>dev, "invalid irq!\n");
> > return priv->irq;
> > }
> > +   /* SMBAlert irq */
> > +   priv->alert_data.irq = platform_get_irq(pdev, 1);
> > +   if (priv->alert_data.irq <= 0)
> 
> '< 0' should do, or?
> 
> > +   priv->alert_data.irq = 0;
> >  
> > xlp9xx_i2c_get_frequency(pdev, priv);
> > xlp9xx_i2c_init(priv);
> > @@ -493,6 +513,10 @@ static int xlp9xx_i2c_probe(struct platform_device 
> > *pdev)
> > if (err)
> > return err;
> >  
> > +   err = xlp9xx_i2c_smbus_setup(priv, pdev);
> > +   if (err)
> > +   dev_info(>dev, "No active SMBus alert %d\n", err);
> > +
> 
> Do you really want to print this info every time SMBus alert is not
> used? Is it common on this platform

We do the same thing in the ThunderX driver and thought it is useful to
have the info print.

--Jan


Re: [PATCH v2] i2c: xlp9xx: Add support for SMBAlert

2018-03-19 Thread Jan Glauber
On Sat, Mar 17, 2018 at 10:03:00PM +0100, Wolfram Sang wrote:
> On Tue, Mar 06, 2018 at 06:46:34AM -0800, George Cherian wrote:
> > Add support for SMBus alert mechanism to i2c-xlp9xx driver.
> > The second interrupt is parsed to use for SMBus alert.
> > The first interrupt is the i2c controller main interrupt.
> > 
> > Signed-off-by: Kamlakant Patel 
> > Signed-off-by: George Cherian 
> 
> Are the previous reviewers happy now?

Reviewed-by: Jan Glauber 

> > ---
> >  drivers/i2c/busses/i2c-xlp9xx.c | 24 
> >  1 file changed, 24 insertions(+)
> > 
> > diff --git a/drivers/i2c/busses/i2c-xlp9xx.c 
> > b/drivers/i2c/busses/i2c-xlp9xx.c
> > index eb8913e..d5cadb6 100644
> > --- a/drivers/i2c/busses/i2c-xlp9xx.c
> > +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> > @@ -10,6 +10,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -84,6 +85,8 @@ struct xlp9xx_i2c_dev {
> > struct device *dev;
> > struct i2c_adapter adapter;
> > struct completion msg_complete;
> > +   struct i2c_smbus_alert_setup alert_data;
> > +   struct i2c_client *ara;
> > int irq;
> > bool msg_read;
> > bool len_recv;
> > @@ -447,6 +450,19 @@ static int xlp9xx_i2c_get_frequency(struct 
> > platform_device *pdev,
> > return 0;
> >  }
> >  
> > +static int xlp9xx_i2c_smbus_setup(struct xlp9xx_i2c_dev *priv,
> > + struct platform_device *pdev)
> > +{
> > +   if (!priv->alert_data.irq)
> > +   return -EINVAL;
> > +
> > +   priv->ara = i2c_setup_smbus_alert(>adapter, >alert_data);
> > +   if (!priv->ara)
> > +   return -ENODEV;
> > +
> > +   return 0;
> > +}
> > +
> >  static int xlp9xx_i2c_probe(struct platform_device *pdev)
> >  {
> > struct xlp9xx_i2c_dev *priv;
> > @@ -467,6 +483,10 @@ static int xlp9xx_i2c_probe(struct platform_device 
> > *pdev)
> > dev_err(>dev, "invalid irq!\n");
> > return priv->irq;
> > }
> > +   /* SMBAlert irq */
> > +   priv->alert_data.irq = platform_get_irq(pdev, 1);
> > +   if (priv->alert_data.irq <= 0)
> 
> '< 0' should do, or?
> 
> > +   priv->alert_data.irq = 0;
> >  
> > xlp9xx_i2c_get_frequency(pdev, priv);
> > xlp9xx_i2c_init(priv);
> > @@ -493,6 +513,10 @@ static int xlp9xx_i2c_probe(struct platform_device 
> > *pdev)
> > if (err)
> > return err;
> >  
> > +   err = xlp9xx_i2c_smbus_setup(priv, pdev);
> > +   if (err)
> > +   dev_info(>dev, "No active SMBus alert %d\n", err);
> > +
> 
> Do you really want to print this info every time SMBus alert is not
> used? Is it common on this platform

We do the same thing in the ThunderX driver and thought it is useful to
have the info print.

--Jan


Re: [PATCH v2] Revert "mm/page_alloc: fix memmap_init_zone pageblock alignment"

2018-03-14 Thread Jan Glauber
On Wed, Mar 14, 2018 at 07:29:37PM +, Ard Biesheuvel wrote:
> This reverts commit 864b75f9d6b0100bb24fdd9a20d156e7cda9b5ae.

FWIW, the revert fixes the boot hang I'm seeing on ThunderX1.

--Jan

> Commit 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock
> alignment") modified the logic in memmap_init_zone() to initialize
> struct pages associated with invalid PFNs, to appease a VM_BUG_ON()
> in move_freepages(), which is redundant by its own admission, and
> dereferences struct page fields to obtain the zone without checking
> whether the struct pages in question are valid to begin with.
> 
> Commit 864b75f9d6b0 only makes it worse, since the rounding it does
> may cause pfn assume the same value it had in a prior iteration of
> the loop, resulting in an infinite loop and a hang very early in the
> boot. Also, since it doesn't perform the same rounding on start_pfn
> itself but only on intermediate values following an invalid PFN, we
> may still hit the same VM_BUG_ON() as before.
> 
> So instead, let's fix this at the core, and ensure that the BUG
> check doesn't dereference struct page fields of invalid pages.
> 
> Fixes: 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock 
> alignment")
> Cc: Daniel Vacek 
> Cc: Mel Gorman 
> Cc: Michal Hocko 
> Cc: Paul Burton 
> Cc: Pavel Tatashin 
> Cc: Vlastimil Babka 
> Cc: Andrew Morton 
> Cc: Linus Torvalds 
> Signed-off-by: Ard Biesheuvel 
> ---
>  mm/page_alloc.c | 13 +
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3d974cb2a1a1..635d7dd29d7f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
>* Remove at a later date when no bug reports exist related to
>* grouping pages by mobility
>*/
> - VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
> + VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
> +   pfn_valid(page_to_pfn(end_page)) &&
> +   page_zone(start_page) != page_zone(end_page));
>  #endif
>  
>   if (num_movable)
> @@ -5359,14 +5361,9 @@ void __meminit memmap_init_zone(unsigned long size, 
> int nid, unsigned long zone,
>   /*
>* Skip to the pfn preceding the next valid one (or
>* end_pfn), such that we hit a valid pfn (or end_pfn)
> -  * on our next iteration of the loop. Note that it needs
> -  * to be pageblock aligned even when the region itself
> -  * is not. move_freepages_block() can shift ahead of
> -  * the valid region but still depends on correct page
> -  * metadata.
> +  * on our next iteration of the loop.
>*/
> - pfn = (memblock_next_valid_pfn(pfn, end_pfn) &
> - ~(pageblock_nr_pages-1)) - 1;
> + pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
>  #endif
>   continue;
>   }
> -- 
> 2.15.1
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Re: [PATCH v2] Revert "mm/page_alloc: fix memmap_init_zone pageblock alignment"

2018-03-14 Thread Jan Glauber
On Wed, Mar 14, 2018 at 07:29:37PM +, Ard Biesheuvel wrote:
> This reverts commit 864b75f9d6b0100bb24fdd9a20d156e7cda9b5ae.

FWIW, the revert fixes the boot hang I'm seeing on ThunderX1.

--Jan

> Commit 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock
> alignment") modified the logic in memmap_init_zone() to initialize
> struct pages associated with invalid PFNs, to appease a VM_BUG_ON()
> in move_freepages(), which is redundant by its own admission, and
> dereferences struct page fields to obtain the zone without checking
> whether the struct pages in question are valid to begin with.
> 
> Commit 864b75f9d6b0 only makes it worse, since the rounding it does
> may cause pfn assume the same value it had in a prior iteration of
> the loop, resulting in an infinite loop and a hang very early in the
> boot. Also, since it doesn't perform the same rounding on start_pfn
> itself but only on intermediate values following an invalid PFN, we
> may still hit the same VM_BUG_ON() as before.
> 
> So instead, let's fix this at the core, and ensure that the BUG
> check doesn't dereference struct page fields of invalid pages.
> 
> Fixes: 864b75f9d6b0 ("mm/page_alloc: fix memmap_init_zone pageblock 
> alignment")
> Cc: Daniel Vacek 
> Cc: Mel Gorman 
> Cc: Michal Hocko 
> Cc: Paul Burton 
> Cc: Pavel Tatashin 
> Cc: Vlastimil Babka 
> Cc: Andrew Morton 
> Cc: Linus Torvalds 
> Signed-off-by: Ard Biesheuvel 
> ---
>  mm/page_alloc.c | 13 +
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 3d974cb2a1a1..635d7dd29d7f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
>* Remove at a later date when no bug reports exist related to
>* grouping pages by mobility
>*/
> - VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
> + VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
> +   pfn_valid(page_to_pfn(end_page)) &&
> +   page_zone(start_page) != page_zone(end_page));
>  #endif
>  
>   if (num_movable)
> @@ -5359,14 +5361,9 @@ void __meminit memmap_init_zone(unsigned long size, 
> int nid, unsigned long zone,
>   /*
>* Skip to the pfn preceding the next valid one (or
>* end_pfn), such that we hit a valid pfn (or end_pfn)
> -  * on our next iteration of the loop. Note that it needs
> -  * to be pageblock aligned even when the region itself
> -  * is not. move_freepages_block() can shift ahead of
> -  * the valid region but still depends on correct page
> -  * metadata.
> +  * on our next iteration of the loop.
>*/
> - pfn = (memblock_next_valid_pfn(pfn, end_pfn) &
> - ~(pageblock_nr_pages-1)) - 1;
> + pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
>  #endif
>   continue;
>   }
> -- 
> 2.15.1
> 
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-06 Thread Jan Glauber
On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber <jglau...@cavium.com> wrote:
> > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> 
> Are you sure about those numbers? From my counting, I would have expected
> twice that number in both cases: 48 cores, 2 chips and 2x SMT for ThunderX
> vs 52 Cores, 2 chips and 4x SMT for ThunderX2.

That's what I have on those machines. I counted SMT as normal CPUs as it
doesn't make a difference for the config. I've not seen SMT on ThunderX.

The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
there may be other versions planned that I'm not aware of.

> > Therefore raise the default number of CPUs from 64 to 256
> > by adding an arm64 specific option to override the generic default.
> 
> Regardless of what the correct numbers for your chips are, I'd like
> to hear some other opinions on how high we should raise that default
> limit, both in arch/arm64/Kconfig and in the defconfig file.
> 
> As I remember it, there is a noticeable cost for taking the limit beyond
> BITS_PER_LONG, both in terms of memory consumption and also
> runtime performance (copying and comparing CPU masks).

OK, that explains the default. My unverified assumption is that
increasing the CPU masks wont be a noticable performance hit.

Also, I don't think that anyone who wants performance will use
defconfig. All server distributions would bump up the NR_CPUS anyway
and really small systems will probably need to tune the config
anyway.

For me defconfig should produce a usable system, not with every last
driver configured but with all the basics like CPUs, networking, etc.
fully present.

> I'm sure someone will keep coming up with even larger configurations
> in the future, so we should try to decide how far we can take the
> defaults for the moment without impacting users of the smallest
> systems. Alternatively, you could add some measurements that
> show how much memory and CPU time is used up on a typical
> configuration for a small system (4 cores, no SMT, 512 MB RAM).
> If that's low enough, we could just do it anyway.

OK, I'll take a look.

--Jan


Re: [PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-06 Thread Jan Glauber
On Tue, Mar 06, 2018 at 02:12:29PM +0100, Arnd Bergmann wrote:
> On Fri, Mar 2, 2018 at 3:37 PM, Jan Glauber  wrote:
> > ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
> 
> Are you sure about those numbers? From my counting, I would have expected
> twice that number in both cases: 48 cores, 2 chips and 2x SMT for ThunderX
> vs 52 Cores, 2 chips and 4x SMT for ThunderX2.

That's what I have on those machines. I counted SMT as normal CPUs as it
doesn't make a difference for the config. I've not seen SMT on ThunderX.

The ThunderX2 number of 224 is already with 4x SMT (and 2 chips) but
there may be other versions planned that I'm not aware of.

> > Therefore raise the default number of CPUs from 64 to 256
> > by adding an arm64 specific option to override the generic default.
> 
> Regardless of what the correct numbers for your chips are, I'd like
> to hear some other opinions on how high we should raise that default
> limit, both in arch/arm64/Kconfig and in the defconfig file.
> 
> As I remember it, there is a noticeable cost for taking the limit beyond
> BITS_PER_LONG, both in terms of memory consumption and also
> runtime performance (copying and comparing CPU masks).

OK, that explains the default. My unverified assumption is that
increasing the CPU masks wont be a noticable performance hit.

Also, I don't think that anyone who wants performance will use
defconfig. All server distributions would bump up the NR_CPUS anyway
and really small systems will probably need to tune the config
anyway.

For me defconfig should produce a usable system, not with every last
driver configured but with all the basics like CPUs, networking, etc.
fully present.

> I'm sure someone will keep coming up with even larger configurations
> in the future, so we should try to decide how far we can take the
> defaults for the moment without impacting users of the smallest
> systems. Alternatively, you could add some measurements that
> show how much memory and CPU time is used up on a typical
> configuration for a small system (4 cores, no SMT, 512 MB RAM).
> If that's low enough, we could just do it anyway.

OK, I'll take a look.

--Jan


Re: [PATCHv2 1/3] i2c: xlp9xx: Check for Bus state before every transfer

2018-03-06 Thread Jan Glauber
I don't know how valuable same-company reviewed-by's are in the end, 
but the patches look good to me with the small change in SMBalert, so you
could add:

Reviewed-by: Jan Glauber <jglau...@cavium.com>

--Jan

On Tue, Feb 27, 2018 at 01:26:18PM +, George Cherian wrote:
> I2C bus enters the STOP condition after the DATA_DONE interrupt is raised.
> Essentially the driver should be checking the bus state before sending
> any transaction. In case a transaction is initiated while the
> bus is busy, the prior transaction's stop condition is not achieved.
> Add the check to make sure the bus is not busy before every transaction.
> 
> Signed-off-by: George Cherian <george.cher...@cavium.com>
> ---
>  drivers/i2c/busses/i2c-xlp9xx.c | 32 
>  1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
> index 1f6d780..42dd1fa 100644
> --- a/drivers/i2c/busses/i2c-xlp9xx.c
> +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define XLP9XX_I2C_DIV   0x0
>  #define XLP9XX_I2C_CTRL  0x1
> @@ -36,6 +37,8 @@
>  #define XLP9XX_I2C_TIMEOUT   0X10
>  #define XLP9XX_I2C_GENCALLADDR   0x11
>  
> +#define XLP9XX_I2C_STATUS_BUSY   BIT(0)
> +
>  #define XLP9XX_I2C_CMD_START BIT(7)
>  #define XLP9XX_I2C_CMD_STOP  BIT(6)
>  #define XLP9XX_I2C_CMD_READ  BIT(5)
> @@ -71,6 +74,7 @@
>  #define XLP9XX_I2C_HIGH_FREQ 40
>  #define XLP9XX_I2C_FIFO_SIZE 0x80U
>  #define XLP9XX_I2C_TIMEOUT_MS1000
> +#define XLP9XX_I2C_BUSY_TIMEOUT  50
>  
>  #define XLP9XX_I2C_FIFO_WCNT_MASK0xff
>  #define XLP9XX_I2C_STATUS_ERRMASK(XLP9XX_I2C_INTEN_ARLOST | \
> @@ -241,6 +245,26 @@ static irqreturn_t xlp9xx_i2c_isr(int irq, void *dev_id)
>   return IRQ_HANDLED;
>  }
>  
> +static int xlp9xx_i2c_check_bus_status(struct xlp9xx_i2c_dev *priv)
> +{
> + u32 status;
> + u32 busy_timeout = XLP9XX_I2C_BUSY_TIMEOUT;
> +
> + while (busy_timeout) {
> + status = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_STATUS);
> + if ((status & XLP9XX_I2C_STATUS_BUSY) == 0)
> + break;
> +
> + busy_timeout--;
> + usleep_range(1000, 1100);
> + }
> +
> + if (!busy_timeout)
> + return -EIO;
> +
> + return 0;
> +}
> +
>  static int xlp9xx_i2c_init(struct xlp9xx_i2c_dev *priv)
>  {
>   u32 prescale;
> @@ -363,6 +387,14 @@ static int xlp9xx_i2c_xfer(struct i2c_adapter *adap, 
> struct i2c_msg *msgs,
>   int i, ret;
>   struct xlp9xx_i2c_dev *priv = i2c_get_adapdata(adap);
>  
> + ret = xlp9xx_i2c_check_bus_status(priv);
> + if (ret) {
> + xlp9xx_i2c_init(priv);
> + ret = xlp9xx_i2c_check_bus_status(priv);
> + if (ret)
> + return ret;
> + }
> +
>   for (i = 0; i < num; i++) {
>   ret = xlp9xx_i2c_xfer_msg(priv, [i], i == num - 1);
>   if (ret != 0)
> -- 
> 2.1.4


Re: [PATCHv2 1/3] i2c: xlp9xx: Check for Bus state before every transfer

2018-03-06 Thread Jan Glauber
I don't know how valuable same-company reviewed-by's are in the end, 
but the patches look good to me with the small change in SMBalert, so you
could add:

Reviewed-by: Jan Glauber 

--Jan

On Tue, Feb 27, 2018 at 01:26:18PM +, George Cherian wrote:
> I2C bus enters the STOP condition after the DATA_DONE interrupt is raised.
> Essentially the driver should be checking the bus state before sending
> any transaction. In case a transaction is initiated while the
> bus is busy, the prior transaction's stop condition is not achieved.
> Add the check to make sure the bus is not busy before every transaction.
> 
> Signed-off-by: George Cherian 
> ---
>  drivers/i2c/busses/i2c-xlp9xx.c | 32 
>  1 file changed, 32 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
> index 1f6d780..42dd1fa 100644
> --- a/drivers/i2c/busses/i2c-xlp9xx.c
> +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define XLP9XX_I2C_DIV   0x0
>  #define XLP9XX_I2C_CTRL  0x1
> @@ -36,6 +37,8 @@
>  #define XLP9XX_I2C_TIMEOUT   0X10
>  #define XLP9XX_I2C_GENCALLADDR   0x11
>  
> +#define XLP9XX_I2C_STATUS_BUSY   BIT(0)
> +
>  #define XLP9XX_I2C_CMD_START BIT(7)
>  #define XLP9XX_I2C_CMD_STOP  BIT(6)
>  #define XLP9XX_I2C_CMD_READ  BIT(5)
> @@ -71,6 +74,7 @@
>  #define XLP9XX_I2C_HIGH_FREQ 40
>  #define XLP9XX_I2C_FIFO_SIZE 0x80U
>  #define XLP9XX_I2C_TIMEOUT_MS1000
> +#define XLP9XX_I2C_BUSY_TIMEOUT  50
>  
>  #define XLP9XX_I2C_FIFO_WCNT_MASK0xff
>  #define XLP9XX_I2C_STATUS_ERRMASK(XLP9XX_I2C_INTEN_ARLOST | \
> @@ -241,6 +245,26 @@ static irqreturn_t xlp9xx_i2c_isr(int irq, void *dev_id)
>   return IRQ_HANDLED;
>  }
>  
> +static int xlp9xx_i2c_check_bus_status(struct xlp9xx_i2c_dev *priv)
> +{
> + u32 status;
> + u32 busy_timeout = XLP9XX_I2C_BUSY_TIMEOUT;
> +
> + while (busy_timeout) {
> + status = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_STATUS);
> + if ((status & XLP9XX_I2C_STATUS_BUSY) == 0)
> + break;
> +
> + busy_timeout--;
> + usleep_range(1000, 1100);
> + }
> +
> + if (!busy_timeout)
> + return -EIO;
> +
> + return 0;
> +}
> +
>  static int xlp9xx_i2c_init(struct xlp9xx_i2c_dev *priv)
>  {
>   u32 prescale;
> @@ -363,6 +387,14 @@ static int xlp9xx_i2c_xfer(struct i2c_adapter *adap, 
> struct i2c_msg *msgs,
>   int i, ret;
>   struct xlp9xx_i2c_dev *priv = i2c_get_adapdata(adap);
>  
> + ret = xlp9xx_i2c_check_bus_status(priv);
> + if (ret) {
> + xlp9xx_i2c_init(priv);
> + ret = xlp9xx_i2c_check_bus_status(priv);
> + if (ret)
> + return ret;
> + }
> +
>   for (i = 0; i < num; i++) {
>   ret = xlp9xx_i2c_xfer_msg(priv, [i], i == num - 1);
>   if (ret != 0)
> -- 
> 2.1.4


Re: [PATCH 3/3] i2c: xlp9xx: Add support for SMBAlert

2018-03-06 Thread Jan Glauber
On Tue, Feb 27, 2018 at 01:26:20PM +, George Cherian wrote:
> Add support for SMBus alert mechanism to i2c-xlp9xx driver.
> The second interrupt is parsed to use for SMBus alert.
> The first interrupt is the i2c controller main interrupt.
> 
> Signed-off-by: Kamlakant Patel 
> Signed-off-by: George Cherian 
> ---
>  drivers/i2c/busses/i2c-xlp9xx.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
> index eb8913e..9462eab 100644
> --- a/drivers/i2c/busses/i2c-xlp9xx.c
> +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -84,6 +85,8 @@ struct xlp9xx_i2c_dev {
>   struct device *dev;
>   struct i2c_adapter adapter;
>   struct completion msg_complete;
> + struct i2c_smbus_alert_setup alert_data;
> + struct i2c_client *ara;
>   int irq;
>   bool msg_read;
>   bool len_recv;
> @@ -447,6 +450,21 @@ static int xlp9xx_i2c_get_frequency(struct 
> platform_device *pdev,
>   return 0;
>  }
>  
> +static int xlp9xx_i2c_smbus_setup(struct xlp9xx_i2c_dev *priv,
> +   struct platform_device *pdev)
> +{
> + if (!priv->alert_data.irq)
> + return -EINVAL;
> +
> + priv->alert_data.alert_edge_triggered = 0;

Hi George,

I think this is not needed anymore, see:
9b9f2b8bc2ac i2c: i2c-smbus: Use threaded irq for smbalert

--Jan

> +
> + priv->ara = i2c_setup_smbus_alert(>adapter, >alert_data);
> + if (!priv->ara)
> + return -ENODEV;
> +
> + return 0;
> +}
> +
>  static int xlp9xx_i2c_probe(struct platform_device *pdev)
>  {
>   struct xlp9xx_i2c_dev *priv;
> @@ -467,6 +485,10 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
>   dev_err(>dev, "invalid irq!\n");
>   return priv->irq;
>   }
> + /* SMBAlert irq */
> + priv->alert_data.irq = platform_get_irq(pdev, 1);
> + if (priv->alert_data.irq <= 0)
> + priv->alert_data.irq = 0;
>  
>   xlp9xx_i2c_get_frequency(pdev, priv);
>   xlp9xx_i2c_init(priv);
> @@ -493,6 +515,10 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
>   if (err)
>   return err;
>  
> + err = xlp9xx_i2c_smbus_setup(priv, pdev);
> + if (err)
> + dev_info(>dev, "No active SMBus alert %d\n", err);
> +
>   platform_set_drvdata(pdev, priv);
>   dev_dbg(>dev, "I2C bus:%d added\n", priv->adapter.nr);
>  
> -- 
> 2.1.4


Re: [PATCH 3/3] i2c: xlp9xx: Add support for SMBAlert

2018-03-06 Thread Jan Glauber
On Tue, Feb 27, 2018 at 01:26:20PM +, George Cherian wrote:
> Add support for SMBus alert mechanism to i2c-xlp9xx driver.
> The second interrupt is parsed to use for SMBus alert.
> The first interrupt is the i2c controller main interrupt.
> 
> Signed-off-by: Kamlakant Patel 
> Signed-off-by: George Cherian 
> ---
>  drivers/i2c/busses/i2c-xlp9xx.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
> index eb8913e..9462eab 100644
> --- a/drivers/i2c/busses/i2c-xlp9xx.c
> +++ b/drivers/i2c/busses/i2c-xlp9xx.c
> @@ -10,6 +10,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -84,6 +85,8 @@ struct xlp9xx_i2c_dev {
>   struct device *dev;
>   struct i2c_adapter adapter;
>   struct completion msg_complete;
> + struct i2c_smbus_alert_setup alert_data;
> + struct i2c_client *ara;
>   int irq;
>   bool msg_read;
>   bool len_recv;
> @@ -447,6 +450,21 @@ static int xlp9xx_i2c_get_frequency(struct 
> platform_device *pdev,
>   return 0;
>  }
>  
> +static int xlp9xx_i2c_smbus_setup(struct xlp9xx_i2c_dev *priv,
> +   struct platform_device *pdev)
> +{
> + if (!priv->alert_data.irq)
> + return -EINVAL;
> +
> + priv->alert_data.alert_edge_triggered = 0;

Hi George,

I think this is not needed anymore, see:
9b9f2b8bc2ac i2c: i2c-smbus: Use threaded irq for smbalert

--Jan

> +
> + priv->ara = i2c_setup_smbus_alert(>adapter, >alert_data);
> + if (!priv->ara)
> + return -ENODEV;
> +
> + return 0;
> +}
> +
>  static int xlp9xx_i2c_probe(struct platform_device *pdev)
>  {
>   struct xlp9xx_i2c_dev *priv;
> @@ -467,6 +485,10 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
>   dev_err(>dev, "invalid irq!\n");
>   return priv->irq;
>   }
> + /* SMBAlert irq */
> + priv->alert_data.irq = platform_get_irq(pdev, 1);
> + if (priv->alert_data.irq <= 0)
> + priv->alert_data.irq = 0;
>  
>   xlp9xx_i2c_get_frequency(pdev, priv);
>   xlp9xx_i2c_init(priv);
> @@ -493,6 +515,10 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
>   if (err)
>   return err;
>  
> + err = xlp9xx_i2c_smbus_setup(priv, pdev);
> + if (err)
> + dev_info(>dev, "No active SMBus alert %d\n", err);
> +
>   platform_set_drvdata(pdev, priv);
>   dev_dbg(>dev, "I2C bus:%d added\n", priv->adapter.nr);
>  
> -- 
> 2.1.4


[PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-02 Thread Jan Glauber
ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
Therefore raise the default number of CPUs from 64 to 256
by adding an arm64 specific option to override the generic default.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 3594aefa496f..970950e8c76b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -630,3 +630,4 @@ CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_AES_ARM64_NEON_BLK=m
 CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_AES_ARM64_BS=m
+CONFIG_NR_CPUS=256
-- 
2.7.4



[PATCH 2/2] arm64: defconfig: Raise NR_CPUS to 256

2018-03-02 Thread Jan Glauber
ThunderX1 dual socket has 96 CPUs and ThunderX2 has 224 CPUs.
Therefore raise the default number of CPUs from 64 to 256
by adding an arm64 specific option to override the generic default.

Signed-off-by: Jan Glauber 
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 3594aefa496f..970950e8c76b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -630,3 +630,4 @@ CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_AES_ARM64_NEON_BLK=m
 CONFIG_CRYPTO_CHACHA20_NEON=m
 CONFIG_CRYPTO_AES_ARM64_BS=m
+CONFIG_NR_CPUS=256
-- 
2.7.4



[PATCH 1/2] arm64: defconfig: enable THUNDER_NIC_VF

2018-03-02 Thread Jan Glauber
Without this option the NIC on ThunderX1 is not coming up
so enable it to get a working network interface.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 634b373785c4..3594aefa496f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -197,6 +197,7 @@ CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
 CONFIG_MACB=y
 CONFIG_THUNDER_NIC_PF=y
+CONFIG_THUNDER_NIC_VF=y
 CONFIG_HNS_DSAF=y
 CONFIG_HNS_ENET=y
 CONFIG_E1000E=y
-- 
2.7.4



[PATCH 1/2] arm64: defconfig: enable THUNDER_NIC_VF

2018-03-02 Thread Jan Glauber
Without this option the NIC on ThunderX1 is not coming up
so enable it to get a working network interface.

Signed-off-by: Jan Glauber 
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 634b373785c4..3594aefa496f 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -197,6 +197,7 @@ CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
 CONFIG_MACB=y
 CONFIG_THUNDER_NIC_PF=y
+CONFIG_THUNDER_NIC_VF=y
 CONFIG_HNS_DSAF=y
 CONFIG_HNS_ENET=y
 CONFIG_E1000E=y
-- 
2.7.4



[PATCH] i2c: octeon: Prevent error message on bus error

2018-02-27 Thread Jan Glauber
The error message:

[Fri Feb 16 13:42:13 2018] i2c-thunderx :01:09.4: unhandled state: 0

is mis-leading as state 0 (bus error) is not an unknown state.

Return -EIO as before but avoid printing the message. Also rename
STAT_ERROR to STATE_BUS_ERROR.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 drivers/i2c/busses/i2c-octeon-core.c | 1 +
 drivers/i2c/busses/i2c-octeon-core.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
b/drivers/i2c/busses/i2c-octeon-core.c
index 1d8775799056..d9607905dc2f 100644
--- a/drivers/i2c/busses/i2c-octeon-core.c
+++ b/drivers/i2c/busses/i2c-octeon-core.c
@@ -233,6 +233,7 @@ static int octeon_i2c_check_status(struct octeon_i2c *i2c, 
int final_read)
return -EOPNOTSUPP;
 
case STAT_TXDATA_NAK:
+   case STAT_BUS_ERROR:
return -EIO;
case STAT_TXADDR_NAK:
case STAT_RXADDR_NAK:
diff --git a/drivers/i2c/busses/i2c-octeon-core.h 
b/drivers/i2c/busses/i2c-octeon-core.h
index a7ef19855bb8..9bb9f64fdda0 100644
--- a/drivers/i2c/busses/i2c-octeon-core.h
+++ b/drivers/i2c/busses/i2c-octeon-core.h
@@ -43,7 +43,7 @@
 #define TWSI_CTL_AAK   0x04/* Assert ACK */
 
 /* Status values */
-#define STAT_ERROR 0x00
+#define STAT_BUS_ERROR 0x00
 #define STAT_START 0x08
 #define STAT_REP_START 0x10
 #define STAT_TXADDR_ACK0x18
-- 
2.16.2



[PATCH] i2c: octeon: Prevent error message on bus error

2018-02-27 Thread Jan Glauber
The error message:

[Fri Feb 16 13:42:13 2018] i2c-thunderx :01:09.4: unhandled state: 0

is mis-leading as state 0 (bus error) is not an unknown state.

Return -EIO as before but avoid printing the message. Also rename
STAT_ERROR to STATE_BUS_ERROR.

Signed-off-by: Jan Glauber 
---
 drivers/i2c/busses/i2c-octeon-core.c | 1 +
 drivers/i2c/busses/i2c-octeon-core.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
b/drivers/i2c/busses/i2c-octeon-core.c
index 1d8775799056..d9607905dc2f 100644
--- a/drivers/i2c/busses/i2c-octeon-core.c
+++ b/drivers/i2c/busses/i2c-octeon-core.c
@@ -233,6 +233,7 @@ static int octeon_i2c_check_status(struct octeon_i2c *i2c, 
int final_read)
return -EOPNOTSUPP;
 
case STAT_TXDATA_NAK:
+   case STAT_BUS_ERROR:
return -EIO;
case STAT_TXADDR_NAK:
case STAT_RXADDR_NAK:
diff --git a/drivers/i2c/busses/i2c-octeon-core.h 
b/drivers/i2c/busses/i2c-octeon-core.h
index a7ef19855bb8..9bb9f64fdda0 100644
--- a/drivers/i2c/busses/i2c-octeon-core.h
+++ b/drivers/i2c/busses/i2c-octeon-core.h
@@ -43,7 +43,7 @@
 #define TWSI_CTL_AAK   0x04/* Assert ACK */
 
 /* Status values */
-#define STAT_ERROR 0x00
+#define STAT_BUS_ERROR 0x00
 #define STAT_START 0x08
 #define STAT_REP_START 0x10
 #define STAT_TXADDR_ACK0x18
-- 
2.16.2



Re: [PATCH] arm64: Fix compilation error while accessing MPIDR_HWID_BITMASK from .S files

2018-02-19 Thread Jan Glauber
Hi Will,

I just hit the same issue with 4.16-rc2. The patch makes it compilable
again.

--Jan

On Mon, Feb 19, 2018 at 11:02:39AM +, Will Deacon wrote:
> Hi John,
> 
> On Mon, Feb 19, 2018 at 10:19:35AM +, John Garry wrote:
> > On 19/02/2018 06:39, Bhupesh Sharma wrote:
> > >Since commit e1a50de37860b3a93a9d643b09638db5aff47650 (arm64: cputype:
> > >Silence Sparse warnings), compilation of arm64 architecture is broken
> > >with the following error messages:
> > >
> > >  AR  arch/arm64/kernel/built-in.o
> > >  arch/arm64/kernel/head.S: Assembler messages:
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: junk at end of line, first
> > >  unrecognized character is `L'
> > >  arch/arm64/kernel/head.S:677: Error: unexpected characters following
> > >  instruction at operand 2 -- `movz x1,:abs_g1_s:0xff00ffUL'
> > >  arch/arm64/kernel/head.S:677: Error: unexpected characters following
> > >  instruction at operand 2 -- `movk x1,:abs_g0_nc:0xff00ffUL'
> > >
> > >This patch fixes the same by using the UL() macro correctly for
> > >assigning the MPIDR_HWID_BITMASK macro value.
> > >
> > >Signed-off-by: Bhupesh Sharma 
> > >---
> > > arch/arm64/include/asm/cputype.h | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > >diff --git a/arch/arm64/include/asm/cputype.h 
> > >b/arch/arm64/include/asm/cputype.h
> > >index eda8c5f629fc..350c76a1d15b 100644
> > >--- a/arch/arm64/include/asm/cputype.h
> > >+++ b/arch/arm64/include/asm/cputype.h
> > >@@ -20,7 +20,7 @@
> > >
> > > #define MPIDR_UP_BITMASK  (0x1 << 30)
> > > #define MPIDR_MT_BITMASK  (0x1 << 24)
> > >-#define MPIDR_HWID_BITMASK0xff00ffUL
> > >+#define MPIDR_HWID_BITMASKUL(0xff00ff)
> > 
> > Works for me.
> > 
> > FYI, I am using (old) gcc-linaro-4.8-2015.06-x86_64_aarch64.
> 
> Just to confirm: are you saying that this patch fixes the build for you,
> or that mainline builds for you and the patch is not needed?
> 
> Cheers,
> 
> Will
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Re: [PATCH] arm64: Fix compilation error while accessing MPIDR_HWID_BITMASK from .S files

2018-02-19 Thread Jan Glauber
Hi Will,

I just hit the same issue with 4.16-rc2. The patch makes it compilable
again.

--Jan

On Mon, Feb 19, 2018 at 11:02:39AM +, Will Deacon wrote:
> Hi John,
> 
> On Mon, Feb 19, 2018 at 10:19:35AM +, John Garry wrote:
> > On 19/02/2018 06:39, Bhupesh Sharma wrote:
> > >Since commit e1a50de37860b3a93a9d643b09638db5aff47650 (arm64: cputype:
> > >Silence Sparse warnings), compilation of arm64 architecture is broken
> > >with the following error messages:
> > >
> > >  AR  arch/arm64/kernel/built-in.o
> > >  arch/arm64/kernel/head.S: Assembler messages:
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: found 'L', expected: ')'
> > >  arch/arm64/kernel/head.S:677: Error: junk at end of line, first
> > >  unrecognized character is `L'
> > >  arch/arm64/kernel/head.S:677: Error: unexpected characters following
> > >  instruction at operand 2 -- `movz x1,:abs_g1_s:0xff00ffUL'
> > >  arch/arm64/kernel/head.S:677: Error: unexpected characters following
> > >  instruction at operand 2 -- `movk x1,:abs_g0_nc:0xff00ffUL'
> > >
> > >This patch fixes the same by using the UL() macro correctly for
> > >assigning the MPIDR_HWID_BITMASK macro value.
> > >
> > >Signed-off-by: Bhupesh Sharma 
> > >---
> > > arch/arm64/include/asm/cputype.h | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > >diff --git a/arch/arm64/include/asm/cputype.h 
> > >b/arch/arm64/include/asm/cputype.h
> > >index eda8c5f629fc..350c76a1d15b 100644
> > >--- a/arch/arm64/include/asm/cputype.h
> > >+++ b/arch/arm64/include/asm/cputype.h
> > >@@ -20,7 +20,7 @@
> > >
> > > #define MPIDR_UP_BITMASK  (0x1 << 30)
> > > #define MPIDR_MT_BITMASK  (0x1 << 24)
> > >-#define MPIDR_HWID_BITMASK0xff00ffUL
> > >+#define MPIDR_HWID_BITMASKUL(0xff00ff)
> > 
> > Works for me.
> > 
> > FYI, I am using (old) gcc-linaro-4.8-2015.06-x86_64_aarch64.
> 
> Just to confirm: are you saying that this patch fixes the build for you,
> or that mainline builds for you and the patch is not needed?
> 
> Cheers,
> 
> Will
> 
> ___
> linux-arm-kernel mailing list
> linux-arm-ker...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel


Re: [Bug fix] octeon-i2c driver updates

2017-12-01 Thread Jan Glauber
Hi Sean,

as you try to solve two different issues I suggest that you create one
patch per issue.

For the second point (retry of START after recovery) I would still like
to hear Wolfram's opinion. I would assume that any i2c user should
be well aware of -EAGAIN, so I wonder if it is worth the additional
complexity of the retry logic.

Also, the first issue changes Octeon MIPS which I'm not able to test,
so David needs to be involved here.

thanks,
Jan

On Thu, Nov 30, 2017 at 01:56:09AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
wrote:
> Hi Jan,
> 
> Any other comments for this patch?
> 
> BR,
> Sean Zhang
> 
> -Original Message-
> From: Zhang, Sean C. (NSB - CN/Hangzhou) 
> Sent: Monday, November 27, 2017 4:38 PM
> To: 'Jan Glauber' <jan.glau...@caviumnetworks.com>
> Cc: david.da...@cavium.com; w...@the-dreams.de; linux-...@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Subject: RE: [Bug fix] octeon-i2c driver updates
> 
> Hi Jan,
> 
> There are two points in this patch.
> 
> Point 1. As you see, replaced octeon_i2c_init_lowlevel() by recover bus 
> status if TWSI controller is not IDLE.
> Please take a scenario like this: when system soft reset without I2C slave 
> reset, maybe make this I2C bus
> dead lock occurred (I2C slave device stuck low SCL) in chance. Then during 
> system goes up and I2C slave 
> device creating process, if this I2C slave device has a register with less 
> than 8 bytes to read, but I2C bus was
> still stuck low SCL by last system reset, then the read will failed and this 
> I2C slave device cannot be created.
> If bus recovered before the reading process, this failure can be fixed.
> 
> Function flow explanation shown as below:
> 
> a. System reset without I2C slave device reset
> --make SCL stuck low by I2C slave device
> ..
> b. octeon_i2c_probe()
> -- octeon_i2c_init_lowlevel  //reset TWSI core, but SCL still stuck low by.
> ..
> 
> c. Another I2C slave device creating process
> octeon_i2c_xfer()
> -- octeon_i2c_hlc_comp_read() //failed due to SCL stuck low.
> 
> If full recovery executed in octeon_i2c_probe(), above failure can be avoided.
> 
> 
> Point 2. octeon_i2c_recovery() is used in octeon_i2c_start() error branch, in 
> the case of octeon_i2c_recovery()
> successful, octeon_i2c_start() will return -EAGAIN, and then 
> octeon_i2c_xfer() return with error. I understand this like
> this: if octeon_i2c_recovery() successful, then i2c START signal can be sent 
> again, and all following step can be continue,
> octeon_i2c_xfer() should not return error from this condition.
> 
> BR,
> Sean Zhang
> 
> -Original Message-
> From: Jan Glauber [mailto:jan.glau...@caviumnetworks.com] 
> Sent: Friday, November 24, 2017 9:10 PM
> To: Zhang, Sean C. (NSB - CN/Hangzhou) <sean.c.zh...@nokia-sbell.com>
> Cc: david.da...@cavium.com; w...@the-dreams.de; linux-...@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Subject: Re: [Bug fix] octeon-i2c driver updates
> 
> On Thu, Nov 23, 2017 at 11:42:36AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
> wrote:
> > Dear Maintainer,
> > 
> > For octeon TWSI controller, I found below two cases, maybe can be improved.
> 
> Hi Sean,
> 
> form the description below this looks like you're fixing a bug. Can you
> elaborate on when the I2C bus dead lock occurs. Is it always happening?
> 
> What I don't like about the patch is that you're removing
> octeon_i2c_init_lowlevel() from the probe and replacing it by _always_
> going through a full recovery. Why is this neccessary?
> 
> Regards,
> Jan
> 
> > 
> > >From 09d9f0ce658d7f6a50d1af352dde619c29bc8bcf Mon Sep 17 00:00:00 2001
> > From: hgt463 <sean.c.zh...@nokia.com>
> > Date: Thu, 23 Nov 2017 18:46:09 +0800
> > Subject: [PATCH] Driver updates:
> >  - In the case of I2C bus dead lock occurred during driver probing,
> >it is better try to recovery it. so added bus recovery step in
> >octeon_i2c_probe();
> >  - The purpose of function octeon_i2c_start() is to send START, so after
> >bus recovery, also need try to send START again.
> > 
> > Signed-off-by: hgt463 <sean.c.zh...@nokia.com>
> > ---
> >  drivers/i2c/busses/i2c-octeon-core.c|   31 
> > ++-
> >  drivers/i2c/busses/i2c-octeon-platdrv.c |   15 +--
> >  2 files changed, 39 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
> > b/drivers/i2c/busses/i2c-octeon-core.c
> > index 1d87757..3ae1e03 100644
> > --- a/drivers/i2c/busses/i2c-octeon-core.c
> > +++ b/drivers/i2c/busses/i2c-octeon-core.c
> > @@ -255,

Re: [Bug fix] octeon-i2c driver updates

2017-12-01 Thread Jan Glauber
Hi Sean,

as you try to solve two different issues I suggest that you create one
patch per issue.

For the second point (retry of START after recovery) I would still like
to hear Wolfram's opinion. I would assume that any i2c user should
be well aware of -EAGAIN, so I wonder if it is worth the additional
complexity of the retry logic.

Also, the first issue changes Octeon MIPS which I'm not able to test,
so David needs to be involved here.

thanks,
Jan

On Thu, Nov 30, 2017 at 01:56:09AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
wrote:
> Hi Jan,
> 
> Any other comments for this patch?
> 
> BR,
> Sean Zhang
> 
> -Original Message-
> From: Zhang, Sean C. (NSB - CN/Hangzhou) 
> Sent: Monday, November 27, 2017 4:38 PM
> To: 'Jan Glauber' 
> Cc: david.da...@cavium.com; w...@the-dreams.de; linux-...@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Subject: RE: [Bug fix] octeon-i2c driver updates
> 
> Hi Jan,
> 
> There are two points in this patch.
> 
> Point 1. As you see, replaced octeon_i2c_init_lowlevel() by recover bus 
> status if TWSI controller is not IDLE.
> Please take a scenario like this: when system soft reset without I2C slave 
> reset, maybe make this I2C bus
> dead lock occurred (I2C slave device stuck low SCL) in chance. Then during 
> system goes up and I2C slave 
> device creating process, if this I2C slave device has a register with less 
> than 8 bytes to read, but I2C bus was
> still stuck low SCL by last system reset, then the read will failed and this 
> I2C slave device cannot be created.
> If bus recovered before the reading process, this failure can be fixed.
> 
> Function flow explanation shown as below:
> 
> a. System reset without I2C slave device reset
> --make SCL stuck low by I2C slave device
> ..
> b. octeon_i2c_probe()
> -- octeon_i2c_init_lowlevel  //reset TWSI core, but SCL still stuck low by.
> ..
> 
> c. Another I2C slave device creating process
> octeon_i2c_xfer()
> -- octeon_i2c_hlc_comp_read() //failed due to SCL stuck low.
> 
> If full recovery executed in octeon_i2c_probe(), above failure can be avoided.
> 
> 
> Point 2. octeon_i2c_recovery() is used in octeon_i2c_start() error branch, in 
> the case of octeon_i2c_recovery()
> successful, octeon_i2c_start() will return -EAGAIN, and then 
> octeon_i2c_xfer() return with error. I understand this like
> this: if octeon_i2c_recovery() successful, then i2c START signal can be sent 
> again, and all following step can be continue,
> octeon_i2c_xfer() should not return error from this condition.
> 
> BR,
> Sean Zhang
> 
> -Original Message-
> From: Jan Glauber [mailto:jan.glau...@caviumnetworks.com] 
> Sent: Friday, November 24, 2017 9:10 PM
> To: Zhang, Sean C. (NSB - CN/Hangzhou) 
> Cc: david.da...@cavium.com; w...@the-dreams.de; linux-...@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Subject: Re: [Bug fix] octeon-i2c driver updates
> 
> On Thu, Nov 23, 2017 at 11:42:36AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
> wrote:
> > Dear Maintainer,
> > 
> > For octeon TWSI controller, I found below two cases, maybe can be improved.
> 
> Hi Sean,
> 
> form the description below this looks like you're fixing a bug. Can you
> elaborate on when the I2C bus dead lock occurs. Is it always happening?
> 
> What I don't like about the patch is that you're removing
> octeon_i2c_init_lowlevel() from the probe and replacing it by _always_
> going through a full recovery. Why is this neccessary?
> 
> Regards,
> Jan
> 
> > 
> > >From 09d9f0ce658d7f6a50d1af352dde619c29bc8bcf Mon Sep 17 00:00:00 2001
> > From: hgt463 
> > Date: Thu, 23 Nov 2017 18:46:09 +0800
> > Subject: [PATCH] Driver updates:
> >  - In the case of I2C bus dead lock occurred during driver probing,
> >it is better try to recovery it. so added bus recovery step in
> >octeon_i2c_probe();
> >  - The purpose of function octeon_i2c_start() is to send START, so after
> >bus recovery, also need try to send START again.
> > 
> > Signed-off-by: hgt463 
> > ---
> >  drivers/i2c/busses/i2c-octeon-core.c|   31 
> > ++-
> >  drivers/i2c/busses/i2c-octeon-platdrv.c |   15 +--
> >  2 files changed, 39 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
> > b/drivers/i2c/busses/i2c-octeon-core.c
> > index 1d87757..3ae1e03 100644
> > --- a/drivers/i2c/busses/i2c-octeon-core.c
> > +++ b/drivers/i2c/busses/i2c-octeon-core.c
> > @@ -255,6 +255,35 @@ static int octeon_i2c_recovery(struct octeon_i2c *i2c)
> > return ret;
> &

Re: [Bug fix] octeon-i2c driver updates

2017-11-24 Thread Jan Glauber
On Thu, Nov 23, 2017 at 11:42:36AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
wrote:
> Dear Maintainer,
> 
> For octeon TWSI controller, I found below two cases, maybe can be improved.

Hi Sean,

form the description below this looks like you're fixing a bug. Can you
elaborate on when the I2C bus dead lock occurs. Is it always happening?

What I don't like about the patch is that you're removing
octeon_i2c_init_lowlevel() from the probe and replacing it by _always_
going through a full recovery. Why is this neccessary?

Regards,
Jan

> 
> >From 09d9f0ce658d7f6a50d1af352dde619c29bc8bcf Mon Sep 17 00:00:00 2001
> From: hgt463 
> Date: Thu, 23 Nov 2017 18:46:09 +0800
> Subject: [PATCH] Driver updates:
>  - In the case of I2C bus dead lock occurred during driver probing,
>it is better try to recovery it. so added bus recovery step in
>octeon_i2c_probe();
>  - The purpose of function octeon_i2c_start() is to send START, so after
>bus recovery, also need try to send START again.
> 
> Signed-off-by: hgt463 
> ---
>  drivers/i2c/busses/i2c-octeon-core.c|   31 
> ++-
>  drivers/i2c/busses/i2c-octeon-platdrv.c |   15 +--
>  2 files changed, 39 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
> b/drivers/i2c/busses/i2c-octeon-core.c
> index 1d87757..3ae1e03 100644
> --- a/drivers/i2c/busses/i2c-octeon-core.c
> +++ b/drivers/i2c/busses/i2c-octeon-core.c
> @@ -255,6 +255,35 @@ static int octeon_i2c_recovery(struct octeon_i2c *i2c)
> return ret;
>  }
>  
> +/*
> + * octeon_i2c_start_retry - send START to the bus after bus recovery.
> + * @i2c: The struct octeon_i2c
> + *
> + * Returns 0 on success, otherwise a negative errno.
> + */
> +static int octeon_i2c_start_retry(struct octeon_i2c *i2c)
> +{
> +   int ret;
> +   u8 stat;
> +
> +   ret = octeon_i2c_recovery(i2c);
> +   if (ret)
> +   goto error;
> +
> +   octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB | TWSI_CTL_STA);
> +   ret = octeon_i2c_wait(i2c);
> +   if (ret)
> +   goto error;
> +
> +   stat = octeon_i2c_stat_read(i2c);
> +   if (stat == STAT_START || stat == STAT_REP_START)
> +   /* START successful, bail out */
> +   return 0;
> +
> +error:
> +   return (ret) ? ret : -EAGAIN;
> +}
> +
>  /**
>   * octeon_i2c_start - send START to the bus
>   * @i2c: The struct octeon_i2c
> @@ -280,7 +309,7 @@ static int octeon_i2c_start(struct octeon_i2c *i2c)
>  
>  error:
> /* START failed, try to recover */
> -   ret = octeon_i2c_recovery(i2c);
> +   ret = octeon_i2c_start_retry(i2c);
> return (ret) ? ret : -EAGAIN;
>  }
>  
> diff --git a/drivers/i2c/busses/i2c-octeon-platdrv.c 
> b/drivers/i2c/busses/i2c-octeon-platdrv.c
> index 64bda83..98063af 100644
> --- a/drivers/i2c/busses/i2c-octeon-platdrv.c
> +++ b/drivers/i2c/busses/i2c-octeon-platdrv.c
> @@ -228,12 +228,6 @@ static int octeon_i2c_probe(struct platform_device *pdev)
> if (OCTEON_IS_MODEL(OCTEON_CN38XX))
> i2c->broken_irq_check = true;
>  
> -   result = octeon_i2c_init_lowlevel(i2c);
> -   if (result) {
> -   dev_err(i2c->dev, "init low level failed\n");
> -   goto  out;
> -   }
> -
> octeon_i2c_set_clock(i2c);
>  
> i2c->adap = octeon_i2c_ops;
> @@ -245,6 +239,15 @@ static int octeon_i2c_probe(struct platform_device *pdev)
> i2c_set_adapdata(>adap, i2c);
> platform_set_drvdata(pdev, i2c);
>  
> +   stat = octeon_i2c_stat_read(i2c);
> +   if (stat != STAT_IDLE) {
> +   result = octeon_i2c_recovery(i2c);
> +   if (result) {
> +   dev_err(i2c->dev, "octeon i2c recovery failed\n");
> +   goto  out;
> +   }
> +   }
> +
> result = i2c_add_adapter(>adap);
> if (result < 0)
> goto out;
> 
> 
> Attached patch for you review. Thanks in advance.
> 
> BR,
> Sean Zhang




Re: [Bug fix] octeon-i2c driver updates

2017-11-24 Thread Jan Glauber
On Thu, Nov 23, 2017 at 11:42:36AM +, Zhang, Sean C. (NSB - CN/Hangzhou) 
wrote:
> Dear Maintainer,
> 
> For octeon TWSI controller, I found below two cases, maybe can be improved.

Hi Sean,

form the description below this looks like you're fixing a bug. Can you
elaborate on when the I2C bus dead lock occurs. Is it always happening?

What I don't like about the patch is that you're removing
octeon_i2c_init_lowlevel() from the probe and replacing it by _always_
going through a full recovery. Why is this neccessary?

Regards,
Jan

> 
> >From 09d9f0ce658d7f6a50d1af352dde619c29bc8bcf Mon Sep 17 00:00:00 2001
> From: hgt463 
> Date: Thu, 23 Nov 2017 18:46:09 +0800
> Subject: [PATCH] Driver updates:
>  - In the case of I2C bus dead lock occurred during driver probing,
>it is better try to recovery it. so added bus recovery step in
>octeon_i2c_probe();
>  - The purpose of function octeon_i2c_start() is to send START, so after
>bus recovery, also need try to send START again.
> 
> Signed-off-by: hgt463 
> ---
>  drivers/i2c/busses/i2c-octeon-core.c|   31 
> ++-
>  drivers/i2c/busses/i2c-octeon-platdrv.c |   15 +--
>  2 files changed, 39 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-octeon-core.c 
> b/drivers/i2c/busses/i2c-octeon-core.c
> index 1d87757..3ae1e03 100644
> --- a/drivers/i2c/busses/i2c-octeon-core.c
> +++ b/drivers/i2c/busses/i2c-octeon-core.c
> @@ -255,6 +255,35 @@ static int octeon_i2c_recovery(struct octeon_i2c *i2c)
> return ret;
>  }
>  
> +/*
> + * octeon_i2c_start_retry - send START to the bus after bus recovery.
> + * @i2c: The struct octeon_i2c
> + *
> + * Returns 0 on success, otherwise a negative errno.
> + */
> +static int octeon_i2c_start_retry(struct octeon_i2c *i2c)
> +{
> +   int ret;
> +   u8 stat;
> +
> +   ret = octeon_i2c_recovery(i2c);
> +   if (ret)
> +   goto error;
> +
> +   octeon_i2c_ctl_write(i2c, TWSI_CTL_ENAB | TWSI_CTL_STA);
> +   ret = octeon_i2c_wait(i2c);
> +   if (ret)
> +   goto error;
> +
> +   stat = octeon_i2c_stat_read(i2c);
> +   if (stat == STAT_START || stat == STAT_REP_START)
> +   /* START successful, bail out */
> +   return 0;
> +
> +error:
> +   return (ret) ? ret : -EAGAIN;
> +}
> +
>  /**
>   * octeon_i2c_start - send START to the bus
>   * @i2c: The struct octeon_i2c
> @@ -280,7 +309,7 @@ static int octeon_i2c_start(struct octeon_i2c *i2c)
>  
>  error:
> /* START failed, try to recover */
> -   ret = octeon_i2c_recovery(i2c);
> +   ret = octeon_i2c_start_retry(i2c);
> return (ret) ? ret : -EAGAIN;
>  }
>  
> diff --git a/drivers/i2c/busses/i2c-octeon-platdrv.c 
> b/drivers/i2c/busses/i2c-octeon-platdrv.c
> index 64bda83..98063af 100644
> --- a/drivers/i2c/busses/i2c-octeon-platdrv.c
> +++ b/drivers/i2c/busses/i2c-octeon-platdrv.c
> @@ -228,12 +228,6 @@ static int octeon_i2c_probe(struct platform_device *pdev)
> if (OCTEON_IS_MODEL(OCTEON_CN38XX))
> i2c->broken_irq_check = true;
>  
> -   result = octeon_i2c_init_lowlevel(i2c);
> -   if (result) {
> -   dev_err(i2c->dev, "init low level failed\n");
> -   goto  out;
> -   }
> -
> octeon_i2c_set_clock(i2c);
>  
> i2c->adap = octeon_i2c_ops;
> @@ -245,6 +239,15 @@ static int octeon_i2c_probe(struct platform_device *pdev)
> i2c_set_adapdata(>adap, i2c);
> platform_set_drvdata(pdev, i2c);
>  
> +   stat = octeon_i2c_stat_read(i2c);
> +   if (stat != STAT_IDLE) {
> +   result = octeon_i2c_recovery(i2c);
> +   if (result) {
> +   dev_err(i2c->dev, "octeon i2c recovery failed\n");
> +   goto  out;
> +   }
> +   }
> +
> result = i2c_add_adapter(>adap);
> if (result < 0)
> goto out;
> 
> 
> Attached patch for you review. Thanks in advance.
> 
> BR,
> Sean Zhang




Re: MCP251x SPI CAN controller on Cavium ThunderX

2017-11-15 Thread Jan Glauber
On Wed, Nov 15, 2017 at 02:31:45PM +0100, Marc Kleine-Budde wrote:
> On 11/15/2017 01:40 PM, Marc Kleine-Budde wrote:
> > mcp251x_spi_trans() is called with len=3,
> > priv->spi_tx_buf and priv->spi_rx_buf point to previously allocared memory
> > 
> > priv->spi_tx_buf has been filled before calling mcp251x_spi_trans().
> 
> > #define OCTEON_SPI_MAX_BYTES 9
> 
> > static int octeon_spi_do_transfer(struct octeon_spi *p,
> >   struct spi_message *msg,
> >   struct spi_transfer *xfer,
> >   bool last_xfer)
> > {
> > struct spi_device *spi = msg->spi;
> > union cvmx_mpi_cfg mpi_cfg;
> > union cvmx_mpi_tx mpi_tx;
> > unsigned int clkdiv;
> > int mode;
> > bool cpha, cpol;
> > const u8 *tx_buf;
> > u8 *rx_buf;
> > int len;
> > int i;
> > 
> > mode = spi->mode;
> > cpha = mode & SPI_CPHA;
> > cpol = mode & SPI_CPOL;
> > 
> > clkdiv = p->sys_freq / (2 * xfer->speed_hz);
> > 
> > mpi_cfg.u64 = 0;
> > 
> > mpi_cfg.s.clkdiv = clkdiv;
> > mpi_cfg.s.cshi = (mode & SPI_CS_HIGH) ? 1 : 0;
> > mpi_cfg.s.lsbfirst = (mode & SPI_LSB_FIRST) ? 1 : 0;
> > mpi_cfg.s.wireor = (mode & SPI_3WIRE) ? 1 : 0;
> > mpi_cfg.s.idlelo = cpha != cpol;
> > mpi_cfg.s.cslate = cpha ? 1 : 0;
> > mpi_cfg.s.enable = 1;
> > 
> > if (spi->chip_select < 4)
> > p->cs_enax |= 1ull << (12 + spi->chip_select);
> > mpi_cfg.u64 |= p->cs_enax;
> > 
> > if (mpi_cfg.u64 != p->last_cfg) {
> > p->last_cfg = mpi_cfg.u64;
> > writeq(mpi_cfg.u64, p->register_base + OCTEON_SPI_CFG(p));
> > }
> > tx_buf = xfer->tx_buf;
> > rx_buf = xfer->rx_buf;
> > len = xfer->len;
> > while (len > OCTEON_SPI_MAX_BYTES) {
> > for (i = 0; i < OCTEON_SPI_MAX_BYTES; i++) {
> > u8 d;
> > if (tx_buf)
> > d = *tx_buf++;
> > else
> > d = 0;
> > writeq(d, p->register_base + OCTEON_SPI_DAT0(p) + (8 * 
> > i));
> > }
> > mpi_tx.u64 = 0;
> > mpi_tx.s.csid = spi->chip_select;
> > mpi_tx.s.leavecs = 1;
> > mpi_tx.s.txnum = tx_buf ? OCTEON_SPI_MAX_BYTES : 0;
> 
> This looks fishy, OCTEON_SPI_MAX_BYTES is 9

Because there are 9 registers in MPI_DAT(0..8).

> > mpi_tx.s.totnum = OCTEON_SPI_MAX_BYTES;
> > writeq(mpi_tx.u64, p->register_base + OCTEON_SPI_TX(p));
> > 
> > octeon_spi_wait_ready(p);
> > if (rx_buf)
> > for (i = 0; i < OCTEON_SPI_MAX_BYTES; i++) {
> > u64 v = readq(p->register_base + 
> > OCTEON_SPI_DAT0(p) + (8 * i));
> > *rx_buf++ = (u8)v;
> > }
> > len -= OCTEON_SPI_MAX_BYTES;
> > }
> > 
> > for (i = 0; i < len; i++) {
> > u8 d;
> > if (tx_buf)
> > d = *tx_buf++;
> > else
> > d = 0;
> > writeq(d, p->register_base + OCTEON_SPI_DAT0(p) + (8 * i));
> > }
> > 
> > mpi_tx.u64 = 0;
> > mpi_tx.s.csid = spi->chip_select;
> > if (last_xfer)
> > mpi_tx.s.leavecs = xfer->cs_change;
> > else
> > mpi_tx.s.leavecs = !xfer->cs_change;
> > mpi_tx.s.txnum = tx_buf ? len : 0;
> > mpi_tx.s.totnum = len;
> > writeq(mpi_tx.u64, p->register_base + OCTEON_SPI_TX(p));
> > 
> > octeon_spi_wait_ready(p);
> > if (rx_buf)
> > for (i = 0; i < len; i++) {
> > u64 v = readq(p->register_base + OCTEON_SPI_DAT0(p) + 
> > (8 * i));
> > *rx_buf++ = (u8)v;
> > }
> 
> Personally I'd fold this into the while loop, as there's quite some code
> duplication. Of course your have to improve the "if (last_xfer)" a bit.

I've not written that code, just split it for shared arm64 & mips usage
and avoided re-writing it completely on purpose :) If it turns out that
we need to change this code I might consider making changes like this.

Adding David who might know more about this driver.

--Jan


Re: MCP251x SPI CAN controller on Cavium ThunderX

2017-11-15 Thread Jan Glauber
On Wed, Nov 15, 2017 at 02:31:45PM +0100, Marc Kleine-Budde wrote:
> On 11/15/2017 01:40 PM, Marc Kleine-Budde wrote:
> > mcp251x_spi_trans() is called with len=3,
> > priv->spi_tx_buf and priv->spi_rx_buf point to previously allocared memory
> > 
> > priv->spi_tx_buf has been filled before calling mcp251x_spi_trans().
> 
> > #define OCTEON_SPI_MAX_BYTES 9
> 
> > static int octeon_spi_do_transfer(struct octeon_spi *p,
> >   struct spi_message *msg,
> >   struct spi_transfer *xfer,
> >   bool last_xfer)
> > {
> > struct spi_device *spi = msg->spi;
> > union cvmx_mpi_cfg mpi_cfg;
> > union cvmx_mpi_tx mpi_tx;
> > unsigned int clkdiv;
> > int mode;
> > bool cpha, cpol;
> > const u8 *tx_buf;
> > u8 *rx_buf;
> > int len;
> > int i;
> > 
> > mode = spi->mode;
> > cpha = mode & SPI_CPHA;
> > cpol = mode & SPI_CPOL;
> > 
> > clkdiv = p->sys_freq / (2 * xfer->speed_hz);
> > 
> > mpi_cfg.u64 = 0;
> > 
> > mpi_cfg.s.clkdiv = clkdiv;
> > mpi_cfg.s.cshi = (mode & SPI_CS_HIGH) ? 1 : 0;
> > mpi_cfg.s.lsbfirst = (mode & SPI_LSB_FIRST) ? 1 : 0;
> > mpi_cfg.s.wireor = (mode & SPI_3WIRE) ? 1 : 0;
> > mpi_cfg.s.idlelo = cpha != cpol;
> > mpi_cfg.s.cslate = cpha ? 1 : 0;
> > mpi_cfg.s.enable = 1;
> > 
> > if (spi->chip_select < 4)
> > p->cs_enax |= 1ull << (12 + spi->chip_select);
> > mpi_cfg.u64 |= p->cs_enax;
> > 
> > if (mpi_cfg.u64 != p->last_cfg) {
> > p->last_cfg = mpi_cfg.u64;
> > writeq(mpi_cfg.u64, p->register_base + OCTEON_SPI_CFG(p));
> > }
> > tx_buf = xfer->tx_buf;
> > rx_buf = xfer->rx_buf;
> > len = xfer->len;
> > while (len > OCTEON_SPI_MAX_BYTES) {
> > for (i = 0; i < OCTEON_SPI_MAX_BYTES; i++) {
> > u8 d;
> > if (tx_buf)
> > d = *tx_buf++;
> > else
> > d = 0;
> > writeq(d, p->register_base + OCTEON_SPI_DAT0(p) + (8 * 
> > i));
> > }
> > mpi_tx.u64 = 0;
> > mpi_tx.s.csid = spi->chip_select;
> > mpi_tx.s.leavecs = 1;
> > mpi_tx.s.txnum = tx_buf ? OCTEON_SPI_MAX_BYTES : 0;
> 
> This looks fishy, OCTEON_SPI_MAX_BYTES is 9

Because there are 9 registers in MPI_DAT(0..8).

> > mpi_tx.s.totnum = OCTEON_SPI_MAX_BYTES;
> > writeq(mpi_tx.u64, p->register_base + OCTEON_SPI_TX(p));
> > 
> > octeon_spi_wait_ready(p);
> > if (rx_buf)
> > for (i = 0; i < OCTEON_SPI_MAX_BYTES; i++) {
> > u64 v = readq(p->register_base + 
> > OCTEON_SPI_DAT0(p) + (8 * i));
> > *rx_buf++ = (u8)v;
> > }
> > len -= OCTEON_SPI_MAX_BYTES;
> > }
> > 
> > for (i = 0; i < len; i++) {
> > u8 d;
> > if (tx_buf)
> > d = *tx_buf++;
> > else
> > d = 0;
> > writeq(d, p->register_base + OCTEON_SPI_DAT0(p) + (8 * i));
> > }
> > 
> > mpi_tx.u64 = 0;
> > mpi_tx.s.csid = spi->chip_select;
> > if (last_xfer)
> > mpi_tx.s.leavecs = xfer->cs_change;
> > else
> > mpi_tx.s.leavecs = !xfer->cs_change;
> > mpi_tx.s.txnum = tx_buf ? len : 0;
> > mpi_tx.s.totnum = len;
> > writeq(mpi_tx.u64, p->register_base + OCTEON_SPI_TX(p));
> > 
> > octeon_spi_wait_ready(p);
> > if (rx_buf)
> > for (i = 0; i < len; i++) {
> > u64 v = readq(p->register_base + OCTEON_SPI_DAT0(p) + 
> > (8 * i));
> > *rx_buf++ = (u8)v;
> > }
> 
> Personally I'd fold this into the while loop, as there's quite some code
> duplication. Of course your have to improve the "if (last_xfer)" a bit.

I've not written that code, just split it for shared arm64 & mips usage
and avoided re-writing it completely on purpose :) If it turns out that
we need to change this code I might consider making changes like this.

Adding David who might know more about this driver.

--Jan


Re: MCP251x SPI CAN controller on Cavium ThunderX

2017-11-15 Thread Jan Glauber
On Wed, Nov 15, 2017 at 11:54:20AM +0100, Marc Kleine-Budde wrote:
> On 11/14/2017 01:02 PM, Mark Brown wrote:
> > On Mon, Nov 13, 2017 at 01:17:42PM -0800, Tim Harvey wrote:
> > 
> >> When a register is read from the mcp251x driver the
> >> octeon_spi_do_transfer() gets a spi_message with a single spi_xfer of
> >> len=3, a tx_buf, and an rx_buf which I believe is supposed to shift
> >> out 3 bytes out MOSI and shift in 3 bytes from MISO where the last
> >> byte shifted in would be the response.
> > 
> > No, that will simultaneously transmit and recieve three bytes.
> 
> That's what the driver supposed to do.
> 
> > If you want to transmit two bytes and then recieve one byte you need
> > two xfers, one with a len of 2 and a tx_buf, the other with a len of
> > 1 and a rx_buf.
> To read a register (mcp251x_read_reg()) the mcp251x does a 3 byte full
> duplex transfer. The first byte send is the command (read register) the
> second byte the register number the third byte is a dummy. The first 2
> bytes received are ignored the 3rd byte is the register contents.

To support this full duplex transfer the Cavium SPI controller needs
to know the receive lenght before setting up the transaction.

spi_transfer only includes the total length, so I don't see how this
should work.

--Jan


Re: MCP251x SPI CAN controller on Cavium ThunderX

2017-11-15 Thread Jan Glauber
On Wed, Nov 15, 2017 at 11:54:20AM +0100, Marc Kleine-Budde wrote:
> On 11/14/2017 01:02 PM, Mark Brown wrote:
> > On Mon, Nov 13, 2017 at 01:17:42PM -0800, Tim Harvey wrote:
> > 
> >> When a register is read from the mcp251x driver the
> >> octeon_spi_do_transfer() gets a spi_message with a single spi_xfer of
> >> len=3, a tx_buf, and an rx_buf which I believe is supposed to shift
> >> out 3 bytes out MOSI and shift in 3 bytes from MISO where the last
> >> byte shifted in would be the response.
> > 
> > No, that will simultaneously transmit and recieve three bytes.
> 
> That's what the driver supposed to do.
> 
> > If you want to transmit two bytes and then recieve one byte you need
> > two xfers, one with a len of 2 and a tx_buf, the other with a len of
> > 1 and a rx_buf.
> To read a register (mcp251x_read_reg()) the mcp251x does a 3 byte full
> duplex transfer. The first byte send is the command (read register) the
> second byte the register number the third byte is a dummy. The first 2
> bytes received are ignored the 3rd byte is the register contents.

To support this full duplex transfer the Cavium SPI controller needs
to know the receive lenght before setting up the transaction.

spi_transfer only includes the total length, so I don't see how this
should work.

--Jan


[PATCH] MAINTAINERS: Split Cavium EDAC entry and add myself

2017-10-19 Thread Jan Glauber
Split the Cavium EDAC entry into MIPS and ARM drivers because
they have different maintainers and mailing lists.

Add myself as additional maintainer to the ThunderX driver.

Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 MAINTAINERS | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a74227a..9ea8b89 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4906,13 +4906,19 @@ L:  linux-e...@vger.kernel.org
 S: Maintained
 F: drivers/edac/highbank*
 
-EDAC-CAVIUM
+EDAC-CAVIUM OCTEON
 M: Ralf Baechle <r...@linux-mips.org>
 M: David Daney <david.da...@cavium.com>
 L: linux-e...@vger.kernel.org
 L: linux-m...@linux-mips.org
 S: Supported
 F: drivers/edac/octeon_edac*
+
+EDAC-CAVIUM THUNDERX
+M: David Daney <david.da...@cavium.com>
+M: Jan Glauber <jglau...@cavium.com>
+L: linux-e...@vger.kernel.org
+S: Supported
 F: drivers/edac/thunderx_edac*
 
 EDAC-CORE
-- 
1.9.1



[PATCH] MAINTAINERS: Split Cavium EDAC entry and add myself

2017-10-19 Thread Jan Glauber
Split the Cavium EDAC entry into MIPS and ARM drivers because
they have different maintainers and mailing lists.

Add myself as additional maintainer to the ThunderX driver.

Signed-off-by: Jan Glauber 
---
 MAINTAINERS | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a74227a..9ea8b89 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4906,13 +4906,19 @@ L:  linux-e...@vger.kernel.org
 S: Maintained
 F: drivers/edac/highbank*
 
-EDAC-CAVIUM
+EDAC-CAVIUM OCTEON
 M: Ralf Baechle 
 M: David Daney 
 L: linux-e...@vger.kernel.org
 L: linux-m...@linux-mips.org
 S: Supported
 F: drivers/edac/octeon_edac*
+
+EDAC-CAVIUM THUNDERX
+M: David Daney 
+M: Jan Glauber 
+L: linux-e...@vger.kernel.org
+S: Supported
 F: drivers/edac/thunderx_edac*
 
 EDAC-CORE
-- 
1.9.1



Re: net/sunrpc: v4.14-rc4 lockdep warning

2017-10-16 Thread Jan Glauber
Hi Trond,

is there a patch available for this issue? I'm running into with
4.14-rc5 on my ARM64 board.

thanks, Jan

2017-10-11 19:49 GMT+02:00 Trond Myklebust :
> On Tue, 2017-10-10 at 10:19 -0700, t...@kernel.org wrote:
>> Hello,
>>
>> On Tue, Oct 10, 2017 at 04:48:57PM +, Trond Myklebust wrote:
>> > Thanks for the explanation. What I'm not really understanding here
>> > though, is how the work item could be queued at all. We have a
>> > wait_on_bit_lock() in xprt_destroy() that should mean the xprt-
>> > > task_cleanup work item has completed running, and that it cannot
>> > > be
>> >
>> > requeued.
>> >
>> > Is there a possibility that the flush_queue() might be triggered
>> > despite the work item not being queued?
>>
>> Yeah, for sure.  The lockdep annotations don't distinguish those
>> cases and assume the worst case.
>>
>
> OK. Let's just remove that call to cancel_work_sync() then. As I said,
> it should be redundant due to the wait_on_bit_lock().
>
> --
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.mykleb...@primarydata.com


Re: net/sunrpc: v4.14-rc4 lockdep warning

2017-10-16 Thread Jan Glauber
Hi Trond,

is there a patch available for this issue? I'm running into with
4.14-rc5 on my ARM64 board.

thanks, Jan

2017-10-11 19:49 GMT+02:00 Trond Myklebust :
> On Tue, 2017-10-10 at 10:19 -0700, t...@kernel.org wrote:
>> Hello,
>>
>> On Tue, Oct 10, 2017 at 04:48:57PM +, Trond Myklebust wrote:
>> > Thanks for the explanation. What I'm not really understanding here
>> > though, is how the work item could be queued at all. We have a
>> > wait_on_bit_lock() in xprt_destroy() that should mean the xprt-
>> > > task_cleanup work item has completed running, and that it cannot
>> > > be
>> >
>> > requeued.
>> >
>> > Is there a possibility that the flush_queue() might be triggered
>> > despite the work item not being queued?
>>
>> Yeah, for sure.  The lockdep annotations don't distinguish those
>> cases and assume the worst case.
>>
>
> OK. Let's just remove that call to cancel_work_sync() then. As I said,
> it should be redundant due to the wait_on_bit_lock().
>
> --
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> trond.mykleb...@primarydata.com


Re: [PATCH 0/6] Switch arm64 over to qrwlock

2017-10-10 Thread Jan Glauber
2017-10-05 14:54 GMT+02:00 Will Deacon <will.dea...@arm.com>:
>
> Hi all,
>
> This patch series reworks bits of the qrwlock code that it can be used
> to replace the asm rwlocks currently implemented for arm64. The structure
> of the series is:
>
>   Patches 1-3   : Work WFE into qrwlock using atomic_cond_read_acquire so
>   we can avoid busy-waiting.
>
>   Patch 4   : Enable qrwlocks for arm64
>
>   Patch 5-6 : Ensure writer slowpath fairness. This has a potential
>   performance impact on the writer unlock path, so I've
>   kept them at the end.
>
> The patches apply on top of my other locking cleanups:
>
>   
> http://lkml.kernel.org/r/1507055129-12300-1-git-send-email-will.dea...@arm.com
>
> although the conflict with mainline is trivial to resolve without those.
> The full stack is also pushed here:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git qrwlock
>
> All comments (particularly related to testing and performance) welcome!

Hi Will,

run your patches on ThunderX2.

Old RW locks:

insmod ./locktorture.ko nwriters_stress=56 nreaders_stress=224
torture_type="rw_lock_irq" stat_interval=2

[  558.700042] Writes:  Total: 10127  Max/Min: 0/0   Fail: 0
[  558.700054] Reads :  Total: 764714  Max/Min: 0/0   Fail: 0
[  561.797011] Writes:  Total: 11288  Max/Min: 0/0   Fail: 0
[  561.802518] Reads :  Total: 2104452  Max/Min: 0/0   Fail: 0
[  565.844219] Writes:  Total: 11512  Max/Min: 0/0   Fail: 0
[  565.849710] Reads :  Total: 4277492  Max/Min: 0/0   Fail: 0

Queued RW locks:

[  221.491207] Writes:  Total: 57318  Max/Min: 0/0   Fail: 0
[  221.491219] Reads :  Total: 382979  Max/Min: 0/0   Fail: 0
[  223.507065] Writes:  Total: 83490  Max/Min: 0/0   Fail: 0
[  223.512611] Reads :  Total: 684848  Max/Min: 0/0   Fail: 0
[  225.522937] Writes:  Total: 110012  Max/Min: 0/0   Fail: 0
[  225.528511] Reads :  Total: 968826  Max/Min: 0/0   Fail: 0

So readers are still preferred over writers, but results are _way_
better. Also, with the old implementation
above test hung the machine which does not happen with the queued variant.

If you want you can add:
Tested-by: Jan Glauber <jglau...@cavium.com>

--Jan

> Cheers,
>
> Will
>
> --->8
>
> Will Deacon (6):
>   kernel/locking: Use struct qrwlock instead of struct __qrwlock
>   locking/atomic: Add atomic_cond_read_acquire
>   kernel/locking: Use atomic_cond_read_acquire when spinning in qrwlock
>   arm64: locking: Move rwlock implementation over to qrwlocks
>   kernel/locking: Prevent slowpath writers getting held up by fastpath
>   kernel/locking: Remove unused union members from struct qrwlock
>
>  arch/arm64/Kconfig  |  17 
>  arch/arm64/include/asm/Kbuild   |   1 +
>  arch/arm64/include/asm/spinlock.h   | 164 
> +---
>  arch/arm64/include/asm/spinlock_types.h |   6 +-
>  include/asm-generic/atomic-long.h   |   3 +
>  include/asm-generic/qrwlock.h   |  14 +--
>  include/asm-generic/qrwlock_types.h |   2 +-
>  include/linux/atomic.h  |   4 +
>  kernel/locking/qrwlock.c|  83 +++-
>  9 files changed, 43 insertions(+), 251 deletions(-)
>
> --
> 2.1.4
>


Re: [PATCH 0/6] Switch arm64 over to qrwlock

2017-10-10 Thread Jan Glauber
2017-10-05 14:54 GMT+02:00 Will Deacon :
>
> Hi all,
>
> This patch series reworks bits of the qrwlock code that it can be used
> to replace the asm rwlocks currently implemented for arm64. The structure
> of the series is:
>
>   Patches 1-3   : Work WFE into qrwlock using atomic_cond_read_acquire so
>   we can avoid busy-waiting.
>
>   Patch 4   : Enable qrwlocks for arm64
>
>   Patch 5-6 : Ensure writer slowpath fairness. This has a potential
>   performance impact on the writer unlock path, so I've
>   kept them at the end.
>
> The patches apply on top of my other locking cleanups:
>
>   
> http://lkml.kernel.org/r/1507055129-12300-1-git-send-email-will.dea...@arm.com
>
> although the conflict with mainline is trivial to resolve without those.
> The full stack is also pushed here:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git qrwlock
>
> All comments (particularly related to testing and performance) welcome!

Hi Will,

run your patches on ThunderX2.

Old RW locks:

insmod ./locktorture.ko nwriters_stress=56 nreaders_stress=224
torture_type="rw_lock_irq" stat_interval=2

[  558.700042] Writes:  Total: 10127  Max/Min: 0/0   Fail: 0
[  558.700054] Reads :  Total: 764714  Max/Min: 0/0   Fail: 0
[  561.797011] Writes:  Total: 11288  Max/Min: 0/0   Fail: 0
[  561.802518] Reads :  Total: 2104452  Max/Min: 0/0   Fail: 0
[  565.844219] Writes:  Total: 11512  Max/Min: 0/0   Fail: 0
[  565.849710] Reads :  Total: 4277492  Max/Min: 0/0   Fail: 0

Queued RW locks:

[  221.491207] Writes:  Total: 57318  Max/Min: 0/0   Fail: 0
[  221.491219] Reads :  Total: 382979  Max/Min: 0/0   Fail: 0
[  223.507065] Writes:  Total: 83490  Max/Min: 0/0   Fail: 0
[  223.512611] Reads :  Total: 684848  Max/Min: 0/0   Fail: 0
[  225.522937] Writes:  Total: 110012  Max/Min: 0/0   Fail: 0
[  225.528511] Reads :  Total: 968826  Max/Min: 0/0   Fail: 0

So readers are still preferred over writers, but results are _way_
better. Also, with the old implementation
above test hung the machine which does not happen with the queued variant.

If you want you can add:
Tested-by: Jan Glauber 

--Jan

> Cheers,
>
> Will
>
> --->8
>
> Will Deacon (6):
>   kernel/locking: Use struct qrwlock instead of struct __qrwlock
>   locking/atomic: Add atomic_cond_read_acquire
>   kernel/locking: Use atomic_cond_read_acquire when spinning in qrwlock
>   arm64: locking: Move rwlock implementation over to qrwlocks
>   kernel/locking: Prevent slowpath writers getting held up by fastpath
>   kernel/locking: Remove unused union members from struct qrwlock
>
>  arch/arm64/Kconfig  |  17 
>  arch/arm64/include/asm/Kbuild   |   1 +
>  arch/arm64/include/asm/spinlock.h   | 164 
> +---
>  arch/arm64/include/asm/spinlock_types.h |   6 +-
>  include/asm-generic/atomic-long.h   |   3 +
>  include/asm-generic/qrwlock.h   |  14 +--
>  include/asm-generic/qrwlock_types.h |   2 +-
>  include/linux/atomic.h  |   4 +
>  kernel/locking/qrwlock.c|  83 +++-
>  9 files changed, 43 insertions(+), 251 deletions(-)
>
> --
> 2.1.4
>


Re: [PATCH v10 3/7] edac,soc: thunderx: Add wrapper for EDAC LMC PCI device

2017-10-02 Thread Jan Glauber
On Wed, Sep 27, 2017 at 06:19:01PM +0200, Borislav Petkov wrote:
> On Mon, Sep 25, 2017 at 02:34:58PM +0200, Jan Glauber wrote:
> > Cavium SOCs contain a memory controller that is presented as a
> > PCI device. This PCI device will be used by an EDAC driver and
> > by a PMU driver.
> > 
> > To allow both subsystems to access the device a small wrapper is
> > introduced that multi-plexes PCI probe and removal calls of the
> > device to the EDAC driver.
> > 
> > The same mechanism will be used later to call the PMU driver.
> > 
> > The ThunderX EDAC driver is limited to only build as module
> > with this patch. The reason is that with multiple users of the
> > multi-plexer all users must be either builtin or modules.
> > 
> > Signed-off-by: Jan Glauber <jglau...@cavium.com>
> > ---
> 
> ...
> 
> > diff --git a/drivers/soc/cavium/cavium_lmc.c 
> > b/drivers/soc/cavium/cavium_lmc.c
> > new file mode 100644
> > index ..87248e83c55b
> > --- /dev/null
> > +++ b/drivers/soc/cavium/cavium_lmc.c
> > @@ -0,0 +1,49 @@
> > +/*
> > + * These PCI devices contain RAS functionality and PMU counters. To allow
> > + * independent RAS and PMU drivers this driver registers for the PCI 
> > devices
> > + * and multi-plexes probe and removal.
> > + *
> > + * This file is subject to the terms and conditions of the GNU General 
> > Public
> > + * License.  See the file "COPYING" in the main directory of this archive
> > + * for more details.
> > + *
> > + * Copyright: Cavium, Inc. (C) 2017
> > + *
> > + */
> > +#include 
> > +#include 
> > +#include 
> > +
> > +static int cvm_lmc_probe(struct pci_dev *pdev,
> > +const struct pci_device_id *ent)
> > +{
> > +   if (IS_ENABLED(CONFIG_EDAC_THUNDERX))
> > +   thunderx_edac_lmc_probe(pdev, ent);
> 
> You could save yourself the if (IS_ENABLED()) here by adding stubs in
> the lmc.h header for those functions for the !CONFIG_EDAC_THUNDERX case.

OK.

> One thing I'm not clear on though, is the design of the whole thing:
> cvm_lmc_probe() probes the EDAC driver during its own probe, which
> means, thunderx_edac needs to be loaded first. And the other things that
> get loaded, do the same.

Yes. That seems to work fine with the limitation of not being
able to have mixed build options, everything needs to be modular or
built-in. So I forced all parts to be buildable only as modules.

I went for this as the simplest solution, the probing is completely
synchronous and no state needs to be stored in the wrapper.

> What I was expecting is those small cavium_lmc.c and cavium_ocx.c
> wrappers to probe and register the respective PCI device and then its
> *users* - EDAC and PMU drivers to go and request the PCI device from
> them:
> 
> cavium_lmc_get_pci_dev()
> cavium_ocx_get_pci_dev()
> 
> and so on. Those will be exported to modules. And the small stubs can
> also be built-in too.

So this is the opposite approach which would be asynchronous. What I
don't understand with that approach:

1. What will trigger probing the edac (or perf) driver part?
   Right now the trigger is the PCI device ID. If the wrapper
   does not call into edac how should we load the ThunderX edac/perf drivers?
   The only option I see is a initcall in edac/perf to look for their devices.

2. The probe & register is _very_ specific to perf/edac and very different.
   The only part that would fit in the wrapper is pci_enable_device().
   So is that what you have in mind?

> This way you can do reference counting and whatever else.
> 
> If the above calls fail, neither EDAC nor PMU will load properly but you
> solve the multiplexing issue by having those wrappers arbitrate access
> to the PCI devices.
> 
> Because right now the wrappers are simply weakly hiding the calls into
> EDAC and that's exactly what I was opposing to.
> 
> Hmmm?

> > +   return 0;
> > +}
> > +
> > +static void cvm_lmc_remove(struct pci_dev *pdev)
> > +{
> > +   if (IS_ENABLED(CONFIG_EDAC_THUNDERX))
> > +   thunderx_edac_lmc_remove(pdev);
> > +}
> > +
> > +static const struct pci_device_id cvm_lmc_pci_table[] = {
> > +   { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa022) },
> 
> { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
> 
> You already have that PCI device id define.

True, will use it.

Thanks for looking at this!

--Jan

> -- 
> Regards/Gruss,
> Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [PATCH v10 3/7] edac,soc: thunderx: Add wrapper for EDAC LMC PCI device

2017-10-02 Thread Jan Glauber
On Wed, Sep 27, 2017 at 06:19:01PM +0200, Borislav Petkov wrote:
> On Mon, Sep 25, 2017 at 02:34:58PM +0200, Jan Glauber wrote:
> > Cavium SOCs contain a memory controller that is presented as a
> > PCI device. This PCI device will be used by an EDAC driver and
> > by a PMU driver.
> > 
> > To allow both subsystems to access the device a small wrapper is
> > introduced that multi-plexes PCI probe and removal calls of the
> > device to the EDAC driver.
> > 
> > The same mechanism will be used later to call the PMU driver.
> > 
> > The ThunderX EDAC driver is limited to only build as module
> > with this patch. The reason is that with multiple users of the
> > multi-plexer all users must be either builtin or modules.
> > 
> > Signed-off-by: Jan Glauber 
> > ---
> 
> ...
> 
> > diff --git a/drivers/soc/cavium/cavium_lmc.c 
> > b/drivers/soc/cavium/cavium_lmc.c
> > new file mode 100644
> > index ..87248e83c55b
> > --- /dev/null
> > +++ b/drivers/soc/cavium/cavium_lmc.c
> > @@ -0,0 +1,49 @@
> > +/*
> > + * These PCI devices contain RAS functionality and PMU counters. To allow
> > + * independent RAS and PMU drivers this driver registers for the PCI 
> > devices
> > + * and multi-plexes probe and removal.
> > + *
> > + * This file is subject to the terms and conditions of the GNU General 
> > Public
> > + * License.  See the file "COPYING" in the main directory of this archive
> > + * for more details.
> > + *
> > + * Copyright: Cavium, Inc. (C) 2017
> > + *
> > + */
> > +#include 
> > +#include 
> > +#include 
> > +
> > +static int cvm_lmc_probe(struct pci_dev *pdev,
> > +const struct pci_device_id *ent)
> > +{
> > +   if (IS_ENABLED(CONFIG_EDAC_THUNDERX))
> > +   thunderx_edac_lmc_probe(pdev, ent);
> 
> You could save yourself the if (IS_ENABLED()) here by adding stubs in
> the lmc.h header for those functions for the !CONFIG_EDAC_THUNDERX case.

OK.

> One thing I'm not clear on though, is the design of the whole thing:
> cvm_lmc_probe() probes the EDAC driver during its own probe, which
> means, thunderx_edac needs to be loaded first. And the other things that
> get loaded, do the same.

Yes. That seems to work fine with the limitation of not being
able to have mixed build options, everything needs to be modular or
built-in. So I forced all parts to be buildable only as modules.

I went for this as the simplest solution, the probing is completely
synchronous and no state needs to be stored in the wrapper.

> What I was expecting is those small cavium_lmc.c and cavium_ocx.c
> wrappers to probe and register the respective PCI device and then its
> *users* - EDAC and PMU drivers to go and request the PCI device from
> them:
> 
> cavium_lmc_get_pci_dev()
> cavium_ocx_get_pci_dev()
> 
> and so on. Those will be exported to modules. And the small stubs can
> also be built-in too.

So this is the opposite approach which would be asynchronous. What I
don't understand with that approach:

1. What will trigger probing the edac (or perf) driver part?
   Right now the trigger is the PCI device ID. If the wrapper
   does not call into edac how should we load the ThunderX edac/perf drivers?
   The only option I see is a initcall in edac/perf to look for their devices.

2. The probe & register is _very_ specific to perf/edac and very different.
   The only part that would fit in the wrapper is pci_enable_device().
   So is that what you have in mind?

> This way you can do reference counting and whatever else.
> 
> If the above calls fail, neither EDAC nor PMU will load properly but you
> solve the multiplexing issue by having those wrappers arbitrate access
> to the PCI devices.
> 
> Because right now the wrappers are simply weakly hiding the calls into
> EDAC and that's exactly what I was opposing to.
> 
> Hmmm?

> > +   return 0;
> > +}
> > +
> > +static void cvm_lmc_remove(struct pci_dev *pdev)
> > +{
> > +   if (IS_ENABLED(CONFIG_EDAC_THUNDERX))
> > +   thunderx_edac_lmc_remove(pdev);
> > +}
> > +
> > +static const struct pci_device_id cvm_lmc_pci_table[] = {
> > +   { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa022) },
> 
> { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
> 
> You already have that PCI device id define.

True, will use it.

Thanks for looking at this!

--Jan

> -- 
> Regards/Gruss,
> Boris.
> 
> Good mailing practices for 400: avoid top-posting and trim the reply.


Re: [v4,0/3] Workaround for bus/slot reset on Cavium cn8xxx root ports

2017-09-26 Thread Jan Glauber
On Wed, Sep 20, 2017 at 12:09:12PM -0600, Alex Williamson wrote:
> On Tue, 12 Sep 2017 02:40:49 -0700
> Vadim Lomovtsev <vadim.lomovt...@caviumnetworks.com> wrote:
> 
> > Hi all,
> > 
> > Are there any updates on this ?
> > Comments/objections/acks/nacks ?
> > 
> > WBBR,
> > Vadim
> > 
> > On Fri, Sep 08, 2017 at 10:10:30AM +0200, Jan Glauber wrote:
> > > Using vfio-pci on a combination of cn8xxx and some PCI devices results in
> > > a kernel panic. This is triggered by issuing a bus or a slot reset
> > > on the PCI device.
> > > 
> > > With this series both checks indicate that the reset is not possible
> > > preventing the kernel panic.
> > > 
> > > David Daney (2):
> > >   PCI: Allow PCI_DEV_FLAGS_NO_BUS_RESET to be used on bus device
> > >   PCI: Avoid bus reset for Cavium cn8xxx root ports
> > > 
> > > Jan Glauber (1):
> > >   PCI: Avoid slot reset if bus reset is not possible
> > > 
> > >  drivers/pci/pci.c| 8 
> > >  drivers/pci/quirks.c | 8 
> > >  2 files changed, 16 insertions(+)  
> 
> 
> Looks ok to me, for series:
> 
> Reviewed-by: Alex Williamson <alex.william...@redhat.com>
> 
> I am curious why we're happy targeting this quirk at a single device ID
> while at the same time trying to expand the ACS quirk to a notable
> fraction of the Cavium PCI device ID address space.  Thanks,
> 
> Alex

Bjorn, would you take these patches? 

We might need to extend the quirk to cover more cn8xxx variants,
this is not yet entirely clear on our side.

Therefore I'd like to ask if we could merge this patches now to solve the
long-standing issue for cn88xx.

thanks,
Jan


Re: [v4,0/3] Workaround for bus/slot reset on Cavium cn8xxx root ports

2017-09-26 Thread Jan Glauber
On Wed, Sep 20, 2017 at 12:09:12PM -0600, Alex Williamson wrote:
> On Tue, 12 Sep 2017 02:40:49 -0700
> Vadim Lomovtsev  wrote:
> 
> > Hi all,
> > 
> > Are there any updates on this ?
> > Comments/objections/acks/nacks ?
> > 
> > WBBR,
> > Vadim
> > 
> > On Fri, Sep 08, 2017 at 10:10:30AM +0200, Jan Glauber wrote:
> > > Using vfio-pci on a combination of cn8xxx and some PCI devices results in
> > > a kernel panic. This is triggered by issuing a bus or a slot reset
> > > on the PCI device.
> > > 
> > > With this series both checks indicate that the reset is not possible
> > > preventing the kernel panic.
> > > 
> > > David Daney (2):
> > >   PCI: Allow PCI_DEV_FLAGS_NO_BUS_RESET to be used on bus device
> > >   PCI: Avoid bus reset for Cavium cn8xxx root ports
> > > 
> > > Jan Glauber (1):
> > >   PCI: Avoid slot reset if bus reset is not possible
> > > 
> > >  drivers/pci/pci.c| 8 
> > >  drivers/pci/quirks.c | 8 
> > >  2 files changed, 16 insertions(+)  
> 
> 
> Looks ok to me, for series:
> 
> Reviewed-by: Alex Williamson 
> 
> I am curious why we're happy targeting this quirk at a single device ID
> while at the same time trying to expand the ACS quirk to a notable
> fraction of the Cavium PCI device ID address space.  Thanks,
> 
> Alex

Bjorn, would you take these patches? 

We might need to extend the quirk to cover more cn8xxx variants,
this is not yet entirely clear on our side.

Therefore I'd like to ask if we could merge this patches now to solve the
long-standing issue for cn88xx.

thanks,
Jan


[PATCH v10 2/7] perf: export perf_event_update_userpage()

2017-09-25 Thread Jan Glauber
Export perf_event_update_userpage(). This change is needed to allow
building a PMU driver as a kernel module.

Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ingo Molnar <mi...@redhat.com>
Cc: Arnaldo Carvalho de Melo <a...@kernel.org>

Signed-off-by: Jan Glauber <jglau...@cavium.com>
---
 kernel/events/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6bc21e202ae4..162f5ba756a9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4982,6 +4982,7 @@ void perf_event_update_userpage(struct perf_event *event)
 unlock:
rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(perf_event_update_userpage);
 
 static int perf_mmap_fault(struct vm_fault *vmf)
 {
-- 
2.9.0.rc0.21.g322



[PATCH v10 0/7] Cavium ARM64 uncore PMU support

2017-09-25 Thread Jan Glauber
Add support for various PMU counters found on the Cavium ThunderX and
OcteonTx SoC.

The PMU driver provides common "uncore" functions to avoid code duplication
and support adding more device PMUs (like L2 cache) in the future.

Changes to v9:
- Fix build error in first EDAC patch
- Use alphabetic ordering in some places
- Store pmu name in struct pmu
- Use devm_*
- Remove mapping of LMC events, expose hardware event numbers
  directly
- Simplify event removal

Changes to v8:
- Wrapper for PCI devices

Jan Glauber (7):
  edac: thunderx: Remove suspend/resume support
  perf: export perf_event_update_userpage()
  edac,soc: thunderx: Add wrapper for EDAC LMC PCI device
  edac,soc: thunderx: Add wrapper for EDAC OCX PCI device
  perf: cavium: Support memory controller PMU counters
  perf: cavium: Support transmit-link PMU counters
  perf: cavium: Add Documentation

 Documentation/perf/cavium-pmu.txt |  75 +
 drivers/edac/Kconfig  |   3 +
 drivers/edac/thunderx_edac.c  |  92 +-
 drivers/perf/Kconfig  |  15 +
 drivers/perf/Makefile |   1 +
 drivers/perf/cavium_pmu.c | 660 ++
 drivers/soc/Kconfig   |   1 +
 drivers/soc/Makefile  |   1 +
 drivers/soc/cavium/Kconfig|  14 +
 drivers/soc/cavium/Makefile   |   2 +
 drivers/soc/cavium/cavium_lmc.c   |  53 +++
 drivers/soc/cavium/cavium_ocx.c   |  49 +++
 include/linux/cpuhotplug.h|   1 +
 include/linux/soc/cavium/lmc.h|  12 +
 include/linux/soc/cavium/ocx.h|  12 +
 kernel/events/core.c  |   1 +
 16 files changed, 913 insertions(+), 79 deletions(-)
 create mode 100644 Documentation/perf/cavium-pmu.txt
 create mode 100644 drivers/perf/cavium_pmu.c
 create mode 100644 drivers/soc/cavium/Kconfig
 create mode 100644 drivers/soc/cavium/Makefile
 create mode 100644 drivers/soc/cavium/cavium_lmc.c
 create mode 100644 drivers/soc/cavium/cavium_ocx.c
 create mode 100644 include/linux/soc/cavium/lmc.h
 create mode 100644 include/linux/soc/cavium/ocx.h

-- 
2.9.0.rc0.21.g322



[PATCH v10 2/7] perf: export perf_event_update_userpage()

2017-09-25 Thread Jan Glauber
Export perf_event_update_userpage(). This change is needed to allow
building a PMU driver as a kernel module.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Cc: Arnaldo Carvalho de Melo 

Signed-off-by: Jan Glauber 
---
 kernel/events/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6bc21e202ae4..162f5ba756a9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4982,6 +4982,7 @@ void perf_event_update_userpage(struct perf_event *event)
 unlock:
rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(perf_event_update_userpage);
 
 static int perf_mmap_fault(struct vm_fault *vmf)
 {
-- 
2.9.0.rc0.21.g322



[PATCH v10 0/7] Cavium ARM64 uncore PMU support

2017-09-25 Thread Jan Glauber
Add support for various PMU counters found on the Cavium ThunderX and
OcteonTx SoC.

The PMU driver provides common "uncore" functions to avoid code duplication
and support adding more device PMUs (like L2 cache) in the future.

Changes to v9:
- Fix build error in first EDAC patch
- Use alphabetic ordering in some places
- Store pmu name in struct pmu
- Use devm_*
- Remove mapping of LMC events, expose hardware event numbers
  directly
- Simplify event removal

Changes to v8:
- Wrapper for PCI devices

Jan Glauber (7):
  edac: thunderx: Remove suspend/resume support
  perf: export perf_event_update_userpage()
  edac,soc: thunderx: Add wrapper for EDAC LMC PCI device
  edac,soc: thunderx: Add wrapper for EDAC OCX PCI device
  perf: cavium: Support memory controller PMU counters
  perf: cavium: Support transmit-link PMU counters
  perf: cavium: Add Documentation

 Documentation/perf/cavium-pmu.txt |  75 +
 drivers/edac/Kconfig  |   3 +
 drivers/edac/thunderx_edac.c  |  92 +-
 drivers/perf/Kconfig  |  15 +
 drivers/perf/Makefile |   1 +
 drivers/perf/cavium_pmu.c | 660 ++
 drivers/soc/Kconfig   |   1 +
 drivers/soc/Makefile  |   1 +
 drivers/soc/cavium/Kconfig|  14 +
 drivers/soc/cavium/Makefile   |   2 +
 drivers/soc/cavium/cavium_lmc.c   |  53 +++
 drivers/soc/cavium/cavium_ocx.c   |  49 +++
 include/linux/cpuhotplug.h|   1 +
 include/linux/soc/cavium/lmc.h|  12 +
 include/linux/soc/cavium/ocx.h|  12 +
 kernel/events/core.c  |   1 +
 16 files changed, 913 insertions(+), 79 deletions(-)
 create mode 100644 Documentation/perf/cavium-pmu.txt
 create mode 100644 drivers/perf/cavium_pmu.c
 create mode 100644 drivers/soc/cavium/Kconfig
 create mode 100644 drivers/soc/cavium/Makefile
 create mode 100644 drivers/soc/cavium/cavium_lmc.c
 create mode 100644 drivers/soc/cavium/cavium_ocx.c
 create mode 100644 include/linux/soc/cavium/lmc.h
 create mode 100644 include/linux/soc/cavium/ocx.h

-- 
2.9.0.rc0.21.g322



  1   2   3   4   5   6   7   8   9   10   >