[PATCH v2 1/3] Revert "netfilter: x_tables: Update remaining dereference to RCU"

2021-03-07 Thread Mark Tomlinson
This reverts commit 443d6e86f821a165fae3fc3fc13086d27ac140b1.

This (and the following) patch basically re-implemented the RCU
mechanisms of patch 784544739a25. That patch was replaced because of the
performance problems that it created when replacing tables. Now, we have
the same issue: the call to synchronize_rcu() makes replacing tables
slower by as much as an order of magnitude.

Revert these patches and fix the issue in a different way.

Signed-off-by: Mark Tomlinson 
---
 net/ipv4/netfilter/arp_tables.c | 2 +-
 net/ipv4/netfilter/ip_tables.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c576a63d09db..563b62b76a5f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
 
ret = compat_table_info(private, &info);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e8f6f9d86237..6e2851f8d3a3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct 
compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0d453fa9e327..c4f532f4d311 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct 
compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
-- 
2.30.1



[PATCH v2 2/3] Revert "netfilter: x_tables: Switch synchronization to RCU"

2021-03-07 Thread Mark Tomlinson
This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c.

This (and the preceding) patch basically re-implemented the RCU
mechanisms of patch 784544739a25. That patch was replaced because of the
performance problems that it created when replacing tables. Now, we have
the same issue: the call to synchronize_rcu() makes replacing tables
slower by as much as an order of magnitude.

Prior to using RCU a script calling "iptables" approx. 200 times was
taking 1.16s. With RCU this increased to 11.59s.

Revert these patches and fix the issue in a different way.

Signed-off-by: Mark Tomlinson 
---
 include/linux/netfilter/x_tables.h |  5 +--
 net/ipv4/netfilter/arp_tables.c| 14 -
 net/ipv4/netfilter/ip_tables.c | 14 -
 net/ipv6/netfilter/ip6_tables.c| 14 -
 net/netfilter/x_tables.c   | 49 +-
 5 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 8ebb64193757..5deb099d156d 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -227,7 +227,7 @@ struct xt_table {
unsigned int valid_hooks;
 
/* Man behind the curtain... */
-   struct xt_table_info __rcu *private;
+   struct xt_table_info *private;
 
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
@@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned 
int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table);
-
 #ifdef CONFIG_COMPAT
 #include 
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 563b62b76a5f..d1e04d2b5170 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
local_bh_disable();
addend = xt_write_recseq_begin();
-   private = rcu_access_pointer(table->private);
+   private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
@@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct 
xt_table *table)
 {
unsigned int countersize;
struct xt_counters *counters;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(table);
+   const struct xt_table_info *private = table->private;
 
/* We need atomic snapshot of counters: rest doesn't change
 * (other than comefrom, which userspace doesn't care
@@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
const struct arpt_entry *e;
struct xt_counters *counters;
-   struct xt_table_info *private = xt_table_get_private_protected(table);
+   struct xt_table_info *private = table->private;
int ret = 0;
void *loc_cpu_entry;
 
@@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, 
const int *len)
t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
if (!IS_ERR(t)) {
struct arpt_getinfo info;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
struct xt_table_info tmp;
 
@@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct 
arpt_get_entries __user *uptr,
 
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
 
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t 
arg, unsigned int len)
}
 
local_bh_disable();
-   private = xt_table_get_private_protected(t);
+   private = t->private;
if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int 
total_size,
   void __user *userptr)
 {
struct xt_counters *counters;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(table);
+   const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6e2851f8d3a3..f15bc21d7301 100644
--- a/net/ipv4/netfilter

[PATCH v2 3/3] netfilter: x_tables: Use correct memory barriers.

2021-03-07 Thread Mark Tomlinson
When a new table value was assigned, it was followed by a write memory
barrier. This ensured that all writes before this point would complete
before any writes after this point. However, to determine whether the
rules are unused, the sequence counter is read. To ensure that all
writes have been done before these reads, a full memory barrier is
needed, not just a write memory barrier. The same argument applies when
incrementing the counter, before the rules are read.

Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic
reported in cc00bcaa5899 (which is still present), while still
maintaining the same speed of replacing tables.

The smp_mb() barriers potentially slow the packet path, however testing
has shown no measurable change in performance on a 4-core MIPS64
platform.

Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path")
Signed-off-by: Mark Tomlinson 
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/netfilter/x_tables.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 5deb099d156d..8ec48466410a 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void)
 * since addend is most likely 1
 */
__this_cpu_add(xt_recseq.sequence, addend);
-   smp_wmb();
+   smp_mb();
 
return addend;
 }
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index af22dbe85e2c..a2b50596b87e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1387,7 +1387,7 @@ xt_replace_table(struct xt_table *table,
table->private = newinfo;
 
/* make sure all cpus see new ->private value */
-   smp_wmb();
+   smp_mb();
 
/*
 * Even though table entries have now been swapped, other CPU's
-- 
2.30.1



[PATCH v2 0/3] Don't use RCU for x_tables synchronization

2021-03-07 Thread Mark Tomlinson
The patches to change to using RCU synchronization in x_tables cause
updating tables to be slowed down by an order of magnitude. This has
been tried before, see https://lore.kernel.org/patchwork/patch/151796/
and ultimately was rejected. As mentioned in the patch description, a
different method can be used to ensure ordering of reads/writes. This
can simply be done by changing from smp_wmb() to smp_mb().

changes in v2:
- Update commit messages only

Mark Tomlinson (3):
  Revert "netfilter: x_tables: Update remaining dereference to RCU"
  Revert "netfilter: x_tables: Switch synchronization to RCU"
  netfilter: x_tables: Use correct memory barriers.

 include/linux/netfilter/x_tables.h |  7 ++---
 net/ipv4/netfilter/arp_tables.c| 16 +-
 net/ipv4/netfilter/ip_tables.c | 16 +-
 net/ipv6/netfilter/ip6_tables.c| 16 +-
 net/netfilter/x_tables.c   | 49 +-
 5 files changed, 60 insertions(+), 44 deletions(-)

-- 
2.30.1



Re: [PATCH 3/3] netfilter: x_tables: Use correct memory barriers.

2021-03-04 Thread Mark Tomlinson
On Thu, 2021-03-04 at 08:46 +0100, Florian Westphal wrote:
> Mark Tomlinson  wrote:
> > Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic
> > reported in cc00bcaa5899,
> 
> Can you reproduce the crashes without this change?

Yes. In our test environment we were seeing a kernel panic approx. twice
a day, with a similar output to that shown in Subash's patch (cc00bcaa5899).
With this patch we are not seeing any issue. The CPU is a dual-core ARM
Cortex-A9.

> > How much of an impact is the MB change on the packet path?

I will run our throughput tests and get these results.

I have a script which makes around 200 calls to iptables. This was taking
11.59s and now is back to 1.16s.



[PATCH 2/3] Revert "netfilter: x_tables: Switch synchronization to RCU"

2021-03-03 Thread Mark Tomlinson
This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c.

This (and the preceding) patch basically re-implemented the RCU
mechanisms of patch 784544739a25. That patch was replaced because of the
performance problems that it created when replacing tables. Now, we have
the same issue: the call to synchronize_rcu() makes replacing tables
slower by as much as an order of magnitude.

See https://lore.kernel.org/patchwork/patch/151796/ for why using RCU is
not a good idea.

Revert these patches and fix the issue in a different way.

Signed-off-by: Mark Tomlinson 
---
 include/linux/netfilter/x_tables.h |  5 +--
 net/ipv4/netfilter/arp_tables.c| 14 -
 net/ipv4/netfilter/ip_tables.c | 14 -
 net/ipv6/netfilter/ip6_tables.c| 14 -
 net/netfilter/x_tables.c   | 49 +-
 5 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 8ebb64193757..5deb099d156d 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -227,7 +227,7 @@ struct xt_table {
unsigned int valid_hooks;
 
/* Man behind the curtain... */
-   struct xt_table_info __rcu *private;
+   struct xt_table_info *private;
 
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
@@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned 
int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table);
-
 #ifdef CONFIG_COMPAT
 #include 
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 563b62b76a5f..d1e04d2b5170 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
local_bh_disable();
addend = xt_write_recseq_begin();
-   private = rcu_access_pointer(table->private);
+   private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
@@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct 
xt_table *table)
 {
unsigned int countersize;
struct xt_counters *counters;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(table);
+   const struct xt_table_info *private = table->private;
 
/* We need atomic snapshot of counters: rest doesn't change
 * (other than comefrom, which userspace doesn't care
@@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
const struct arpt_entry *e;
struct xt_counters *counters;
-   struct xt_table_info *private = xt_table_get_private_protected(table);
+   struct xt_table_info *private = table->private;
int ret = 0;
void *loc_cpu_entry;
 
@@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, 
const int *len)
t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
if (!IS_ERR(t)) {
struct arpt_getinfo info;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
struct xt_table_info tmp;
 
@@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct 
arpt_get_entries __user *uptr,
 
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
 
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t 
arg, unsigned int len)
}
 
local_bh_disable();
-   private = xt_table_get_private_protected(t);
+   private = t->private;
if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int 
total_size,
   void __user *userptr)
 {
struct xt_counters *counters;
-   const struct xt_table_info *private = 
xt_table_get_private_protected(table);
+   const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6e2851f8d3a3..f15bc21d7301 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4

[PATCH 3/3] netfilter: x_tables: Use correct memory barriers.

2021-03-03 Thread Mark Tomlinson
When a new table value was assigned, it was followed by a write memory
barrier. This ensured that all writes before this point would complete
before any writes after this point. However, to determine whether the
rules are unused, the sequence counter is read. To ensure that all
writes have been done before these reads, a full memory barrier is
needed, not just a write memory barrier. The same argument applies when
incrementing the counter, before the rules are read.

Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic
reported in cc00bcaa5899, while still maintaining the same speed of
replacing tables.

Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path")
Signed-off-by: Mark Tomlinson 
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/netfilter/x_tables.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h 
b/include/linux/netfilter/x_tables.h
index 5deb099d156d..8ec48466410a 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void)
 * since addend is most likely 1
 */
__this_cpu_add(xt_recseq.sequence, addend);
-   smp_wmb();
+   smp_mb();
 
return addend;
 }
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index af22dbe85e2c..a2b50596b87e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1387,7 +1387,7 @@ xt_replace_table(struct xt_table *table,
table->private = newinfo;
 
/* make sure all cpus see new ->private value */
-   smp_wmb();
+   smp_mb();
 
/*
 * Even though table entries have now been swapped, other CPU's
-- 
2.30.1



[PATCH 1/3] Revert "netfilter: x_tables: Update remaining dereference to RCU"

2021-03-03 Thread Mark Tomlinson
This reverts commit 443d6e86f821a165fae3fc3fc13086d27ac140b1.

This (and the following) patch basically re-implemented the RCU
mechanisms of patch 784544739a25. That patch was replaced because of the
performance problems that it created when replacing tables. Now, we have
the same issue: the call to synchronize_rcu() makes replacing tables
slower by as much as an order of magnitude.

See https://lore.kernel.org/patchwork/patch/151796/ for why using RCU is
not a good idea.

Revert these patches and fix the issue in a different way.

Signed-off-by: Mark Tomlinson 
---
 net/ipv4/netfilter/arp_tables.c | 2 +-
 net/ipv4/netfilter/ip_tables.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c576a63d09db..563b62b76a5f 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
 
ret = compat_table_info(private, &info);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e8f6f9d86237..6e2851f8d3a3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct 
compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0d453fa9e327..c4f532f4d311 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct 
compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR(t)) {
-   const struct xt_table_info *private = 
xt_table_get_private_protected(t);
+   const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
-- 
2.30.1



[PATCH 0/3] Don't use RCU for x_tables synchronization

2021-03-03 Thread Mark Tomlinson
The patches to change to using RCU synchronization in x_tables cause
updating tables to be slowed down by an order of magnitude. This has
been tried before, see https://lore.kernel.org/patchwork/patch/151796/
and ultimately was rejected. As mentioned in the patch description, a
different method can be used to ensure ordering of reads/writes. This
can simply be done by changing from smp_wmb() to smp_mb().

Mark Tomlinson (3):
  Revert "netfilter: x_tables: Update remaining dereference to RCU"
  Revert "netfilter: x_tables: Switch synchronization to RCU"
  netfilter: x_tables: Use correct memory barriers.

 include/linux/netfilter/x_tables.h |  7 ++---
 net/ipv4/netfilter/arp_tables.c| 16 +-
 net/ipv4/netfilter/ip_tables.c | 16 +-
 net/ipv6/netfilter/ip6_tables.c| 16 +-
 net/netfilter/x_tables.c   | 49 +-
 5 files changed, 60 insertions(+), 44 deletions(-)

-- 
2.30.1



[PATCH] pinctrl: bcm: pinctrl-nsp-gpio: Fix setting GPIO as output

2020-11-16 Thread Mark Tomlinson
When setting a GPIO pin to an output, it is important to set the value
correctly before enabling the output so that a glitch is not seen on the
pin. This glitch may be very short, but can be important if this is a
reset signal.

Fixes: 8bfcbbbcabe0 ("pinctrl: nsp: add gpio-a driver support for Broadcom NSP 
SoC")
Signed-off-by: Mark Tomlinson 
---
 drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c 
b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
index a00a42a61a90..942f04ca4868 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -289,8 +289,8 @@ static int nsp_gpio_direction_output(struct gpio_chip *gc, 
unsigned gpio,
unsigned long flags;
 
raw_spin_lock_irqsave(&chip->lock, flags);
-   nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, true);
nsp_set_bit(chip, REG, NSP_GPIO_DATA_OUT, gpio, !!(val));
+   nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, true);
raw_spin_unlock_irqrestore(&chip->lock, flags);
 
dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val);
-- 
2.29.2



[PATCH] pinctrl: bcm: pinctrl-iproc-gpio: Fix setting GPIO as output

2020-11-16 Thread Mark Tomlinson
When setting a GPIO pin to an output, it is important to set the value
correctly before enabling the output so that a glitch is not seen on the
pin. This glitch may be very short, but can be important if this is a
reset signal.

Fixes: b64333ce769c ("pinctrl: cygnus: add gpio/pinconf driver")
Signed-off-by: Mark Tomlinson 
---
 drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c 
b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
index e2bd2dce6bb4..cadcf5eb0466 100644
--- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
@@ -348,8 +348,8 @@ static int iproc_gpio_direction_output(struct gpio_chip 
*gc, unsigned gpio,
unsigned long flags;
 
raw_spin_lock_irqsave(&chip->lock, flags);
-   iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, true);
iproc_set_bit(chip, IPROC_GPIO_DATA_OUT_OFFSET, gpio, !!(val));
+   iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, true);
raw_spin_unlock_irqrestore(&chip->lock, flags);
 
dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val);
-- 
2.29.2



[PATCH] mtd: mtdoops: Don't write panic data twice

2020-09-02 Thread Mark Tomlinson
If calling mtdoops_write, don't also schedule work to be done later.

Although this appears to not be causing an issue, possibly because the
scheduled work will never get done, it is confusing.

Fixes: 016c1291ce70 ("mtd: mtdoops: do not use mtd->panic_write directly")
Signed-off-by: Mark Tomlinson 
---
 drivers/mtd/mtdoops.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 4ced68be7ed7..774970bfcf85 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -279,12 +279,13 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
 record_size - MTDOOPS_HEADER_SIZE, NULL);
 
-   /* Panics must be written immediately */
-   if (reason != KMSG_DUMP_OOPS)
+   if (reason != KMSG_DUMP_OOPS) {
+   /* Panics must be written immediately */
mtdoops_write(cxt, 1);
-
-   /* For other cases, schedule work to write it "nicely" */
-   schedule_work(&cxt->work_write);
+   } else {
+   /* For other cases, schedule work to write it "nicely" */
+   schedule_work(&cxt->work_write);
+   }
 }
 
 static void mtdoops_notify_add(struct mtd_info *mtd)
-- 
2.28.0



[PATCH v3] i2c: mv64xxx: Add bus error recovery

2020-08-26 Thread Mark Tomlinson
This adds i2c bus recovery to the mv64xxx driver.

Implement bus recovery to recover from SCL/SDA stuck low.

This uses the generic recovery function, setting the clock/data lines as
GPIO pins, and sending 9 clocks to try and recover the bus.

Signed-off-by: Mark Tomlinson 
---
Changes in v2:
 - use generic GPIO recovery function.

Changes in v3:
 - remove call to i2c_recover_bus() during probe.
 - change error message to info when pinctrl fails, matching other
   similar code.
 - handle a lack of pinctrl information better.

 drivers/i2c/busses/i2c-mv64xxx.c | 28 +++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 8d9d4ffdcd24..5c5f1d797986 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -147,6 +148,7 @@ struct mv64xxx_i2c_data {
boolirq_clear_inverted;
/* Clk div is 2 to the power n, not 2 to the power n + 1 */
boolclk_n_base_0;
+   struct i2c_bus_recovery_inforinfo;
 };
 
 static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = {
@@ -325,7 +327,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 
status)
 drv_data->msg->flags);
drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP;
mv64xxx_i2c_hw_init(drv_data);
-   drv_data->rc = -EIO;
+   i2c_recover_bus(&drv_data->adapter);
+   drv_data->rc = -EAGAIN;
}
 }
 
@@ -562,6 +565,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data 
*drv_data)
"time_left: %d\n", drv_data->block,
(int)time_left);
mv64xxx_i2c_hw_init(drv_data);
+   i2c_recover_bus(&drv_data->adapter);
}
} else
spin_unlock_irqrestore(_data->lock, flags);
@@ -871,6 +875,24 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 }
 #endif /* CONFIG_OF */
 
+static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data,
+ struct device *dev)
+{
+   struct i2c_bus_recovery_info *rinfo = &drv_data->rinfo;
+
+   rinfo->pinctrl = devm_pinctrl_get(dev);
+   if (IS_ERR(rinfo->pinctrl)) {
+   if (PTR_ERR(rinfo->pinctrl) == -EPROBE_DEFER)
+   return -EPROBE_DEFER;
+   dev_info(dev, "can't get pinctrl, bus recovery not 
supported\n");
+   return PTR_ERR(rinfo->pinctrl);
+   } else if (!rinfo->pinctrl)
+   return -ENODEV;
+
+   drv_data->adapter.bus_recovery_info = rinfo;
+   return 0;
+}
+
 static int
 mv64xxx_i2c_probe(struct platform_device *pd)
 {
@@ -927,6 +949,10 @@ mv64xxx_i2c_probe(struct platform_device *pd)
goto exit_reset;
}
 
+   rc = mv64xxx_i2c_init_recovery_info(drv_data, &pd->dev);
+   if (rc == -EPROBE_DEFER)
+   goto exit_reset;
+
drv_data->adapter.dev.parent = >dev;
drv_data->adapter.algo = _i2c_algo;
drv_data->adapter.owner = THIS_MODULE;
-- 
2.28.0



Re: [PATCH v2] i2c: mv64xxx: Add bus error recovery

2020-08-26 Thread Mark Tomlinson
On Tue, 2020-08-25 at 11:26 +0200, Wolfram Sang wrote:
> On Tue, Aug 25, 2020 at 10:52:54AM +1200, Mark Tomlinson wrote:
> 
> This adds i2c bus recovery to the mv64xxx driver.
> 
> Implement bus recovery to recover from SCL/SDA stuck low.
> 
> This uses the generic recovery function, setting the clock/data lines as
> GPIO pins, and sending 9 clocks to try and recover the bus.
> 
> Signed-off-by: Mark Tomlinson 
> ---
> Changes in v2:
>  - use generic GPIO recovery function.
> 
> Thank you for doing that! Glad to see the new helper function works for
> you as well. The initialization code is all well, but I wonder about the
> use of i2c_recover_bus(). Recovery should be tried only if it is
> detected that SDA is pulled low when the bus should be free. So, you
> shouldn't call i2c_recover_bus() in unconditionally in probe(). Can your
> HW detect if SDA is stuck low?

We actually don't need this call, as the bootloader checks and leaves the
bus in a good state. I will remove this from the patch.



[PATCH v2] i2c: mv64xxx: Add bus error recovery

2020-08-24 Thread Mark Tomlinson
This adds i2c bus recovery to the mv64xxx driver.

Implement bus recovery to recover from SCL/SDA stuck low.

This uses the generic recovery function, setting the clock/data lines as
GPIO pins, and sending 9 clocks to try and recover the bus.

Signed-off-by: Mark Tomlinson 
---
Changes in v2:
 - use generic GPIO recovery function.

 drivers/i2c/busses/i2c-mv64xxx.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 8d9d4ffdcd24..e0f0c76c0d3b 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -147,6 +148,7 @@ struct mv64xxx_i2c_data {
boolirq_clear_inverted;
/* Clk div is 2 to the power n, not 2 to the power n + 1 */
boolclk_n_base_0;
+   struct i2c_bus_recovery_inforinfo;
 };
 
 static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = {
@@ -325,7 +327,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 
status)
 drv_data->msg->flags);
drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP;
mv64xxx_i2c_hw_init(drv_data);
-   drv_data->rc = -EIO;
+   i2c_recover_bus(_data->adapter);
+   drv_data->rc = -EAGAIN;
}
 }
 
@@ -562,6 +565,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data 
*drv_data)
"time_left: %d\n", drv_data->block,
(int)time_left);
mv64xxx_i2c_hw_init(drv_data);
+   i2c_recover_bus(_data->adapter);
}
} else
spin_unlock_irqrestore(_data->lock, flags);
@@ -871,6 +875,22 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 }
 #endif /* CONFIG_OF */
 
+static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data,
+ struct device *dev)
+{
+   struct i2c_bus_recovery_info *rinfo = _data->rinfo;
+
+   rinfo->pinctrl = devm_pinctrl_get(dev);
+   if (IS_ERR(rinfo->pinctrl)) {
+   if (PTR_ERR(rinfo->pinctrl) == -EPROBE_DEFER)
+   return -EPROBE_DEFER;
+   dev_err(dev, "can't get pinctrl, bus recovery not supported\n");
+   return PTR_ERR(rinfo->pinctrl);
+   }
+   drv_data->adapter.bus_recovery_info = rinfo;
+   return 0;
+}
+
 static int
 mv64xxx_i2c_probe(struct platform_device *pd)
 {
@@ -927,6 +947,10 @@ mv64xxx_i2c_probe(struct platform_device *pd)
goto exit_reset;
}
 
+   rc = mv64xxx_i2c_init_recovery_info(drv_data, >dev);
+   if (rc == -EPROBE_DEFER)
+   goto exit_reset;
+
drv_data->adapter.dev.parent = >dev;
drv_data->adapter.algo = _i2c_algo;
drv_data->adapter.owner = THIS_MODULE;
@@ -950,6 +974,7 @@ mv64xxx_i2c_probe(struct platform_device *pd)
"mv64xxx: Can't add i2c adapter, rc: %d\n", -rc);
goto exit_free_irq;
}
+   i2c_recover_bus(_data->adapter);
 
return 0;
 
-- 
2.28.0



[PATCH] gre6: Fix reception with IP6_TNL_F_RCV_DSCP_COPY

2020-08-18 Thread Mark Tomlinson
When receiving an IPv4 packet inside an IPv6 GRE packet, and the
IP6_TNL_F_RCV_DSCP_COPY flag is set on the tunnel, the IPv4 header would
get corrupted. This is due to the common ip6_tnl_rcv() function assuming
that the inner header is always IPv6. This patch checks the tunnel
protocol for IPv4 inner packets, but still defaults to IPv6.

Fixes: 308edfdf1563 ("gre6: Cleanup GREv6 receive path, call common GRE 
functions")
Signed-off-by: Mark Tomlinson 
---
 net/ipv6/ip6_tunnel.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f635914f42ec..a0217e5bf3bc 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -915,7 +915,15 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst,
bool log_ecn_err)
 {
-   return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
+   int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
+   const struct ipv6hdr *ipv6h,
+   struct sk_buff *skb);
+
+   dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate;
+   if (tpi->proto == htons(ETH_P_IP))
+   dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate;
+
+   return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 log_ecn_err);
 }
 EXPORT_SYMBOL(ip6_tnl_rcv);
-- 
2.28.0



[PATCH v4] PCI: Reduce warnings on possible RW1C corruption

2020-08-05 Thread Mark Tomlinson
For hardware that only supports 32-bit writes to PCI there is the
possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited
message was introduced by fb2659230120, but rate-limiting is not the
best choice here. Some devices may not show the warnings they should if
another device has just produced a bunch of warnings. Also, the number
of messages can be a nuisance on devices which are otherwise working
fine.

This patch changes the ratelimit to a single warning per bus. This
ensures no bus is 'starved' of emitting a warning and also that there
isn't a continuous stream of warnings. It would be preferable to have a
warning per device, but the pci_dev structure is not available here, and
a lookup from devfn would be far too slow.

Suggested-by: Bjorn Helgaas 
Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit 
config writes")
Signed-off-by: Mark Tomlinson 
---
changes in v4:
 - Use bitfield rather than bool to save memory (was meant to be in v3).

 drivers/pci/access.c | 9 ++---
 include/linux/pci.h  | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 79c4a2ef269a..b452467fd133 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, 
unsigned int devfn,
 * write happen to have any RW1C (write-one-to-clear) bits set, we
 * just inadvertently cleared something we shouldn't have.
 */
-   dev_warn_ratelimited(>dev, "%d-byte config write to 
%04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
-size, pci_domain_nr(bus), bus->number,
-PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   if (!bus->unsafe_warn) {
+   dev_warn(>dev, "%d-byte config write to %04x:%02x:%02x.%d 
offset %#x may corrupt adjacent RW1C bits\n",
+size, pci_domain_nr(bus), bus->number,
+PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   bus->unsafe_warn = 1;
+   }
 
mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
tmp = readl(addr) & mask;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 34c1c4f45288..85211a787f8b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -626,6 +626,7 @@ struct pci_bus {
struct bin_attribute*legacy_io; /* Legacy I/O for this bus */
struct bin_attribute*legacy_mem;/* Legacy mem */
unsigned intis_added:1;
+   unsigned intunsafe_warn:1;  /* warned about RW1C config 
write */
 };
 
 #define to_pci_bus(n)  container_of(n, struct pci_bus, dev)
-- 
2.28.0



[PATCH] RTC: Implement pretimeout watchdog for DS1307

2020-08-03 Thread Mark Tomlinson
If the hardware watchdog in the clock chip simply pulls the reset line
of the CPU, then there is no chance to write a stack trace to help
determine what may have been blocking the CPU.

This patch adds a pretimeout to the watchdog, which, if enabled, sets
a timer to go off before the hardware watchdog kicks in, and calls
the standard pretimeout function, which can (for example) call panic.

Signed-off-by: Mark Tomlinson 
---
 drivers/rtc/rtc-ds1307.c | 35 ++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 49702942bb08..647f8659d0bd 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * We can't determine type by probing, but if we expect pre-Linux code
@@ -174,6 +175,10 @@ struct ds1307 {
 #ifdef CONFIG_COMMON_CLK
struct clk_hw   clks[2];
 #endif
+#ifdef CONFIG_WATCHDOG_CORE
+   struct timer_list   soft_timer;
+   struct watchdog_device  *wdt;
+#endif
 };
 
 struct chip_desc {
@@ -863,12 +868,34 @@ static int m41txx_rtc_set_offset(struct device *dev, long 
offset)
 }
 
 #ifdef CONFIG_WATCHDOG_CORE
+static void ds1388_soft_wdt_expire(struct timer_list *soft_timer)
+{
+   struct ds1307 *ds1307 = container_of(soft_timer, struct ds1307, 
soft_timer);
+
+   watchdog_notify_pretimeout(ds1307->wdt);
+}
+
+static void ds1388_soft_timer_set(struct watchdog_device *wdt_dev)
+{
+   struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev);
+   int soft_timeout;
+
+   if (wdt_dev->pretimeout > 0) {
+   soft_timeout = wdt_dev->timeout - wdt_dev->pretimeout;
+   mod_timer(>soft_timer, jiffies + soft_timeout * HZ);
+   } else {
+   del_timer(>soft_timer);
+   }
+}
+
 static int ds1388_wdt_start(struct watchdog_device *wdt_dev)
 {
struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev);
u8 regs[2];
int ret;
 
+   ds1388_soft_timer_set(wdt_dev);
+
ret = regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG,
 DS1388_BIT_WF, 0);
if (ret)
@@ -900,6 +927,7 @@ static int ds1388_wdt_stop(struct watchdog_device *wdt_dev)
 {
struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev);
 
+   del_timer(>soft_timer);
return regmap_update_bits(ds1307->regmap, DS1388_REG_CONTROL,
  DS1388_BIT_WDE | DS1388_BIT_RST, 0);
 }
@@ -909,6 +937,7 @@ static int ds1388_wdt_ping(struct watchdog_device *wdt_dev)
struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev);
u8 regs[2];
 
+   ds1388_soft_timer_set(wdt_dev);
return regmap_bulk_read(ds1307->regmap, DS1388_REG_WDOG_HUN_SECS, regs,
sizeof(regs));
 }
@@ -923,6 +952,7 @@ static int ds1388_wdt_set_timeout(struct watchdog_device 
*wdt_dev,
regs[0] = 0;
regs[1] = bin2bcd(wdt_dev->timeout);
 
+   ds1388_soft_timer_set(wdt_dev);
return regmap_bulk_write(ds1307->regmap, DS1388_REG_WDOG_HUN_SECS, regs,
 sizeof(regs));
 }
@@ -1652,7 +1682,8 @@ static void ds1307_clks_register(struct ds1307 *ds1307)
 
 #ifdef CONFIG_WATCHDOG_CORE
 static const struct watchdog_info ds1388_wdt_info = {
-   .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+   .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+  WDIOF_MAGICCLOSE | WDIOF_PRETIMEOUT,
.identity = "DS1388 watchdog",
 };
 
@@ -1681,6 +1712,8 @@ static void ds1307_wdt_register(struct ds1307 *ds1307)
wdt->timeout = 99;
wdt->max_timeout = 99;
wdt->min_timeout = 1;
+   ds1307->wdt = wdt;
+   timer_setup(>soft_timer, ds1388_soft_wdt_expire, 0);
 
watchdog_init_timeout(wdt, 0, ds1307->dev);
watchdog_set_drvdata(wdt, ds1307);
-- 
2.28.0



[PATCH v3 2/2] PCI: Reduce warnings on possible RW1C corruption

2020-08-02 Thread Mark Tomlinson
For hardware that only supports 32-bit writes to PCI there is the
possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited
message was introduced by fb2659230120, but rate-limiting is not the
best choice here. Some devices may not show the warnings they should if
another device has just produced a bunch of warnings. Also, the number
of messages can be a nuisance on devices which are otherwise working
fine.

This patch changes the ratelimit to a single warning per bus. This
ensures no bus is 'starved' of emitting a warning and also that there
isn't a continuous stream of warnings. It would be preferable to have a
warning per device, but the pci_dev structure is not available here, and
a lookup from devfn would be far too slow.

Suggested-by: Bjorn Helgaas 
Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit 
config writes")
Signed-off-by: Mark Tomlinson 
---
 drivers/pci/access.c | 9 ++---
 include/linux/pci.h  | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 79c4a2ef269a..ab85cb7df9b6 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, 
unsigned int devfn,
 * write happen to have any RW1C (write-one-to-clear) bits set, we
 * just inadvertently cleared something we shouldn't have.
 */
-   dev_warn_ratelimited(>dev, "%d-byte config write to 
%04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
-size, pci_domain_nr(bus), bus->number,
-PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   if (!bus->unsafe_warn) {
+   dev_warn(>dev, "%d-byte config write to %04x:%02x:%02x.%d 
offset %#x may corrupt adjacent RW1C bits\n",
+size, pci_domain_nr(bus), bus->number,
+PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   bus->unsafe_warn = true;
+   }
 
mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
tmp = readl(addr) & mask;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 34c1c4f45288..5b6ab593ae09 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -613,6 +613,7 @@ struct pci_bus {
unsigned char   primary;/* Number of primary bridge */
unsigned char   max_bus_speed;  /* enum pci_bus_speed */
unsigned char   cur_bus_speed;  /* enum pci_bus_speed */
+   boolunsafe_warn;/* warned about RW1C config write */
 #ifdef CONFIG_PCI_DOMAINS_GENERIC
int domain_nr;
 #endif
-- 
2.28.0



[PATCH v3 1/2] PCI: iproc: Set affinity mask on MSI interrupts

2020-08-02 Thread Mark Tomlinson
The core interrupt code expects the irq_set_affinity call to update the
effective affinity for the interrupt. This was not being done, so update
iproc_msi_irq_set_affinity() to do so.

Fixes: 3bc2b2348835 ("PCI: iproc: Add iProc PCIe MSI support")
Signed-off-by: Mark Tomlinson 
---
changes in v2:
 - Patch 1/2 Added Fixes tag
 - Patch 2/2 Replace original change with change suggested by Bjorn
   Helgaas.

changes in v3:
 - Use bitfield rather than bool to save memory.

 drivers/pci/controller/pcie-iproc-msi.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc-msi.c 
b/drivers/pci/controller/pcie-iproc-msi.c
index 3176ad3ab0e5..908475d27e0e 100644
--- a/drivers/pci/controller/pcie-iproc-msi.c
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data 
*data,
struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
int target_cpu = cpumask_first(mask);
int curr_cpu;
+   int ret;
 
curr_cpu = hwirq_to_cpu(msi, data->hwirq);
if (curr_cpu == target_cpu)
-   return IRQ_SET_MASK_OK_DONE;
+   ret = IRQ_SET_MASK_OK_DONE;
+   else {
+   /* steer MSI to the target CPU */
+   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + 
target_cpu;
+   ret = IRQ_SET_MASK_OK;
+   }
 
-   /* steer MSI to the target CPU */
-   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu;
+   irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
 
-   return IRQ_SET_MASK_OK;
+   return ret;
 }
 
 static void iproc_msi_irq_compose_msi_msg(struct irq_data *data,
-- 
2.28.0



Re: [PATCH v2 2/2] PCI: Reduce warnings on possible RW1C corruption

2020-08-02 Thread Mark Tomlinson
On Fri, 2020-07-31 at 09:32 -0600, Rob Herring wrote:
> 
> If we don't want to just warn when a 8 or 16 bit access occurs (I'm
> not sure if 32-bit only accesses is possible or common. Seems like
> PCI_COMMAND would always get written?), then a simple way to do this
> is just move this out of line and do something like this where the bus
> or device is created/registered:
> 
> if (bus->ops->write == pci_generic_config_write32)
> warn()
> 
This doesn't work for many of the PCI drivers, since they wrap the call
to pci_generic_config_write32() in their own function.

> > 
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 34c1c4f45288..5b6ab593ae09 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -613,6 +613,7 @@ struct pci_bus {
> > unsigned char   primary;/* Number of primary bridge */
> > unsigned char   max_bus_speed;  /* enum pci_bus_speed */
> > unsigned char   cur_bus_speed;  /* enum pci_bus_speed */
> > +   boolunsafe_warn;/* warned about RW1C config write */
> 
> Make this a bitfield next to 'is_added'.

Will do, thanks.




[PATCH v2 2/2] PCI: Reduce warnings on possible RW1C corruption

2020-07-30 Thread Mark Tomlinson
For hardware that only supports 32-bit writes to PCI there is the
possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited
message was introduced by fb2659230120, but rate-limiting is not the
best choice here. Some devices may not show the warnings they should if
another device has just produced a bunch of warnings. Also, the number
of messages can be a nuisance on devices which are otherwise working
fine.

This patch changes the ratelimit to a single warning per bus. This
ensures no bus is 'starved' of emitting a warning and also that there
isn't a continuous stream of warnings. It would be preferable to have a
warning per device, but the pci_dev structure is not available here, and
a lookup from devfn would be far too slow.

Suggested-by: Bjorn Helgaas 
Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit 
config writes")
Signed-off-by: Mark Tomlinson 
---
 drivers/pci/access.c | 9 ++---
 include/linux/pci.h  | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 79c4a2ef269a..ab85cb7df9b6 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, 
unsigned int devfn,
 * write happen to have any RW1C (write-one-to-clear) bits set, we
 * just inadvertently cleared something we shouldn't have.
 */
-   dev_warn_ratelimited(>dev, "%d-byte config write to 
%04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
-size, pci_domain_nr(bus), bus->number,
-PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   if (!bus->unsafe_warn) {
+   dev_warn(>dev, "%d-byte config write to %04x:%02x:%02x.%d 
offset %#x may corrupt adjacent RW1C bits\n",
+size, pci_domain_nr(bus), bus->number,
+PCI_SLOT(devfn), PCI_FUNC(devfn), where);
+   bus->unsafe_warn = true;
+   }
 
mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8));
tmp = readl(addr) & mask;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 34c1c4f45288..5b6ab593ae09 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -613,6 +613,7 @@ struct pci_bus {
unsigned char   primary;/* Number of primary bridge */
unsigned char   max_bus_speed;  /* enum pci_bus_speed */
unsigned char   cur_bus_speed;  /* enum pci_bus_speed */
+   boolunsafe_warn;/* warned about RW1C config write */
 #ifdef CONFIG_PCI_DOMAINS_GENERIC
int domain_nr;
 #endif
-- 
2.28.0



[PATCH v2 1/2] PCI: iproc: Set affinity mask on MSI interrupts

2020-07-30 Thread Mark Tomlinson
The core interrupt code expects the irq_set_affinity call to update the
effective affinity for the interrupt. This was not being done, so update
iproc_msi_irq_set_affinity() to do so.

Fixes: 3bc2b2348835 ("PCI: iproc: Add iProc PCIe MSI support")
Signed-off-by: Mark Tomlinson 
---
changes in v2:
 - Patch 1/2 Added Fixes tag
 - Patch 2/2 Replace original change with change suggested by Bjorn
   Helgaas.

 drivers/pci/controller/pcie-iproc-msi.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc-msi.c 
b/drivers/pci/controller/pcie-iproc-msi.c
index 3176ad3ab0e5..908475d27e0e 100644
--- a/drivers/pci/controller/pcie-iproc-msi.c
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data 
*data,
struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
int target_cpu = cpumask_first(mask);
int curr_cpu;
+   int ret;
 
curr_cpu = hwirq_to_cpu(msi, data->hwirq);
if (curr_cpu == target_cpu)
-   return IRQ_SET_MASK_OK_DONE;
+   ret = IRQ_SET_MASK_OK_DONE;
+   else {
+   /* steer MSI to the target CPU */
+   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + 
target_cpu;
+   ret = IRQ_SET_MASK_OK;
+   }
 
-   /* steer MSI to the target CPU */
-   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu;
+   irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
 
-   return IRQ_SET_MASK_OK;
+   return ret;
 }
 
 static void iproc_msi_irq_compose_msi_msg(struct irq_data *data,
-- 
2.28.0



Re: [PATCH 2/3] PCI: iproc: Stop using generic config read/write functions

2020-07-30 Thread Mark Tomlinson
On Thu, 2020-07-30 at 11:09 -0500, Bjorn Helgaas wrote:
> I think it would be better to have a warning once per device, so if
> XYZ device has a problem and we look at the dmesg log, we would find a
> single message for device XYZ as a hint.  Would that reduce the
> nuisance level enough?

We would be OK with that.

> So I think I did it wrong in fb2659230120 ("PCI: Warn on possible RW1C
> corruption for sub-32 bit config writes").  Ratelimiting is the wrong
> concept because what we want is a single warning per device, not a
> limit on the similar messages for *all* devices, maybe something like
> this:
> 
> diff --git a/drivers/pci/access.c b/drivers/pci/access.c
> index 79c4a2ef269a..e5f956b7e3b7 100644
> --- a/drivers/pci/access.c
> +++ b/drivers/pci/access.c
> @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, 
> unsigned int devfn,
>* write happen to have any RW1C (write-one-to-clear) bits set, we
>* just inadvertently cleared something we shouldn't have.
>*/
> - dev_warn_ratelimited(>dev, "%d-byte config write to 
> %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n",
> + if (!(bus->unsafe_warn & (1 << devfn))) {
> + dev_warn(>dev, "%d-byte config write to %04x:%02x:%02x.%d 
> offset %#x may corrupt adjacent RW1C bits\n",
>size, pci_domain_nr(bus), bus->number,
>PCI_SLOT(devfn), PCI_FUNC(devfn), where);
> + bus->unsafe_warn |= 1 << devfn;
> + }

As I understand it, devfn is an 8-bit value with five bits of device
and three bits of function. So (1 << devfn) is not going to fit in an
8-bit mask. Am I missing something here? (I do admit that my PCI
knowledge is not great).




[PATCH 2/3] PCI: iproc: Stop using generic config read/write functions

2020-07-29 Thread Mark Tomlinson
The pci_generic_config_write32() function will give warning messages
whenever writing less than 4 bytes at a time. As there is nothing we can
do about this without changing the hardware, the message is just a
nuisance. So instead of using the generic functions, use the functions
that have already been written for reading/writing the config registers.

Signed-off-by: Mark Tomlinson 
---
 drivers/pci/controller/pcie-iproc.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc.c 
b/drivers/pci/controller/pcie-iproc.c
index 2c836eede42c..68ecd3050529 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -709,12 +709,13 @@ static int iproc_pcie_config_read32(struct pci_bus *bus, 
unsigned int devfn,
 {
int ret;
struct iproc_pcie *pcie = iproc_data(bus);
+   int busno = bus->number;
 
iproc_pcie_apb_err_disable(bus, true);
if (pcie->iproc_cfg_read)
ret = iproc_pcie_config_read(bus, devfn, where, size, val);
else
-   ret = pci_generic_config_read32(bus, devfn, where, size, val);
+   ret = iproc_pci_raw_config_read32(pcie, busno, devfn, where, 
size, val);
iproc_pcie_apb_err_disable(bus, false);
 
return ret;
@@ -724,9 +725,11 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, 
unsigned int devfn,
 int where, int size, u32 val)
 {
int ret;
+   struct iproc_pcie *pcie = iproc_data(bus);
+   int busno = bus->number;
 
iproc_pcie_apb_err_disable(bus, true);
-   ret = pci_generic_config_write32(bus, devfn, where, size, val);
+   ret = iproc_pci_raw_config_write32(pcie, busno, devfn, where, size, 
val);
iproc_pcie_apb_err_disable(bus, false);
 
return ret;
-- 
2.28.0



[PATCH 3/3] PCI: iproc: Set affinity mask on MSI interrupts

2020-07-29 Thread Mark Tomlinson
The core interrupt code expects the irq_set_affinity call to update the
effective affinity for the interrupt. This was not being done, so update
iproc_msi_irq_set_affinity() to do so.

Signed-off-by: Mark Tomlinson 
---
 drivers/pci/controller/pcie-iproc-msi.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc-msi.c 
b/drivers/pci/controller/pcie-iproc-msi.c
index 3176ad3ab0e5..908475d27e0e 100644
--- a/drivers/pci/controller/pcie-iproc-msi.c
+++ b/drivers/pci/controller/pcie-iproc-msi.c
@@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data 
*data,
struct iproc_msi *msi = irq_data_get_irq_chip_data(data);
int target_cpu = cpumask_first(mask);
int curr_cpu;
+   int ret;
 
curr_cpu = hwirq_to_cpu(msi, data->hwirq);
if (curr_cpu == target_cpu)
-   return IRQ_SET_MASK_OK_DONE;
+   ret = IRQ_SET_MASK_OK_DONE;
+   else {
+   /* steer MSI to the target CPU */
+   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + 
target_cpu;
+   ret = IRQ_SET_MASK_OK;
+   }
 
-   /* steer MSI to the target CPU */
-   data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu;
+   irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
 
-   return IRQ_SET_MASK_OK;
+   return ret;
 }
 
 static void iproc_msi_irq_compose_msi_msg(struct irq_data *data,
-- 
2.28.0



[PATCH 1/3] PCI: iproc: Add bus number parameter to read/write functions

2020-07-29 Thread Mark Tomlinson
This makes the read/write functions more generic, allowing them to be
used from other places.

Signed-off-by: Mark Tomlinson 
---
 drivers/pci/controller/pcie-iproc.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/controller/pcie-iproc.c 
b/drivers/pci/controller/pcie-iproc.c
index 8c7f875acf7f..2c836eede42c 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -660,13 +660,13 @@ static void __iomem *iproc_pcie_bus_map_cfg_bus(struct 
pci_bus *bus,
  where);
 }
 
-static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie,
+static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie, int busno,
   unsigned int devfn, int where,
   int size, u32 *val)
 {
void __iomem *addr;
 
-   addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3);
+   addr = iproc_pcie_map_cfg_bus(pcie, busno, devfn, where & ~0x3);
if (!addr) {
*val = ~0;
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -680,14 +680,14 @@ static int iproc_pci_raw_config_read32(struct iproc_pcie 
*pcie,
return PCIBIOS_SUCCESSFUL;
 }
 
-static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie,
+static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie, int busno,
unsigned int devfn, int where,
int size, u32 val)
 {
void __iomem *addr;
u32 mask, tmp;
 
-   addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3);
+   addr = iproc_pcie_map_cfg_bus(pcie, busno, devfn, where & ~0x3);
if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -793,7 +793,7 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie)
}
 
/* make sure we are not in EP mode */
-   iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, _type);
+   iproc_pci_raw_config_read32(pcie, 0, 0, PCI_HEADER_TYPE, 1, _type);
if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) {
dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type);
return -EFAULT;
@@ -803,15 +803,16 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie)
 #define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c
 #define PCI_CLASS_BRIDGE_MASK  0x00
 #define PCI_CLASS_BRIDGE_SHIFT 8
-   iproc_pci_raw_config_read32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
+   iproc_pci_raw_config_read32(pcie, 0, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
4, );
class &= ~PCI_CLASS_BRIDGE_MASK;
class |= (PCI_CLASS_BRIDGE_PCI << PCI_CLASS_BRIDGE_SHIFT);
-   iproc_pci_raw_config_write32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
+   iproc_pci_raw_config_write32(pcie, 0, 0, PCI_BRIDGE_CTRL_REG_OFFSET,
 4, class);
 
/* check link status to see if link is active */
-   iproc_pci_raw_config_read32(pcie, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
+   iproc_pci_raw_config_read32(pcie, 0, 0,
+   IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
2, _status);
if (link_status & PCI_EXP_LNKSTA_NLW)
link_is_active = true;
@@ -821,19 +822,19 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie)
 #define PCI_TARGET_LINK_SPEED_MASK 0xf
 #define PCI_TARGET_LINK_SPEED_GEN2 0x2
 #define PCI_TARGET_LINK_SPEED_GEN1 0x1
-   iproc_pci_raw_config_read32(pcie, 0,
+   iproc_pci_raw_config_read32(pcie, 0, 0,
IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
4, _ctrl);
if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) ==
PCI_TARGET_LINK_SPEED_GEN2) {
link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK;
link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1;
-   iproc_pci_raw_config_write32(pcie, 0,
+   iproc_pci_raw_config_write32(pcie, 0, 0,
IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2,
4, link_ctrl);
msleep(100);
 
-   iproc_pci_raw_config_read32(pcie, 0,
+   iproc_pci_raw_config_read32(pcie, 0, 0,
IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA,
2, _status);
if (link_status & PCI_EXP_LNKSTA_NLW)
-- 
2.28.0



[PATCH] ipv6: Support more than 32 MIFS

2020-07-09 Thread Mark Tomlinson
The function ip6mr_mfc_add() declares an array of ttls. If MAXMIFS is
large, this would create a large stack frame. This is fixed, and made
more efficient, by passing mf6cc_ifset to ip6mr_update_thresholds().

Signed-off-by: Mark Tomlinson 
---

As background to this patch, we have MAXMIFS set to 1025 in our kernel.
This creates other issues apart from what this patch fixes, but this
change does make the IPv4 and IPv6 code look more similar, and reduces
total amount of code. Without the double handling of TTLs, I think it is
also easier to understand. Hence I thought it could still become part of
the main kernel.

 net/ipv6/ip6mr.c | 20 ++--
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1f4d20e97c07..7123849d201b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -836,7 +836,7 @@ static void ipmr_expire_process(struct timer_list *t)
 
 static void ip6mr_update_thresholds(struct mr_table *mrt,
struct mr_mfc *cache,
-   unsigned char *ttls)
+   struct if_set *ifset)
 {
int vifi;
 
@@ -845,9 +845,8 @@ static void ip6mr_update_thresholds(struct mr_table *mrt,
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
-   if (VIF_EXISTS(mrt, vifi) &&
-   ttls[vifi] && ttls[vifi] < 255) {
-   cache->mfc_un.res.ttls[vifi] = ttls[vifi];
+   if (VIF_EXISTS(mrt, vifi) && IF_ISSET(vifi, ifset)) {
+   cache->mfc_un.res.ttls[vifi] = 1;
if (cache->mfc_un.res.minvif > vifi)
cache->mfc_un.res.minvif = vifi;
if (cache->mfc_un.res.maxvif <= vifi)
@@ -1406,21 +1405,14 @@ void ip6_mr_cleanup(void)
 static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 struct mf6cctl *mfc, int mrtsock, int parent)
 {
-   unsigned char ttls[MAXMIFS];
struct mfc6_cache *uc, *c;
struct mr_mfc *_uc;
bool found;
-   int i, err;
+   int err;
 
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
 
-   memset(ttls, 255, MAXMIFS);
-   for (i = 0; i < MAXMIFS; i++) {
-   if (IF_ISSET(i, >mf6cc_ifset))
-   ttls[i] = 1;
-   }
-
/* The entries are added/deleted only under RTNL */
rcu_read_lock();
c = ip6mr_cache_find_parent(mrt, >mf6cc_origin.sin6_addr,
@@ -1429,7 +1421,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table 
*mrt,
if (c) {
write_lock_bh(_lock);
c->_c.mfc_parent = mfc->mf6cc_parent;
-   ip6mr_update_thresholds(mrt, >_c, ttls);
+   ip6mr_update_thresholds(mrt, >_c, >mf6cc_ifset);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(_lock);
@@ -1450,7 +1442,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table 
*mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
c->_c.mfc_parent = mfc->mf6cc_parent;
-   ip6mr_update_thresholds(mrt, >_c, ttls);
+   ip6mr_update_thresholds(mrt, >_c, >mf6cc_ifset);
if (!mrtsock)
c->_c.mfc_flags |= MFC_STATIC;
 
-- 
2.27.0



[PATCH] i2c: mv64xxx: Add bus error recovery

2020-07-07 Thread Mark Tomlinson
This adds i2c bus recovery to the mv64xxx driver.

Implement bus recovery to recover from SCL/SDA stuck low.

This uses the generic recovery function, setting the clock/data lines as
GPIO pins, and sending 9 clocks to try and recover the bus.

Signed-off-by: Mark Tomlinson 
---
 drivers/i2c/busses/i2c-mv64xxx.c | 77 +++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 829b8c98ae51..e58853ba3ef0 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -147,6 +148,10 @@ struct mv64xxx_i2c_data {
boolirq_clear_inverted;
/* Clk div is 2 to the power n, not 2 to the power n + 1 */
boolclk_n_base_0;
+   struct pinctrl  *pinctrl;
+   struct i2c_bus_recovery_inforinfo;
+   struct pinctrl_state*pin_default_state;
+   struct pinctrl_state*pin_gpio_state;
 };
 
 static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = {
@@ -325,7 +330,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 
status)
 drv_data->msg->flags);
drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP;
mv64xxx_i2c_hw_init(drv_data);
-   drv_data->rc = -EIO;
+   i2c_recover_bus(_data->adapter);
+   drv_data->rc = -EAGAIN;
}
 }
 
@@ -563,6 +569,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data 
*drv_data)
"time_left: %d\n", drv_data->block,
(int)time_left);
mv64xxx_i2c_hw_init(drv_data);
+   i2c_recover_bus(_data->adapter);
}
} else
spin_unlock_irqrestore(_data->lock, flags);
@@ -872,6 +879,69 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 }
 #endif /* CONFIG_OF */
 
+/*
+ * Switch to bit bang mode to prepare for i2c generic recovery.
+ */
+static void mv64xxx_i2c_prepare_recovery(struct i2c_adapter *adap)
+{
+   struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap);
+
+   pinctrl_select_state(drv_data->pinctrl, drv_data->pin_gpio_state);
+}
+
+/*
+ * Return to normal i2c operation following recovery.
+ */
+static void mv64xxx_i2c_unprepare_recovery(struct i2c_adapter *adap)
+{
+   struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap);
+
+   pinctrl_select_state(drv_data->pinctrl, drv_data->pin_default_state);
+}
+
+static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data,
+ struct platform_device *pd)
+{
+   struct i2c_bus_recovery_info *rinfo = _data->rinfo;
+   struct device *dev = >dev;
+
+   drv_data->pinctrl = devm_pinctrl_get(dev);
+   if (!drv_data->pinctrl || IS_ERR(drv_data->pinctrl)) {
+   dev_err(dev, "can't get pinctrl, bus recovery not supported\n");
+   return PTR_ERR(drv_data->pinctrl);
+   }
+
+   drv_data->pin_default_state = pinctrl_lookup_state(drv_data->pinctrl,
+   PINCTRL_STATE_DEFAULT);
+   drv_data->pin_gpio_state = pinctrl_lookup_state(drv_data->pinctrl,
+   "gpio");
+   rinfo->scl_gpiod = devm_gpiod_get(dev, "scl",
+ GPIOD_OUT_HIGH_OPEN_DRAIN);
+   rinfo->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN);
+   if (PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER ||
+   PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER)
+   return -EPROBE_DEFER;
+
+   if (IS_ERR(rinfo->sda_gpiod) ||
+   IS_ERR(rinfo->scl_gpiod) ||
+   IS_ERR(drv_data->pin_default_state) ||
+   IS_ERR(drv_data->pin_gpio_state)) {
+   dev_dbg(dev, "recovery information incomplete\n");
+   return 0;
+   }
+
+   dev_dbg(dev, "using scl-gpio %d and sda-gpio %d for recovery\n",
+   rinfo->scl_gpiod ? desc_to_gpio(rinfo->scl_gpiod) : -1,
+   rinfo->sda_gpiod ? desc_to_gpio(rinfo->sda_gpiod) : -1);
+
+   rinfo->prepare_recovery = mv64xxx_i2c_prepare_recovery;
+   rinfo->unprepare_recovery = mv64xxx_i2c_unprepare_recovery;
+   rinfo->recover_bus = i2c_generic_scl_recovery;
+   drv_data->adapter.bus_recovery_info = rinfo;
+
+   return 0;
+}
+
 static int
 mv64xxx_i2c_probe(struct platform_device *pd)
 {
@@ -939,6 +1009,10 @@ mv64xxx_i2c_probe(struct platform_device *pd)
 
mv64xxx_i2c_hw_init(drv_data);
 
+   rc = mv64xxx_i2c_init_recovery_info(drv_data, pd);
+   if (rc == -EPROBE_DEFER)
+   goto exit_reset;

[PATCH v2] pinctrl: nsp: Set irq handler based on trig type

2020-07-02 Thread Mark Tomlinson
Rather than always using handle_simple_irq() as the gpio_irq_chip
handler, set a more appropriate handler based on the IRQ trigger type
requested. This is important for level triggered interrupts which need
to be masked during handling. Also, fix the interrupt acknowledge so
that it clears only one interrupt instead of all interrupts which are
currently active. Finally there is no need to clear the interrupt during
the interrupt handler, since the edge-triggered handler will do that for
us.

Signed-off-by: Mark Tomlinson 
---
Changes in v2:
- Don't perform unnecessary acks.

 drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c 
b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
index bed0124388c0..a00a42a61a90 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -154,15 +154,9 @@ static irqreturn_t nsp_gpio_irq_handler(int irq, void 
*data)
level &= readl(chip->base + NSP_GPIO_INT_MASK);
int_bits = level | event;
 
-   for_each_set_bit(bit, &int_bits, gc->ngpio) {
-   /*
-* Clear the interrupt before invoking the
-* handler, so we do not leave any window
-*/
-   writel(BIT(bit), chip->base + NSP_GPIO_EVENT);
+   for_each_set_bit(bit, &int_bits, gc->ngpio)
generic_handle_irq(
irq_linear_revmap(gc->irq.domain, bit));
-   }
}
 
return  int_bits ? IRQ_HANDLED : IRQ_NONE;
@@ -178,7 +172,7 @@ static void nsp_gpio_irq_ack(struct irq_data *d)
 
trigger_type = irq_get_trigger_type(d->irq);
if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
-   nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
+   writel(val, chip->base + NSP_GPIO_EVENT);
 }
 
 /*
@@ -262,6 +256,12 @@ static int nsp_gpio_irq_set_type(struct irq_data *d, 
unsigned int type)
 
nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio, falling);
nsp_set_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio, level_low);
+
+   if (type & IRQ_TYPE_EDGE_BOTH)
+   irq_set_handler_locked(d, handle_edge_irq);
+   else
+   irq_set_handler_locked(d, handle_level_irq);
+
	raw_spin_unlock_irqrestore(&chip->lock, flags);
 
dev_dbg(chip->dev, "gpio:%u level_low:%s falling:%s\n", gpio,
@@ -691,7 +691,7 @@ static int nsp_gpio_probe(struct platform_device *pdev)
girq->num_parents = 0;
girq->parents = NULL;
girq->default_type = IRQ_TYPE_NONE;
-   girq->handler = handle_simple_irq;
+   girq->handler = handle_bad_irq;
}
 
ret = devm_gpiochip_add_data(dev, gc, chip);
-- 
2.27.0



Re: [PATCH] pinctrl: nsp: Set irq handler based on trig type

2020-07-01 Thread Mark Tomlinson
On Tue, 2020-06-30 at 15:26 -0700, Ray Jui wrote:
> - u32 trigger_type;
> >  
> > -   trigger_type = irq_get_trigger_type(d->irq);
> > -   if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
> > -   nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
> > +   nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
> 
> 
> I have a question here. I assume writing a bit to this register will
> result in clearing that bit, is that true?
> 
> Based on the driver, the 'nsp_gpio_irq_handler' seems to rely on
> 'NSP_GPIO_EVENT' register to figure out which GPIO the interrupt is for.
> And if so, and if this is cleared here that is invoked before the actual
> IRQ handler, how does this work?

It seems that this change masked another issue I was having. However,
the original code is still wrong as using nsp_set_bit() will do a
read/modify/write and clear all events regardless of what gpio is.

I have found another issue, where I'm getting many more edge-triggered
interrupts than I think I should be, so I'll sort that and send a v2 of
this patch.



Re: [PATCH] pinctrl: initialise nsp-mux earlier.

2020-06-30 Thread Mark Tomlinson
On Tue, 2020-06-30 at 20:14 -0700, Florian Fainelli wrote:
> Sorry, it looks like I made a mistake in my testing (or I was lucky),
> > and this patch doesn't fix the issue. What is happening is:
> > 1) nsp-pinmux driver is registered (arch_initcall).
> > 2) nsp-gpio-a driver is registered (arch_initcall_sync).
> > 3) of_platform_default_populate_init() is called (also at level
> > arch_initcall_sync), which scans the device tree, adds the nsp-gpio-a
> > device, runs its probe, and this returns -EPROBE_DEFER with the error
> > message.
> > 4) Only now nsp-pinmux device is probed.
> > 
> > Changing the 'arch_initcall_sync' to 'device_initcall' in nsp-gpio-a
> > ensures that the pinmux is probed first since
> > of_platform_default_populate_init() will be called between the two
> > register calls, and the error goes away. Is this change acceptable as a
> > solution?
> 
> If probe deferral did not work, certainly but it sounds like this is
> being done just for the sake of eliminating a round of probe deferral,
> is there a functional problem this is fixing?

No, I'm just trying to prevent an "error" message appearing in syslog.

> > The actual error message in syslog is:
> > 
> > kern.err kernel: gpiochip_add_data_with_key: GPIOs 480..511
> > (1820.gpio) failed to register, -517
> > 
> > So an end user sees "err" and "failed", and doesn't know what "-517"
> > means.
> 
> How about this instead:
> 
> diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
> index 4fa075d49fbc..10d9d0c17c9e 100644
> --- a/drivers/gpio/gpiolib.c
> +++ b/drivers/gpio/gpiolib.c
> @@ -1818,9 +1818,10 @@ int gpiochip_add_data_with_key(struct gpio_chip
> *gc, void *data,
> ida_simple_remove(&gpio_ida, gdev->id);
>  err_free_gdev:
> /* failures here can mean systems won't boot... */
> -   pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
> -  gdev->base, gdev->base + gdev->ngpio - 1,
> -  gc->label ? : "generic", ret);
> +   if (ret != -EPROBE_DEFER)
> +   pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n",
> +   __func__, gdev->base, gdev->base + gdev->ngpio - 1,
> +   gc->label ? : "generic", ret);
> kfree(gdev);
> return ret;
>  }
> 
That was one of my thoughts too. I found someone had tried that
earlier, but it was rejected:


https://patchwork.ozlabs.org/project/linux-gpio/patch/1516566774-1786-1-git-send-email-da...@lechnology.com/



Re: [PATCH] pinctrl: initialise nsp-mux earlier.

2020-06-30 Thread Mark Tomlinson
On Tue, 2020-06-30 at 15:08 -0700, Ray Jui wrote:
> May I know which GPIO driver you are referring to on NSP? Both the iProc
> GPIO driver and the NSP GPIO driver are initialized at the level of
> 'arch_initcall_sync', which is supposed to be after 'arch_initcall' used
> here in the pinmux driver

Sorry, it looks like I made a mistake in my testing (or I was lucky),
and this patch doesn't fix the issue. What is happening is:
1) nsp-pinmux driver is registered (arch_initcall).
2) nsp-gpio-a driver is registered (arch_initcall_sync).
3) of_platform_default_populate_init() is called (also at level
arch_initcall_sync), which scans the device tree, adds the nsp-gpio-a
device, runs its probe, and this returns -EPROBE_DEFER with the error
message.
4) Only now nsp-pinmux device is probed.

Changing the 'arch_initcall_sync' to 'device_initcall' in nsp-gpio-a
ensures that the pinmux is probed first since
of_platform_default_populate_init() will be called between the two
register calls, and the error goes away. Is this change acceptable as a
solution?

> > though the probe will succeed when the driver is re-initialised, the
> > error can be scary to end users. To fix this, change the time the
> 
> Scary to end users? I don't know about that. -EPROBE_DEFER was
> introduced exactly for this purpose. Perhaps users need to learn what
> -EPROBE_DEFER errno means?

The actual error message in syslog is:

kern.err kernel: gpiochip_add_data_with_key: GPIOs 480..511
(1820.gpio) failed to register, -517

So an end user sees "err" and "failed", and doesn't know what "-517"
means.



[PATCH] pinctrl: initialise nsp-mux earlier.

2020-06-30 Thread Mark Tomlinson
The GPIO specified in the DTS file references the pinctrl, which is
specified after the GPIO. If the GPIO is initialised before pinctrl,
an error message for the -EPROBE_DEFER ends up in the kernel log. Even
though the probe will succeed when the driver is re-initialised, the
error can be scary to end users. To fix this, change the time the
pinctrl is probed, so that it is always before the GPIO driver.

Signed-off-by: Mark Tomlinson 
---
 drivers/pinctrl/bcm/pinctrl-nsp-mux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c 
b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c
index f1d60a708815..7586949f83ec 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c
@@ -639,4 +639,4 @@ static int __init nsp_pinmux_init(void)
 {
return platform_driver_register(_pinmux_driver);
 }
-arch_initcall(nsp_pinmux_init);
+postcore_initcall(nsp_pinmux_init);
-- 
2.27.0



[PATCH] pinctrl: nsp: Set irq handler based on trig type

2020-06-30 Thread Mark Tomlinson
Rather than always using handle_simple_irq() as the gpio_irq_chip
handler, set a more appropriate handler based on the IRQ trigger type
requested. This is important for level triggered interrupts which need
to be masked during handling. Also, always acknowledge the interrupt
regardless of whether it is edge or level triggered.

Signed-off-by: Mark Tomlinson 
---
 drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c 
b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
index bed0124388c0..349fb384113e 100644
--- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -174,11 +174,8 @@ static void nsp_gpio_irq_ack(struct irq_data *d)
struct nsp_gpio *chip = gpiochip_get_data(gc);
unsigned gpio = d->hwirq;
u32 val = BIT(gpio);
-   u32 trigger_type;
 
-   trigger_type = irq_get_trigger_type(d->irq);
-   if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
-   nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
+   nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
 }
 
 /*
@@ -262,6 +259,12 @@ static int nsp_gpio_irq_set_type(struct irq_data *d, 
unsigned int type)
 
nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio, falling);
nsp_set_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio, level_low);
+
+   if (type & IRQ_TYPE_EDGE_BOTH)
+   irq_set_handler_locked(d, handle_edge_irq);
+   else
+   irq_set_handler_locked(d, handle_level_irq);
+
	raw_spin_unlock_irqrestore(&chip->lock, flags);
 
dev_dbg(chip->dev, "gpio:%u level_low:%s falling:%s\n", gpio,
@@ -691,7 +694,7 @@ static int nsp_gpio_probe(struct platform_device *pdev)
girq->num_parents = 0;
girq->parents = NULL;
girq->default_type = IRQ_TYPE_NONE;
-   girq->handler = handle_simple_irq;
+   girq->handler = handle_bad_irq;
}
 
ret = devm_gpiochip_add_data(dev, gc, chip);
-- 
2.27.0



Re: [PATCH 4/5] spi: bcm-qspi: Make multiple data blocks interrupt-driven

2020-06-15 Thread Mark Tomlinson
On Mon, 2020-06-15 at 15:32 +0100, Mark Brown wrote:
> On Mon, Jun 15, 2020 at 04:05:56PM +1200, Mark Tomlinson wrote:
> 
> > When needing to send/receive data in small chunks, make this interrupt
> > driven rather than waiting for a completion event for each small section
> > of data.
> 
> Again was this done for a reason and if so do we understand why doing
> this from interrupt context is safe - how long can the interrupts be
> when stuffing the FIFO from interrupt context?

As I'm porting a Broadcom patch, I'm hoping someone else can add
something to this. From the history it appears there was a hard limit
(no small chunks), and this was changed to doing it in chunks with
patch 345309fa7c0c92, apparently to improve performance. I believe this
change further improves performance, but as the patch arrived without
any documentation, I'm not certain.


> > @@ -731,12 +733,14 @@ static inline u16 read_rxram_slot_u16(struct bcm_qspi 
> > *qspi, int slot)
> > ((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8);
> >  }
> >  
> > -static void read_from_hw(struct bcm_qspi *qspi, int slots)
> > +static void read_from_hw(struct bcm_qspi *qspi)
> >  {
> 
> Things might be clearer if this refactoring were split out into a
> separate patch.

Done.

> > @@ -960,24 +966,21 @@ static int bcm_qspi_transfer_one(struct spi_master 
> > *master,
> >  struct spi_transfer *trans)
> >  {
> > struct bcm_qspi *qspi = spi_master_get_devdata(master);
> > -   int slots;
> > -   unsigned long timeo = msecs_to_jiffies(100);
> > +   unsigned long timeo = msecs_to_jiffies(1000);
> 
> That's a randomly chosen value - if we're now doing the entire transfer
> then we should be trying to estimate the length of time the transfer
> will take, for a very large transfer on a slow bus it's possible that
> even a second won't be enough.
> 
Again, the value came from Broadcom. Using the data length as an
estimate sounds like a good idea.

> > -   complete(&qspi->mspi_done);
> > +
> > +   read_from_hw(qspi);
> > +
> > +   if (qspi->trans_pos.trans) {
> > +   write_to_hw(qspi);
> > +   } else {
> > +   complete(&qspi->mspi_done);
> > +   spi_finalize_current_transfer(qspi->master);
> > +   }
> > +
> 
> This is adding a spi_finalize_current_transfer() which we didn't have
> before, and still leaving us doing cleanup work in the driver in another
> thread.  This is confused, the driver should only need to finalize the
> transfer explicitly if it returned a timeout from transfer_one() but
> nothing's changed there.

I can remove the call to spi_finalize_current_transfer() from this
patch. I'll try to check what does happen in the timeout case.




[PATCH 1/5] spi: bcm-qspi: Add support for setting BSPI clock

2020-06-14 Thread Mark Tomlinson
On iProc devices (unlike previous BCM SoCs) the clock rate of the SPI
can be set. This patch adds the appropriate code for setting that.

Reviewed-by: Callum Sinclair 
Reviewed-by: Chris Packham 
Signed-off-by: Mark Tomlinson 
---
 drivers/spi/spi-bcm-qspi.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 681d09085175..8fc5b9b3757b 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -117,6 +117,13 @@
 
 #define MSPI_MSPI_STATUS_SPIF  BIT(0)
 
+#define CRU_CTRL_REG   0x0
+#define QSPI_CLK_SEL_25M   0x00
+#define QSPI_CLK_SEL_50M   0x02
+#define QSPI_CLK_SEL_31M25 0x04
+#define QSPI_CLK_SEL_62M5  0x06
+#define QSPI_CLK_SEL_MASK  0x06
+
 #define INTR_BASE_BIT_SHIFT0x02
 #define INTR_COUNT 0x07
 
@@ -170,6 +177,7 @@ enum base_type {
MSPI,
BSPI,
CHIP_SELECT,
+   CRU_CTRL,
BASEMAX,
 };
 
@@ -625,6 +633,7 @@ static void bcm_qspi_update_parms(struct bcm_qspi *qspi,
 static int bcm_qspi_setup(struct spi_device *spi)
 {
struct bcm_qspi_parms *xp;
+   struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
 
if (spi->bits_per_word > 16)
return -EINVAL;
@@ -639,6 +648,21 @@ static int bcm_qspi_setup(struct spi_device *spi)
xp->speed_hz = spi->max_speed_hz;
xp->mode = spi->mode;
 
+   if (qspi->base[CRU_CTRL]) {
+   u32 tmp = bcm_qspi_read(qspi, CRU_CTRL, CRU_CTRL_REG);
+
+   /* Set BSPI clock rate */
+   tmp &= ~QSPI_CLK_SEL_MASK;
+   if (spi->max_speed_hz >= 62500000)
+   tmp |= QSPI_CLK_SEL_62M5;
+   else if (spi->max_speed_hz >= 50000000)
+   tmp |= QSPI_CLK_SEL_50M;
+   else if (spi->max_speed_hz >= 31250000)
+   tmp |= QSPI_CLK_SEL_31M25;
+   /* default is 25MHz */
+   bcm_qspi_write(qspi, CRU_CTRL, CRU_CTRL_REG, tmp);
+   }
+
if (spi->bits_per_word)
xp->bits_per_word = spi->bits_per_word;
else
@@ -1459,6 +1483,16 @@ int bcm_qspi_probe(struct platform_device *pdev,
qspi->soc_intc = NULL;
}
 
+   /* iProc BSPI clock is set through CRU control */
+   res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cru_ctrl");
+   if (res) {
+   qspi->base[CRU_CTRL] = devm_ioremap_resource(dev, res);
+   if (IS_ERR(qspi->base[CRU_CTRL])) {
+   ret = PTR_ERR(qspi->base[CRU_CTRL]);
+   goto qspi_probe_err;
+   }
+   }
+
ret = clk_prepare_enable(qspi->clk);
if (ret) {
dev_err(dev, "failed to prepare clock\n");
-- 
2.27.0



[PATCH 4/5] spi: bcm-qspi: Make multiple data blocks interrupt-driven

2020-06-14 Thread Mark Tomlinson
When needing to send/receive data in small chunks, make this interrupt
driven rather than waiting for a completion event for each small section
of data.

Reviewed-by: Callum Sinclair 
Reviewed-by: Chris Packham 
Signed-off-by: Mark Tomlinson 
---
 drivers/spi/spi-bcm-qspi.c | 44 --
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index ce30ebf27f06..0cc51bcda300 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -200,12 +200,14 @@ struct bcm_qspi_dev_id {
 struct qspi_trans {
struct spi_transfer *trans;
int byte;
+   int slots;
bool mspi_last_trans;
 };
 
 struct bcm_qspi {
struct platform_device *pdev;
struct spi_master *master;
+   struct spi_device *spi_dev;
struct clk *clk;
u32 base_clk;
u32 max_speed_hz;
@@ -731,12 +733,14 @@ static inline u16 read_rxram_slot_u16(struct bcm_qspi 
*qspi, int slot)
((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8);
 }
 
-static void read_from_hw(struct bcm_qspi *qspi, int slots)
+static void read_from_hw(struct bcm_qspi *qspi)
 {
struct qspi_trans tp;
-   int slot;
+   int slot, slots;
 
bcm_qspi_disable_bspi(qspi);
+   tp = qspi->trans_pos;
+   slots = tp.slots;
 
if (slots > MSPI_NUM_CDRAM) {
/* should never happen */
@@ -744,8 +748,6 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
return;
}
 
-   tp = qspi->trans_pos;
-
for (slot = 0; slot < slots; slot++) {
if (tp.trans->rx_buf) {
if (tp.trans->bits_per_word <= 8) {
@@ -803,11 +805,12 @@ static inline void write_cdram_slot(struct bcm_qspi 
*qspi, int slot, u32 val)
 }
 
 /* Return number of slots written */
-static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
+static int write_to_hw(struct bcm_qspi *qspi)
 {
struct qspi_trans tp;
int slot = 0, tstatus = 0;
u32 mspi_cdram = 0;
+   struct spi_device *spi = qspi->spi_dev;
 
bcm_qspi_disable_bspi(qspi);
tp = qspi->trans_pos;
@@ -846,6 +849,9 @@ static int write_to_hw(struct bcm_qspi *qspi, struct 
spi_device *spi)
slot++;
}
 
+   /* save slot number for read_from_hw() */
+   qspi->trans_pos.slots = slot;
+
if (!slot) {
dev_err(>pdev->dev, "%s: no data to send?", __func__);
goto done;
@@ -960,24 +966,21 @@ static int bcm_qspi_transfer_one(struct spi_master 
*master,
 struct spi_transfer *trans)
 {
struct bcm_qspi *qspi = spi_master_get_devdata(master);
-   int slots;
-   unsigned long timeo = msecs_to_jiffies(100);
+   unsigned long timeo = msecs_to_jiffies(1000);
 
if (!spi->cs_gpiod)
bcm_qspi_chip_select(qspi, spi->chip_select);
qspi->trans_pos.trans = trans;
qspi->trans_pos.byte = 0;
+   qspi->spi_dev = spi;
 
-   while (qspi->trans_pos.byte < trans->len) {
-   reinit_completion(&qspi->mspi_done);
+   reinit_completion(&qspi->mspi_done);
 
-   slots = write_to_hw(qspi, spi);
-   if (!wait_for_completion_timeout(&qspi->mspi_done, timeo)) {
-   dev_err(>pdev->dev, "timeout waiting for MSPI\n");
-   return -ETIMEDOUT;
-   }
+   write_to_hw(qspi);
 
-   read_from_hw(qspi, slots);
+   if (!wait_for_completion_timeout(&qspi->mspi_done, timeo)) {
+   dev_err(>pdev->dev, "timeout waiting for MSPI\n");
+   return -ETIMEDOUT;
}
bcm_qspi_enable_bspi(qspi);
 
@@ -1092,7 +1095,16 @@ static irqreturn_t bcm_qspi_mspi_l2_isr(int irq, void 
*dev_id)
bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status);
if (qspi->soc_intc)
soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_DONE);
-   complete(&qspi->mspi_done);
+
+   read_from_hw(qspi);
+
+   if (qspi->trans_pos.trans) {
+   write_to_hw(qspi);
+   } else {
+   complete(&qspi->mspi_done);
+   spi_finalize_current_transfer(qspi->master);
+   }
+
return IRQ_HANDLED;
}
 
-- 
2.27.0



[PATCH 2/5] spi: bcm-qspi: Improve debug reading SPI data

2020-06-14 Thread Mark Tomlinson
This patch prevents device debug when data is not read from hardware
(i.e. when there is no receive buffer).

Reviewed-by: Callum Sinclair 
Reviewed-by: Chris Packham 
Signed-off-by: Mark Tomlinson 
---
 drivers/spi/spi-bcm-qspi.c | 31 ---
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 8fc5b9b3757b..92e04d24359f 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -749,21 +749,22 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots)
tp = qspi->trans_pos;
 
for (slot = 0; slot < slots; slot++) {
-   if (tp.trans->bits_per_word <= 8) {
-   u8 *buf = tp.trans->rx_buf;
-
-   if (buf)
-   buf[tp.byte] = read_rxram_slot_u8(qspi, slot);
-   dev_dbg(>pdev->dev, "RD %02x\n",
-   buf ? buf[tp.byte] : 0x0);
-   } else {
-   u16 *buf = tp.trans->rx_buf;
-
-   if (buf)
-   buf[tp.byte / 2] = read_rxram_slot_u16(qspi,
- slot);
-   dev_dbg(>pdev->dev, "RD %04x\n",
-   buf ? buf[tp.byte / 2] : 0x0);
+   if (tp.trans->rx_buf) {
+   if (tp.trans->bits_per_word <= 8) {
+   u8 *buf = tp.trans->rx_buf;
+
+   buf[tp.byte] =
+   read_rxram_slot_u8(qspi, slot);
+   dev_dbg(>pdev->dev, "RD %02x\n",
+   buf[tp.byte]);
+   } else {
+   u16 *buf = tp.trans->rx_buf;
+
+   buf[tp.byte / 2] =
+   read_rxram_slot_u16(qspi, slot);
+   dev_dbg(>pdev->dev, "RD %04x\n",
+   buf[tp.byte / 2]);
+   }
}
 
update_qspi_trans_byte_count(qspi, ,
-- 
2.27.0



[PATCH 0/5] Improvements to spi-bcm-qspi

2020-06-14 Thread Mark Tomlinson
This series of patches came from a single large Broadcom patch that
implements drivers for a number of their integrated switch chips. Mostly
this is just splitting the qspi driver into smaller parts and doesn't
include much original from me.

Mark Tomlinson (5):
  spi: bcm-qspi: Add support for setting BSPI clock
  spi: bcm-qspi: Improve debug reading SPI data
  spi: bcm-qspi: Do not split transfers into small chunks
  spi: bcm-qspi: Make multiple data blocks interrupt-driven
  spi: bcm-qspi: Improve interrupt handling

 drivers/spi/spi-bcm-qspi.c | 189 ++---
 drivers/spi/spi-bcm-qspi.h |   5 +-
 2 files changed, 115 insertions(+), 79 deletions(-)

-- 
2.27.0



[PATCH 3/5] spi: bcm-qspi: Do not split transfers into small chunks

2020-06-14 Thread Mark Tomlinson
Instead of splitting transfers into smaller parts, just perform the
operation that the higher level asked for.

Reviewed-by: Callum Sinclair 
Reviewed-by: Chris Packham 
Signed-off-by: Mark Tomlinson 
---
 drivers/spi/spi-bcm-qspi.c | 69 +++---
 1 file changed, 27 insertions(+), 42 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 92e04d24359f..ce30ebf27f06 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -78,8 +78,6 @@
 #define BSPI_BPP_MODE_SELECT_MASK  BIT(8)
 #define BSPI_BPP_ADDR_SELECT_MASK  BIT(16)
 
-#define BSPI_READ_LENGTH   256
-
 /* MSPI register offsets */
 #define MSPI_SPCR0_LSB 0x000
 #define MSPI_SPCR0_MSB 0x004
@@ -888,9 +886,9 @@ static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi,
 const struct spi_mem_op *op)
 {
struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
-   u32 addr = 0, len, rdlen, len_words, from = 0;
+   u32 addr = 0, len, len_words, from = 0;
int ret = 0;
-   unsigned long timeo = msecs_to_jiffies(100);
+   unsigned long timeo = msecs_to_jiffies(1500);
struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
 
if (bcm_qspi_bspi_ver_three(qspi))
@@ -925,47 +923,34 @@ static int bcm_qspi_bspi_exec_mem_op(struct spi_device 
*spi,
 * into RAF buffer read lengths
 */
len = op->data.nbytes;
+   reinit_completion(&qspi->bspi_done);
+   bcm_qspi_enable_bspi(qspi);
+   len_words = (len + 3) >> 2;
+   qspi->bspi_rf_op = op;
+   qspi->bspi_rf_op_status = 0;
qspi->bspi_rf_op_idx = 0;
+   qspi->bspi_rf_op_len = len;
+   dev_dbg(>pdev->dev, "bspi xfr addr 0x%x len 0x%x", addr, len);
 
-   do {
-   if (len > BSPI_READ_LENGTH)
-   rdlen = BSPI_READ_LENGTH;
-   else
-   rdlen = len;
-
-   reinit_completion(&qspi->bspi_done);
-   bcm_qspi_enable_bspi(qspi);
-   len_words = (rdlen + 3) >> 2;
-   qspi->bspi_rf_op = op;
-   qspi->bspi_rf_op_status = 0;
-   qspi->bspi_rf_op_len = rdlen;
-   dev_dbg(&qspi->pdev->dev,
-   "bspi xfr addr 0x%x len 0x%x", addr, rdlen);
-   bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr);
-   bcm_qspi_write(qspi, BSPI, BSPI_RAF_NUM_WORDS, len_words);
-   bcm_qspi_write(qspi, BSPI, BSPI_RAF_WATERMARK, 0);
-   if (qspi->soc_intc) {
-   /*
-* clear soc MSPI and BSPI interrupts and enable
-* BSPI interrupts.
-*/
-   soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_BSPI_DONE);
-   soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, true);
-   }
-
-   /* Must flush previous writes before starting BSPI operation */
-   mb();
-   bcm_qspi_bspi_lr_start(qspi);
-   if (!wait_for_completion_timeout(&qspi->bspi_done, timeo)) {
-   dev_err(>pdev->dev, "timeout waiting for BSPI\n");
-   ret = -ETIMEDOUT;
-   break;
-   }
+   bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr);
+   bcm_qspi_write(qspi, BSPI, BSPI_RAF_NUM_WORDS, len_words);
+   bcm_qspi_write(qspi, BSPI, BSPI_RAF_WATERMARK, 0);
+   if (qspi->soc_intc) {
+   /*
+* clear soc MSPI and BSPI interrupts and enable
+* BSPI interrupts.
+*/
+   soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_BSPI_DONE);
+   soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, true);
+   }
 
-   /* set msg return length */
-   addr += rdlen;
-   len -= rdlen;
-   } while (len);
+   /* Must flush previous writes before starting BSPI operation */
+   mb();
+   bcm_qspi_bspi_lr_start(qspi);
+   if (!wait_for_completion_timeout(&qspi->bspi_done, timeo)) {
+   dev_err(>pdev->dev, "timeout waiting for BSPI\n");
+   ret = -ETIMEDOUT;
+   }
 
return ret;
 }
-- 
2.27.0



[PATCH 5/5] spi: bcm-qspi: Improve interrupt handling

2020-06-14 Thread Mark Tomlinson
Acknowledge interrupts correctly and add support for fifo-full
interrupt, distinguishing it from the done interrupt.

Reviewed-by: Callum Sinclair 
Reviewed-by: Chris Packham 
Signed-off-by: Mark Tomlinson 
---
 drivers/spi/spi-bcm-qspi.c | 11 ++-
 drivers/spi/spi-bcm-qspi.h |  5 -
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 0cc51bcda300..3753eff8a154 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1123,6 +1123,8 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void 
*dev_id)
if (qspi->bspi_rf_op_len == 0) {
qspi->bspi_rf_op = NULL;
if (qspi->soc_intc) {
+   /* Ack BSPI done interrupt */
+   soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_DONE);
/* disable soc BSPI interrupt */
soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE,
   false);
@@ -1134,11 +1136,10 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, 
void *dev_id)
bcm_qspi_bspi_lr_clear(qspi);
else
bcm_qspi_bspi_flush_prefetch_buffers(qspi);
-   }
-
-   if (qspi->soc_intc)
-   /* clear soc BSPI interrupt */
-   soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_DONE);
+   } else  if (qspi->soc_intc)
+   /* Ack FIFO full interrupt */
+   soc_intc->bcm_qspi_int_ack(soc_intc,
+  BSPI_FIFO_FULL);
}
 
status &= INTR_BSPI_LR_SESSION_DONE_MASK;
diff --git a/drivers/spi/spi-bcm-qspi.h b/drivers/spi/spi-bcm-qspi.h
index 01aec6460108..b68e275bc721 100644
--- a/drivers/spi/spi-bcm-qspi.h
+++ b/drivers/spi/spi-bcm-qspi.h
@@ -48,7 +48,8 @@ enum {
MSPI_DONE = 0x1,
BSPI_DONE = 0x2,
BSPI_ERR = 0x4,
-   MSPI_BSPI_DONE = 0x7
+   MSPI_BSPI_DONE = 0x7,
+   BSPI_FIFO_FULL = 0x8
 };
 
 struct bcm_qspi_soc_intc {
@@ -84,6 +85,8 @@ static inline u32 get_qspi_mask(int type)
return INTR_MSPI_DONE_MASK;
case BSPI_DONE:
return BSPI_LR_INTERRUPTS_ALL;
+   case BSPI_FIFO_FULL:
+   return INTR_BSPI_LR_FULLNESS_REACHED_MASK;
case MSPI_BSPI_DONE:
return QSPI_INTERRUPTS_ALL;
case BSPI_ERR:
-- 
2.27.0



Re: [PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()

2018-11-27 Thread Mark Tomlinson


On 27/11/18 11:11 PM, Greg KH wrote:
> On Tue, Nov 27, 2018 at 09:57:23AM +1300, Mark Tomlinson wrote:
>> sysrq_do_reset() is called in softirq context, so it cannot call
>> sync() directly. Instead, call orderly_reboot(), which creates a work
>> item to run /sbin/reboot, or do emergency_sync and restart if the
>> command fails.
>>
>> Signed-off-by: Mark Tomlinson 
>> ---
>>   drivers/tty/sysrq.c | 3 +--
>>   1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
>> index ad1ee5d01b53..f2ca32c1ad7c 100644
>> --- a/drivers/tty/sysrq.c
>> +++ b/drivers/tty/sysrq.c
>> @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t)
>>   
>>  state->reset_requested = true;
>>   
>> -ksys_sync();
>> -kernel_restart(NULL);
>> +orderly_reboot();
> 
> Is this something new?  Why haven't we had reports of this failing in
> the past?  Or has something changed recently to cause this to now be
> needed?
> 
> thanks,
> 
> greg k-h
> 

As far as I can tell, sysrq_do_reset() is only used when setting the 
"keyset" property in "/chosen/linux,sysrq-reset-seq" (which is what I 
want to do). No DTS file in the kernel tree sets this, so I don't know 
if anyone else is using this. I am wondering whether it ever worked. A 
reboot does occur after a kernel panic, so if the console is not 
available, it may appear to have worked.

Pressing the standard 'sysrq' key followed by 'b'(reboot) instead calls 
sysrq_handle_reboot(), which does not call sync at all, but 
emergency_restart(). Also, the 's'(sync) key calls emergency_sync(), 
which performs the sync via a work queue.

Re: [PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()

2018-11-27 Thread Mark Tomlinson


On 27/11/18 11:11 PM, Greg KH wrote:
> On Tue, Nov 27, 2018 at 09:57:23AM +1300, Mark Tomlinson wrote:
>> sysrq_do_reset() is called in softirq context, so it cannot call
>> sync() directly. Instead, call orderly_reboot(), which creates a work
>> item to run /sbin/reboot, or do emergency_sync and restart if the
>> command fails.
>>
>> Signed-off-by: Mark Tomlinson 
>> ---
>>   drivers/tty/sysrq.c | 3 +--
>>   1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
>> index ad1ee5d01b53..f2ca32c1ad7c 100644
>> --- a/drivers/tty/sysrq.c
>> +++ b/drivers/tty/sysrq.c
>> @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t)
>>   
>>  state->reset_requested = true;
>>   
>> -ksys_sync();
>> -kernel_restart(NULL);
>> +orderly_reboot();
> 
> Is this something new?  Why haven't we had reports of this failing in
> the past?  Or has something changed recently to cause this to now be
> needed?
> 
> thanks,
> 
> greg k-h
> 

As far as I can tell, sysrq_do_reset() is only used when setting the 
"keyset" property in "/chosen/linux,sysrq-reset-seq" (which is what I 
want to do). No DTS file in the kernel tree sets this, so I don't know 
if anyone else is using this. I am wondering whether it ever worked. A 
reboot does occur after a kernel panic, so if the console is not 
available, it may appear to have worked.

Pressing the standard 'sysrq' key followed by 'b'(reboot) instead calls 
sysrq_handle_reboot(), which does not call sync at all, but 
emergency_restart(). Also, the 's'(sync) key calls emergency_sync(), 
which performs the sync via a work queue.

[PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()

2018-11-26 Thread Mark Tomlinson
sysrq_do_reset() is called in softirq context, so it cannot call
sync() directly. Instead, call orderly_reboot(), which creates a work
item to run /sbin/reboot, or do emergency_sync and restart if the
command fails.

Signed-off-by: Mark Tomlinson 
---
 drivers/tty/sysrq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index ad1ee5d01b53..f2ca32c1ad7c 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t)
 
state->reset_requested = true;
 
-   ksys_sync();
-   kernel_restart(NULL);
+   orderly_reboot();
 }
 
 static void sysrq_handle_reset_request(struct sysrq_state *state)
-- 
2.19.2


[PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()

2018-11-26 Thread Mark Tomlinson
sysrq_do_reset() is called in softirq context, so it cannot call
sync() directly. Instead, call orderly_reboot(), which creates a work
item to run /sbin/reboot, or do emergency_sync and restart if the
command fails.

Signed-off-by: Mark Tomlinson 
---
 drivers/tty/sysrq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index ad1ee5d01b53..f2ca32c1ad7c 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t)
 
state->reset_requested = true;
 
-   ksys_sync();
-   kernel_restart(NULL);
+   orderly_reboot();
 }
 
 static void sysrq_handle_reset_request(struct sysrq_state *state)
-- 
2.19.2


[PATCH 1/1] JFFS2: Less locking when reading directory entries

2015-05-17 Thread Mark Tomlinson
At startup, reading a directory (for example to do an ls), requires finding
information on every file. To support JFFS2 recovery from partially written
blocks, the JFFS2 driver must scan all data blocks to verify the checksums
are correct just to be able to correctly report the length of a file. This
will take some time, and will be dependent on the amount of data on the
filesystem.

What makes this worse is that any path lookup will lock the dentry cache
to add the new entry. The JFFS2 driver then spends time finding the file
information (reading the entire file), before it returns the new dentry
information allowing the cache to be unlocked. During this time, no other
files in the same directory can be opened or even tested for existence.

However, there is no need for the dentry cache to be locked for the scan of
the file. The JFFS2 driver already locks the file, so the file will not be
deleted or modified. It also ensures that if another process tries to scan
the same file, the second process will be blocked and the scan will only proceed
once.

To make the scan occur without locking the cache, a new vfs call has been
added which allows a filesystem to scan the file, but not return anything.
When the lookup occurs after this, the JFFS2 driver will find this
information and can quickly return the filled-in dentry.

Signed-off-by: Mark Tomlinson 
---
 fs/jffs2/dir.c | 41 +--
 fs/namei.c | 63 +++---
 include/linux/fs.h |  1 +
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97..69c0ec4 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *);
 static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
 struct inode *, struct dentry *);
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name);
 
 const struct file_operations jffs2_dir_operations =
 {
@@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations =
 {
.create =   jffs2_create,
.lookup =   jffs2_lookup,
+   .prescan =  jffs2_prescan,
.link = jffs2_link,
.unlink =   jffs2_unlink,
.symlink =  jffs2_symlink,
@@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations =
and we use the same hash function as the dentries. Makes this
nice and simple
 */
-static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
-  unsigned int flags)
+/* The prescan function does not have a dentry to fill in, so create this 
common function
+ * which is just passed the name and the inode for the directory.
+ * This function is very similar to the original jffs2_lookup, except for the 
arguments
+ * and the fact that the dentry (now not passed) is not updated.
+ */
+static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr 
*d_name)
 {
struct jffs2_inode_info *dir_f;
struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
 
jffs2_dbg(1, "jffs2_lookup()\n");
 
-   if (target->d_name.len > JFFS2_MAX_NAME_LEN)
+   if (d_name->len > JFFS2_MAX_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
 
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
	mutex_lock(&dir_f->sem);
 
/* NB: The 2.2 backport will need to explicitly check for '.' and '..' 
here */
-   for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= 
target->d_name.hash; fd_list = fd_list->next) {
-   if (fd_list->nhash == target->d_name.hash &&
+   for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; 
fd_list = fd_list->next) {
+   if (fd_list->nhash == d_name->hash &&
(!fd || fd_list->version > fd->version) &&
-   strlen(fd_list->name) == target->d_name.len &&
-   !strncmp(fd_list->name, target->d_name.name, 
target->d_name.len)) {
+   strlen(fd_list->name) == d_name->len &&
+   !strncmp(fd_list->name, d_name->name, d_name->len)) {
fd = fd_list;
}
}
@@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
if (IS_ERR(inode))
pr_warn("iget() failed for ino #%u\n", ino);
}
+   return inode;
+}
+
+/* Fill in an inode, and store the information in 

[PATCH 0/1] JFFS2: Less locking when reading directory entries

2015-05-17 Thread Mark Tomlinson
I have posted this before, but have extended the patch into a few more
functions. The intent of the code is as before -- to improve JFFS2 lookups
by not locking i_mutex for long periods when files are not in cache. For
our embedded environment, we see a *five second* improvement in boot time.

This patch is an attempt to improve the speed of JFFS2 at startup. Our
particular problem is that we have a 30MB file on NOR flash which takes
about five seconds to read and test the data CRCs. During this time access
to other files in the same directory is blocked, due to
parent->d_inode->i_mutex being locked.

This patch solves this problem by adding a 'pre-lookup' call down to JFFS2,
which can be called without this mutex held. When the actual lookup is
performed, the results are in JFFS2's cache, and the dentry can be filled
in quickly.

However, given that I do not have much experience with Linux filesystem code,
I can't be sure that this is a correct solution, or that there isn't a
better way of achieving what I'm trying to do. I feel there must be a way
to do this without creating a new VFS function call.

I suspect other filesystems could benefit from this too, as a lot of them
call the same d_splice_alias() function to fill in the dentry. JFFS2
already seems to have all the lower-level locks that are needed for this to
work; I don't know if that's true in other filesystems which could be
relying on the directory's i_mutex being locked. Because JFFS2 needs to
walk the entire file, there are big gains to be made here; other filesystems
may gain little to nothing.

I'm not expecting that this patch will get applied as-is, but please let me
know if there is any merit to it, whether it should work, and what still
needs to be done if this is to be made part of the kernel.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/1] JFFS2: Less locking when reading directory entries

2015-05-17 Thread Mark Tomlinson
At startup, reading a directory (for example to do an ls), requires finding
information on every file. To support JFFS2 recovery from partially written
blocks, the JFFS2 driver must scan all data blocks to verify the checksums
are correct just to be able to correctly report the length of a file. This
will take some time, and will be dependent on the amount of data on the
filesystem.

What makes this worse is that any path lookup will lock the dentry cache
to add the new entry. The JFFS2 driver then spends time finding the file
information (reading the entire file), before it returns the new dentry
information allowing the cache to be unlocked. During this time, no other
files in the same directory can be opened or even tested for existence.

However, there is no need for the dentry cache to be locked for the scan of
the file. The JFFS2 driver already locks the file, so the file will not be
deleted or modified. It also ensures that if another process tries to scan
the same file, the second process will be blocked and the scan will only proceed
once.

To make the scan occur without locking the cache, a new vfs call has been
added which allows a filesystem to scan the file, but not return anything.
When the lookup occurs after this, the JFFS2 driver will find this
information and can quickly return the filled-in dentry.

Signed-off-by: Mark Tomlinson mark.tomlin...@alliedtelesis.co.nz
---
 fs/jffs2/dir.c | 41 +--
 fs/namei.c | 63 +++---
 include/linux/fs.h |  1 +
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 1ba5c97..69c0ec4 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *);
 static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
 struct inode *, struct dentry *);
+static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name);
 
 const struct file_operations jffs2_dir_operations =
 {
@@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations =
 {
.create =   jffs2_create,
.lookup =   jffs2_lookup,
+   .prescan =  jffs2_prescan,
.link = jffs2_link,
.unlink =   jffs2_unlink,
.symlink =  jffs2_symlink,
@@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations =
and we use the same hash function as the dentries. Makes this
nice and simple
 */
-static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
-  unsigned int flags)
+/* The prescan function does not have a dentry to fill in, so create this 
common function
+ * which is just passed the name and the inode for the directory.
+ * This function is very similar to the original jffs2_lookup, except for the 
arguments
+ * and the fact that the dentry (now not passed) is not updated.
+ */
+static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr 
*d_name)
 {
struct jffs2_inode_info *dir_f;
struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
 
	jffs2_dbg(1, "jffs2_lookup()\n");
 
-	if (target->d_name.len > JFFS2_MAX_NAME_LEN)
+	if (d_name->len > JFFS2_MAX_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
 
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
	mutex_lock(&dir_f->sem);
 
/* NB: The 2.2 backport will need to explicitly check for '.' and '..' 
here */
-	for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) {
-		if (fd_list->nhash == target->d_name.hash &&
+	for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; fd_list = fd_list->next) {
+		if (fd_list->nhash == d_name->hash &&
		    (!fd || fd_list->version > fd->version) &&
-		    strlen(fd_list->name) == target->d_name.len &&
-		    !strncmp(fd_list->name, target->d_name.name, target->d_name.len)) {
+		    strlen(fd_list->name) == d_name->len &&
+		    !strncmp(fd_list->name, d_name->name, d_name->len)) {
fd = fd_list;
}
}
@@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, 
struct dentry *target,
if (IS_ERR(inode))
			pr_warn("iget() failed for ino #%u\n", ino);
}
+   return inode;
+}
+
+/* Fill in an inode, and store the information in cache. This allows a
+ * subsequent jffs2_lookup() call to proceed quickly, which is useful
+ * since the jffs2_lookup() call will have the directory entry cache
+ * locked.
+ */
+static void

[PATCH 0/1] JFFS2: Less locking when reading directory entries

2015-05-17 Thread Mark Tomlinson
I have posted this before, but have extended the patch into a few more
functions. The intent of the code is as before -- to improve JFFS2 lookups
by not locking i_mutex for long periods when files are not in cache. For
our embedded environment, we see a *five second* improvement in boot time.

This patch is an attempt to improve the speed of JFFS2 at startup. Our
particular problem is that we have a 30MB file on NOR flash which takes
about five seconds to read and test the data CRCs. During this time access
to other files in the same directory is blocked, due to
parent->d_inode->i_mutex being locked.

This patch solves this problem by adding a 'pre-lookup' call down to JFFS2,
which can be called without this mutex held. When the actual lookup is
performed, the results are in JFFS2's cache, and the dentry can be filled
in quickly.

However, given that I do not have much experience with Linux filesystem code,
I can't be sure that this is a correct solution, or that there isn't a
better way of achieving what I'm trying to do. I feel there must be a way
to do this without creating a new VFS function call.

I suspect other filesystems could benefit from this too, as a lot of them
call the same d_splice_alias() function to fill in the dentry. JFFS2
already seems to have all the lower-level locks that are needed for this to
work; I don't know if that's true in other filesystems which could be
relying on the directory's i_mutex being locked. Because JFFS2 needs to
walk the entire file, there are big gains to be made here; other filesystems
may gain little to nothing.

I'm not expecting that this patch will get applied as-is, but please let me
know if there is any merit to it, whether it should work, and what still
needs to be done if this is to be made part of the kernel.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/