[PATCH v2 1/3] Revert "netfilter: x_tables: Update remaining dereference to RCU"
This reverts commit 443d6e86f821a165fae3fc3fc13086d27ac140b1. This (and the following) patch basically re-implemented the RCU mechanisms of patch 784544739a25. That patch was replaced because of the performance problems that it created when replacing tables. Now, we have the same issue: the call to synchronize_rcu() makes replacing tables slower by as much as an order of magnitude. Revert these patches and fix the issue in a different way. Signed-off-by: Mark Tomlinson --- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c576a63d09db..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, ); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e8f6f9d86237..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, ); if (!ret && get.size == info.size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0d453fa9e327..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c 
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) -- 2.30.1
[PATCH v2 2/3] Revert "netfilter: x_tables: Switch synchronization to RCU"
This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c. This (and the preceding) patch basically re-implemented the RCU mechanisms of patch 784544739a25. That patch was replaced because of the performance problems that it created when replacing tables. Now, we have the same issue: the call to synchronize_rcu() makes replacing tables slower by as much as an order of magnitude. Prior to using RCU a script calling "iptables" approx. 200 times was taking 1.16s. With RCU this increased to 11.59s. Revert these patches and fix the issue in a different way. Signed-off-by: Mark Tomlinson --- include/linux/netfilter/x_tables.h | 5 +-- net/ipv4/netfilter/arp_tables.c| 14 - net/ipv4/netfilter/ip_tables.c | 14 - net/ipv6/netfilter/ip6_tables.c| 14 - net/netfilter/x_tables.c | 49 +- 5 files changed, 56 insertions(+), 40 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ebb64193757..5deb099d156d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... 
*/ - struct xt_table_info __rcu *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table); - #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 563b62b76a5f..d1e04d2b5170 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = xt_table_get_private_protected(table); + struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info 
*private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e2851f8d3a3..f15bc21d7301 100644 --- a/net/ipv4/netfilter
[PATCH v2 3/3] netfilter: x_tables: Use correct memory barriers.
When a new table value was assigned, it was followed by a write memory barrier. This ensured that all writes before this point would complete before any writes after this point. However, to determine whether the rules are unused, the sequence counter is read. To ensure that all writes have been done before these reads, a full memory barrier is needed, not just a write memory barrier. The same argument applies when incrementing the counter, before the rules are read. Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic reported in cc00bcaa5899 (which is still present), while still maintaining the same speed of replacing tables. The smp_mb() barriers potentially slow the packet path, however testing has shown no measurable change in performance on a 4-core MIPS64 platform. Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path") Signed-off-by: Mark Tomlinson --- include/linux/netfilter/x_tables.h | 2 +- net/netfilter/x_tables.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ec48466410a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..a2b50596b87e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1387,7 +1387,7 @@ xt_replace_table(struct xt_table *table, table->private = newinfo; /* make sure all cpus see new ->private value */ - smp_wmb(); + smp_mb(); /* * Even though table entries have now been swapped, other CPU's -- 2.30.1
[PATCH v2 0/3] Don't use RCU for x_tables synchronization
The patches to change to using RCU synchronization in x_tables cause updating tables to be slowed down by an order of magnitude. This has been tried before, see https://lore.kernel.org/patchwork/patch/151796/ and ultimately was rejected. As mentioned in the patch description, a different method can be used to ensure ordering of reads/writes. This can simply be done by changing from smp_wmb() to smp_mb(). changes in v2: - Update commit messages only Mark Tomlinson (3): Revert "netfilter: x_tables: Update remaining dereference to RCU" Revert "netfilter: x_tables: Switch synchronization to RCU" netfilter: x_tables: Use correct memory barriers. include/linux/netfilter/x_tables.h | 7 ++--- net/ipv4/netfilter/arp_tables.c| 16 +- net/ipv4/netfilter/ip_tables.c | 16 +- net/ipv6/netfilter/ip6_tables.c| 16 +- net/netfilter/x_tables.c | 49 +- 5 files changed, 60 insertions(+), 44 deletions(-) -- 2.30.1
Re: [PATCH 3/3] netfilter: x_tables: Use correct memory barriers.
On Thu, 2021-03-04 at 08:46 +0100, Florian Westphal wrote: > Mark Tomlinson wrote: > > Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic > > reported in cc00bcaa5899, > > Can you reproduce the crashes without this change? Yes. In our test environment we were seeing a kernel panic approx. twice a day, with a similar output to that shown in Subash's patch (cc00bcaa5899). With this patch we are not seeing any issue. The CPU is a dual-core ARM Cortex-A9. > > How much of an impact is the MB change on the packet path? I will run our throughput tests and get these results. I have a script which makes around 200 calls to iptables. This was taking 11.59s and now is back to 1.16s.
[PATCH 2/3] Revert "netfilter: x_tables: Switch synchronization to RCU"
This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c. This (and the preceding) patch basically re-implemented the RCU mechanisms of patch 784544739a25. That patch was replaced because of the performance problems that it created when replacing tables. Now, we have the same issue: the call to synchronize_rcu() makes replacing tables slower by as much as an order of magnitude. See https://lore.kernel.org/patchwork/patch/151796/ for why using RCU is not a good idea. Revert these patches and fix the issue in a different way. Signed-off-by: Mark Tomlinson --- include/linux/netfilter/x_tables.h | 5 +-- net/ipv4/netfilter/arp_tables.c| 14 - net/ipv4/netfilter/ip_tables.c | 14 - net/ipv6/netfilter/ip6_tables.c| 14 - net/netfilter/x_tables.c | 49 +- 5 files changed, 56 insertions(+), 40 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ebb64193757..5deb099d156d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info __rcu *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table); - #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 563b62b76a5f..d1e04d2b5170 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. 
*/ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = xt_table_get_private_protected(table); + struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct 
xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e2851f8d3a3..f15bc21d7301 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4
[PATCH 3/3] netfilter: x_tables: Use correct memory barriers.
When a new table value was assigned, it was followed by a write memory barrier. This ensured that all writes before this point would complete before any writes after this point. However, to determine whether the rules are unused, the sequence counter is read. To ensure that all writes have been done before these reads, a full memory barrier is needed, not just a write memory barrier. The same argument applies when incrementing the counter, before the rules are read. Changing to using smp_mb() instead of smp_wmb() fixes the kernel panic reported in cc00bcaa5899, while still maintaining the same speed of replacing tables. Fixes: 7f5c6d4f665b ("netfilter: get rid of atomic ops in fast path") Signed-off-by: Mark Tomlinson --- include/linux/netfilter/x_tables.h | 2 +- net/netfilter/x_tables.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ec48466410a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..a2b50596b87e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1387,7 +1387,7 @@ xt_replace_table(struct xt_table *table, table->private = newinfo; /* make sure all cpus see new ->private value */ - smp_wmb(); + smp_mb(); /* * Even though table entries have now been swapped, other CPU's -- 2.30.1
[PATCH 1/3] Revert "netfilter: x_tables: Update remaining dereference to RCU"
This reverts commit 443d6e86f821a165fae3fc3fc13086d27ac140b1. This (and the following) patch basically re-implemented the RCU mechanisms of patch 784544739a25. That patch was replaced because of the performance problems that it created when replacing tables. Now, we have the same issue: the call to synchronize_rcu() makes replacing tables slower by as much as an order of magnitude. See https://lore.kernel.org/patchwork/patch/151796/ for why using RCU is not a good idea. Revert these patches and fix the issue in a different way. Signed-off-by: Mark Tomlinson --- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c576a63d09db..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, ); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e8f6f9d86237..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, ); if (!ret && get.size == info.size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 
0d453fa9e327..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) -- 2.30.1
[PATCH 0/3] Don't use RCU for x_tables synchronization
The patches to change to using RCU synchronization in x_tables cause updating tables to be slowed down by an order of magnitude. This has been tried before, see https://lore.kernel.org/patchwork/patch/151796/ and ultimately was rejected. As mentioned in the patch description, a different method can be used to ensure ordering of reads/writes. This can simply be done by changing from smp_wmb() to smp_mb(). Mark Tomlinson (3): Revert "netfilter: x_tables: Update remaining dereference to RCU" Revert "netfilter: x_tables: Switch synchronization to RCU" netfilter: x_tables: Use correct memory barriers. include/linux/netfilter/x_tables.h | 7 ++--- net/ipv4/netfilter/arp_tables.c| 16 +- net/ipv4/netfilter/ip_tables.c | 16 +- net/ipv6/netfilter/ip6_tables.c| 16 +- net/netfilter/x_tables.c | 49 +- 5 files changed, 60 insertions(+), 44 deletions(-) -- 2.30.1
[PATCH] pinctrl: bcm: pinctrl-nsp-gpio: Fix setting GPIO as output
When setting a GPIO pin to an output, it is important to set the value correctly before enabling the output so that a glitch is not seen on the pin. This glitch may be very short, but can be important if this is a reset signal. Fixes: 8bfcbbbcabe0 ("pinctrl: nsp: add gpio-a driver support for Broadcom NSP SoC") Signed-off-by: Mark Tomlinson --- drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index a00a42a61a90..942f04ca4868 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -289,8 +289,8 @@ static int nsp_gpio_direction_output(struct gpio_chip *gc, unsigned gpio, unsigned long flags; raw_spin_lock_irqsave(>lock, flags); - nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, true); nsp_set_bit(chip, REG, NSP_GPIO_DATA_OUT, gpio, !!(val)); + nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, true); raw_spin_unlock_irqrestore(>lock, flags); dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val); -- 2.29.2
[PATCH] pinctrl: bcm: pinctrl-iproc-gpio: Fix setting GPIO as output
When setting a GPIO pin to an output, it is important to set the value correctly before enabling the output so that a glitch is not seen on the pin. This glitch may be very short, but can be important if this is a reset signal. Fixes: b64333ce769c ("pinctrl: cygnus: add gpio/pinconf driver") Signed-off-by: Mark Tomlinson --- drivers/pinctrl/bcm/pinctrl-iproc-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index e2bd2dce6bb4..cadcf5eb0466 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -348,8 +348,8 @@ static int iproc_gpio_direction_output(struct gpio_chip *gc, unsigned gpio, unsigned long flags; raw_spin_lock_irqsave(>lock, flags); - iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, true); iproc_set_bit(chip, IPROC_GPIO_DATA_OUT_OFFSET, gpio, !!(val)); + iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, true); raw_spin_unlock_irqrestore(>lock, flags); dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val); -- 2.29.2
[PATCH] mtd: mtdoops: Don't write panic data twice
If calling mtdoops_write, don't also schedule work to be done later. Although this appears to not be causing an issue, possibly because the scheduled work will never get done, it is confusing. Fixes: 016c1291ce70 ("mtd: mtdoops: do not use mtd->panic_write directly") Signed-off-by: Mark Tomlinson --- drivers/mtd/mtdoops.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 4ced68be7ed7..774970bfcf85 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -279,12 +279,13 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, record_size - MTDOOPS_HEADER_SIZE, NULL); - /* Panics must be written immediately */ - if (reason != KMSG_DUMP_OOPS) + if (reason != KMSG_DUMP_OOPS) { + /* Panics must be written immediately */ mtdoops_write(cxt, 1); - - /* For other cases, schedule work to write it "nicely" */ - schedule_work(>work_write); + } else { + /* For other cases, schedule work to write it "nicely" */ + schedule_work(>work_write); + } } static void mtdoops_notify_add(struct mtd_info *mtd) -- 2.28.0
[PATCH v3] i2c: mv64xxx: Add bus error recovery
This adds i2c bus recovery to the mv64xxx driver. Implement bus recovery to recover from SCL/SDA stuck low. This uses the generic recovery function, setting the clock/data lines as GPIO pins, and sending 9 clocks to try and recover the bus. Signed-off-by: Mark Tomlinson --- Changes in v2: - use generic GPIO recovery function. Changes in v3: - remove call to i2c_recover_bus() during probe. - change error message to info when pinctrl fails, matching other similar code. - handle a lack of pinctrl information better. drivers/i2c/busses/i2c-mv64xxx.c | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8d9d4ffdcd24..5c5f1d797986 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,7 @@ struct mv64xxx_i2c_data { boolirq_clear_inverted; /* Clk div is 2 to the power n, not 2 to the power n + 1 */ boolclk_n_base_0; + struct i2c_bus_recovery_inforinfo; }; static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = { @@ -325,7 +327,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) drv_data->msg->flags); drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; mv64xxx_i2c_hw_init(drv_data); - drv_data->rc = -EIO; + i2c_recover_bus(_data->adapter); + drv_data->rc = -EAGAIN; } } @@ -562,6 +565,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data *drv_data) "time_left: %d\n", drv_data->block, (int)time_left); mv64xxx_i2c_hw_init(drv_data); + i2c_recover_bus(_data->adapter); } } else spin_unlock_irqrestore(_data->lock, flags); @@ -871,6 +875,24 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, } #endif /* CONFIG_OF */ +static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data, + struct device *dev) +{ + struct i2c_bus_recovery_info *rinfo = _data->rinfo; + + rinfo->pinctrl = devm_pinctrl_get(dev); + if 
(IS_ERR(rinfo->pinctrl)) { + if (PTR_ERR(rinfo->pinctrl) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_info(dev, "can't get pinctrl, bus recovery not supported\n"); + return PTR_ERR(rinfo->pinctrl); + } else if (!rinfo->pinctrl) + return -ENODEV; + + drv_data->adapter.bus_recovery_info = rinfo; + return 0; +} + static int mv64xxx_i2c_probe(struct platform_device *pd) { @@ -927,6 +949,10 @@ mv64xxx_i2c_probe(struct platform_device *pd) goto exit_reset; } + rc = mv64xxx_i2c_init_recovery_info(drv_data, >dev); + if (rc == -EPROBE_DEFER) + goto exit_reset; + drv_data->adapter.dev.parent = >dev; drv_data->adapter.algo = _i2c_algo; drv_data->adapter.owner = THIS_MODULE; -- 2.28.0
Re: [PATCH v2] i2c: mv64xxx: Add bus error recovery
On Tue, 2020-08-25 at 11:26 +0200, Wolfram Sang wrote: > On Tue, Aug 25, 2020 at 10:52:54AM +1200, Mark Tomlinson wrote: > > This adds i2c bus recovery to the mv64xxx driver. > > Implement bus recovery to recover from SCL/SDA stuck low. > > This uses the generic recovery function, setting the clock/data lines as > GPIO pins, and sending 9 clocks to try and recover the bus. > > Signed-off-by: Mark Tomlinson > --- > Changes in v2: > - use generic GPIO recovery function. > > Thank you for doing that! Glad to see the new helper function works for > you as well. The initialization code is all well, but I wonder about the > use of i2c_recover_bus(). Recovery should be tried only if it is > detected that SDA is pulled low when the bus should be free. So, you > shouldn't call i2c_recover_bus() in unconditionally in probe(). Can your > HW detect if SDA is stuck low? We actually don't need this call, as the bootloader checks and leaves the bus in a good state. I will remove this from the patch.
[PATCH v2] i2c: mv64xxx: Add bus error recovery
This adds i2c bus recovery to the mv64xxx driver. Implement bus recovery to recover from SCL/SDA stuck low. This uses the generic recovery function, setting the clock/data lines as GPIO pins, and sending 9 clocks to try and recover the bus. Signed-off-by: Mark Tomlinson --- Changes in v2: - use generic GPIO recovery function. drivers/i2c/busses/i2c-mv64xxx.c | 27 ++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8d9d4ffdcd24..e0f0c76c0d3b 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,7 @@ struct mv64xxx_i2c_data { boolirq_clear_inverted; /* Clk div is 2 to the power n, not 2 to the power n + 1 */ boolclk_n_base_0; + struct i2c_bus_recovery_inforinfo; }; static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = { @@ -325,7 +327,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) drv_data->msg->flags); drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; mv64xxx_i2c_hw_init(drv_data); - drv_data->rc = -EIO; + i2c_recover_bus(_data->adapter); + drv_data->rc = -EAGAIN; } } @@ -562,6 +565,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data *drv_data) "time_left: %d\n", drv_data->block, (int)time_left); mv64xxx_i2c_hw_init(drv_data); + i2c_recover_bus(_data->adapter); } } else spin_unlock_irqrestore(_data->lock, flags); @@ -871,6 +875,22 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, } #endif /* CONFIG_OF */ +static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data, + struct device *dev) +{ + struct i2c_bus_recovery_info *rinfo = _data->rinfo; + + rinfo->pinctrl = devm_pinctrl_get(dev); + if (IS_ERR(rinfo->pinctrl)) { + if (PTR_ERR(rinfo->pinctrl) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_err(dev, "can't get pinctrl, bus recovery not supported\n"); + return PTR_ERR(rinfo->pinctrl); + } + 
drv_data->adapter.bus_recovery_info = rinfo; + return 0; +} + static int mv64xxx_i2c_probe(struct platform_device *pd) { @@ -927,6 +947,10 @@ mv64xxx_i2c_probe(struct platform_device *pd) goto exit_reset; } + rc = mv64xxx_i2c_init_recovery_info(drv_data, >dev); + if (rc == -EPROBE_DEFER) + goto exit_reset; + drv_data->adapter.dev.parent = >dev; drv_data->adapter.algo = _i2c_algo; drv_data->adapter.owner = THIS_MODULE; @@ -950,6 +974,7 @@ mv64xxx_i2c_probe(struct platform_device *pd) "mv64xxx: Can't add i2c adapter, rc: %d\n", -rc); goto exit_free_irq; } + i2c_recover_bus(_data->adapter); return 0; -- 2.28.0
[PATCH] gre6: Fix reception with IP6_TNL_F_RCV_DSCP_COPY
When receiving an IPv4 packet inside an IPv6 GRE packet, and the IP6_TNL_F_RCV_DSCP_COPY flag is set on the tunnel, the IPv4 header would get corrupted. This is due to the common ip6_tnl_rcv() function assuming that the inner header is always IPv6. This patch checks the tunnel protocol for IPv4 inner packets, but still defaults to IPv6. Fixes: 308edfdf1563 ("gre6: Cleanup GREv6 receive path, call common GRE functions") Signed-off-by: Mark Tomlinson --- net/ipv6/ip6_tunnel.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f635914f42ec..a0217e5bf3bc 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -915,7 +915,15 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, struct metadata_dst *tun_dst, bool log_ecn_err) { - return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate, + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb); + + dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate; + if (tpi->proto == htons(ETH_P_IP)) + dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate; + + return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_err); } EXPORT_SYMBOL(ip6_tnl_rcv); -- 2.28.0
[PATCH v4] PCI: Reduce warnings on possible RW1C corruption
For hardware that only supports 32-bit writes to PCI there is the possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited messages was introduced by fb2659230120, but rate-limiting is not the best choice here. Some devices may not show the warnings they should if another device has just produced a bunch of warnings. Also, the number of messages can be a nuisance on devices which are otherwise working fine. This patch changes the ratelimit to a single warning per bus. This ensures no bus is 'starved' of emitting a warning and also that there isn't a continuous stream of warnings. It would be preferable to have a warning per device, but the pci_dev structure is not available here, and a lookup from devfn would be far too slow. Suggested-by: Bjorn Helgaas Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit config writes") Signed-off-by: Mark Tomlinson --- changes in v4: - Use bitfield rather than bool to save memory (was meant to be in v3). drivers/pci/access.c | 9 ++--- include/linux/pci.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 79c4a2ef269a..b452467fd133 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. 
*/ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", -size, pci_domain_nr(bus), bus->number, -PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", +size, pci_domain_nr(bus), bus->number, +PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = 1; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/include/linux/pci.h b/include/linux/pci.h index 34c1c4f45288..85211a787f8b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -626,6 +626,7 @@ struct pci_bus { struct bin_attribute*legacy_io; /* Legacy I/O for this bus */ struct bin_attribute*legacy_mem;/* Legacy mem */ unsigned intis_added:1; + unsigned intunsafe_warn:1; /* warned about RW1C config write */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) -- 2.28.0
[PATCH] RTC: Implement pretimeout watchdog for DS1307
If the hardware watchdog in the clock chip simply pulls the reset line of the CPU, then there is no chance to write a stack trace to help determine what may have been blocking the CPU. This patch adds a pretimeout to the watchdog, which, if enabled, sets a timer to go off before the hardware watchdog kicks in, and calls the standard pretimeout function, which can (for example) call panic. Signed-off-by: Mark Tomlinson --- drivers/rtc/rtc-ds1307.c | 35 ++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index 49702942bb08..647f8659d0bd 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -23,6 +23,7 @@ #include #include #include +#include /* * We can't determine type by probing, but if we expect pre-Linux code @@ -174,6 +175,10 @@ struct ds1307 { #ifdef CONFIG_COMMON_CLK struct clk_hw clks[2]; #endif +#ifdef CONFIG_WATCHDOG_CORE + struct timer_list soft_timer; + struct watchdog_device *wdt; +#endif }; struct chip_desc { @@ -863,12 +868,34 @@ static int m41txx_rtc_set_offset(struct device *dev, long offset) } #ifdef CONFIG_WATCHDOG_CORE +static void ds1388_soft_wdt_expire(struct timer_list *soft_timer) +{ + struct ds1307 *ds1307 = container_of(soft_timer, struct ds1307, soft_timer); + + watchdog_notify_pretimeout(ds1307->wdt); +} + +static void ds1388_soft_timer_set(struct watchdog_device *wdt_dev) +{ + struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev); + int soft_timeout; + + if (wdt_dev->pretimeout > 0) { + soft_timeout = wdt_dev->timeout - wdt_dev->pretimeout; + mod_timer(>soft_timer, jiffies + soft_timeout * HZ); + } else { + del_timer(>soft_timer); + } +} + static int ds1388_wdt_start(struct watchdog_device *wdt_dev) { struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev); u8 regs[2]; int ret; + ds1388_soft_timer_set(wdt_dev); + ret = regmap_update_bits(ds1307->regmap, DS1388_REG_FLAG, DS1388_BIT_WF, 0); if (ret) @@ -900,6 +927,7 @@ static int ds1388_wdt_stop(struct 
watchdog_device *wdt_dev) { struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev); + del_timer(&ds1307->soft_timer); return regmap_update_bits(ds1307->regmap, DS1388_REG_CONTROL, DS1388_BIT_WDE | DS1388_BIT_RST, 0); } @@ -909,6 +937,7 @@ static int ds1388_wdt_ping(struct watchdog_device *wdt_dev) struct ds1307 *ds1307 = watchdog_get_drvdata(wdt_dev); u8 regs[2]; + ds1388_soft_timer_set(wdt_dev); return regmap_bulk_read(ds1307->regmap, DS1388_REG_WDOG_HUN_SECS, regs, sizeof(regs)); } @@ -923,6 +952,7 @@ static int ds1388_wdt_set_timeout(struct watchdog_device *wdt_dev, regs[0] = 0; regs[1] = bin2bcd(wdt_dev->timeout); + ds1388_soft_timer_set(wdt_dev); return regmap_bulk_write(ds1307->regmap, DS1388_REG_WDOG_HUN_SECS, regs, sizeof(regs)); } @@ -1652,7 +1682,8 @@ static void ds1307_clks_register(struct ds1307 *ds1307) #ifdef CONFIG_WATCHDOG_CORE static const struct watchdog_info ds1388_wdt_info = { - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | + WDIOF_MAGICCLOSE | WDIOF_PRETIMEOUT, .identity = "DS1388 watchdog", }; @@ -1681,6 +1712,8 @@ static void ds1307_wdt_register(struct ds1307 *ds1307) wdt->timeout = 99; wdt->max_timeout = 99; wdt->min_timeout = 1; + ds1307->wdt = wdt; + timer_setup(&ds1307->soft_timer, ds1388_soft_wdt_expire, 0); watchdog_init_timeout(wdt, 0, ds1307->dev); watchdog_set_drvdata(wdt, ds1307); -- 2.28.0
[PATCH v3 2/2] PCI: Reduce warnings on possible RW1C corruption
For hardware that only supports 32-bit writes to PCI there is the possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited message was introduced by fb2659230120, but rate-limiting is not the best choice here. Some devices may not show the warnings they should if another device has just produced a bunch of warnings. Also, the number of messages can be a nuisance on devices which are otherwise working fine. This patch changes the ratelimit to a single warning per bus. This ensures no bus is 'starved' of emitting a warning and also that there isn't a continuous stream of warnings. It would be preferable to have a warning per device, but the pci_dev structure is not available here, and a lookup from devfn would be far too slow. Suggested-by: Bjorn Helgaas Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit config writes") Signed-off-by: Mark Tomlinson --- drivers/pci/access.c | 9 ++--- include/linux/pci.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 79c4a2ef269a..ab85cb7df9b6 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. 
*/ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", -size, pci_domain_nr(bus), bus->number, -PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", +size, pci_domain_nr(bus), bus->number, +PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = true; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/include/linux/pci.h b/include/linux/pci.h index 34c1c4f45288..5b6ab593ae09 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -613,6 +613,7 @@ struct pci_bus { unsigned char primary;/* Number of primary bridge */ unsigned char max_bus_speed; /* enum pci_bus_speed */ unsigned char cur_bus_speed; /* enum pci_bus_speed */ + boolunsafe_warn;/* warned about RW1C config write */ #ifdef CONFIG_PCI_DOMAINS_GENERIC int domain_nr; #endif -- 2.28.0
[PATCH v3 1/2] PCI: iproc: Set affinity mask on MSI interrupts
The core interrupt code expects the irq_set_affinity call to update the effective affinity for the interrupt. This was not being done, so update iproc_msi_irq_set_affinity() to do so. Fixes: 3bc2b2348835 ("PCI: iproc: Add iProc PCIe MSI support") Signed-off-by: Mark Tomlinson --- changes in v2: - Patch 1/2 Added Fixes tag - Patch 2/2 Replace original change with change suggested by Bjorn Helgaas. changes in v3: - Use bitfield rather than bool to save memory. drivers/pci/controller/pcie-iproc-msi.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index 3176ad3ab0e5..908475d27e0e 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data *data, struct iproc_msi *msi = irq_data_get_irq_chip_data(data); int target_cpu = cpumask_first(mask); int curr_cpu; + int ret; curr_cpu = hwirq_to_cpu(msi, data->hwirq); if (curr_cpu == target_cpu) - return IRQ_SET_MASK_OK_DONE; + ret = IRQ_SET_MASK_OK_DONE; + else { + /* steer MSI to the target CPU */ + data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + ret = IRQ_SET_MASK_OK; + } - /* steer MSI to the target CPU */ - data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + irq_data_update_effective_affinity(data, cpumask_of(target_cpu)); - return IRQ_SET_MASK_OK; + return ret; } static void iproc_msi_irq_compose_msi_msg(struct irq_data *data, -- 2.28.0
Re: [PATCH v2 2/2] PCI: Reduce warnings on possible RW1C corruption
On Fri, 2020-07-31 at 09:32 -0600, Rob Herring wrote: > > If we don't want to just warn when a 8 or 16 bit access occurs (I'm > not sure if 32-bit only accesses is possible or common. Seems like > PCI_COMMAND would always get written?), then a simple way to do this > is just move this out of line and do something like this where the bus > or device is created/registered: > > if (bus->ops->write == pci_generic_config_write32) > warn() > This doesn't work for many of the PCI drivers, since they wrap the call to pci_generic_config_write32() in their own function. > > > > diff --git a/include/linux/pci.h b/include/linux/pci.h > > index 34c1c4f45288..5b6ab593ae09 100644 > > --- a/include/linux/pci.h > > +++ b/include/linux/pci.h > > @@ -613,6 +613,7 @@ struct pci_bus { > > unsigned char primary;/* Number of primary bridge */ > > unsigned char max_bus_speed; /* enum pci_bus_speed */ > > unsigned char cur_bus_speed; /* enum pci_bus_speed */ > > + boolunsafe_warn;/* warned about RW1C config write */ > > Make this a bitfield next to 'is_added'. Will do, thanks.
[PATCH v2 2/2] PCI: Reduce warnings on possible RW1C corruption
For hardware that only supports 32-bit writes to PCI there is the possibility of clearing RW1C (write-one-to-clear) bits. A rate-limited message was introduced by fb2659230120, but rate-limiting is not the best choice here. Some devices may not show the warnings they should if another device has just produced a bunch of warnings. Also, the number of messages can be a nuisance on devices which are otherwise working fine. This patch changes the ratelimit to a single warning per bus. This ensures no bus is 'starved' of emitting a warning and also that there isn't a continuous stream of warnings. It would be preferable to have a warning per device, but the pci_dev structure is not available here, and a lookup from devfn would be far too slow. Suggested-by: Bjorn Helgaas Fixes: fb2659230120 ("PCI: Warn on possible RW1C corruption for sub-32 bit config writes") Signed-off-by: Mark Tomlinson --- drivers/pci/access.c | 9 ++--- include/linux/pci.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 79c4a2ef269a..ab85cb7df9b6 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, * write happen to have any RW1C (write-one-to-clear) bits set, we * just inadvertently cleared something we shouldn't have. 
*/ - dev_warn_ratelimited(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", -size, pci_domain_nr(bus), bus->number, -PCI_SLOT(devfn), PCI_FUNC(devfn), where); + if (!bus->unsafe_warn) { + dev_warn(&bus->dev, "%d-byte config write to %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", +size, pci_domain_nr(bus), bus->number, +PCI_SLOT(devfn), PCI_FUNC(devfn), where); + bus->unsafe_warn = true; + } mask = ~(((1 << (size * 8)) - 1) << ((where & 0x3) * 8)); tmp = readl(addr) & mask; diff --git a/include/linux/pci.h b/include/linux/pci.h index 34c1c4f45288..5b6ab593ae09 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -613,6 +613,7 @@ struct pci_bus { unsigned char primary;/* Number of primary bridge */ unsigned char max_bus_speed; /* enum pci_bus_speed */ unsigned char cur_bus_speed; /* enum pci_bus_speed */ + boolunsafe_warn;/* warned about RW1C config write */ #ifdef CONFIG_PCI_DOMAINS_GENERIC int domain_nr; #endif -- 2.28.0
[PATCH v2 1/2] PCI: iproc: Set affinity mask on MSI interrupts
The core interrupt code expects the irq_set_affinity call to update the effective affinity for the interrupt. This was not being done, so update iproc_msi_irq_set_affinity() to do so. Fixes: 3bc2b2348835 ("PCI: iproc: Add iProc PCIe MSI support") Signed-off-by: Mark Tomlinson --- changes in v2: - Patch 1/2 Added Fixes tag - Patch 2/2 Replace original change with change suggested by Bjorn Helgaas. drivers/pci/controller/pcie-iproc-msi.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index 3176ad3ab0e5..908475d27e0e 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data *data, struct iproc_msi *msi = irq_data_get_irq_chip_data(data); int target_cpu = cpumask_first(mask); int curr_cpu; + int ret; curr_cpu = hwirq_to_cpu(msi, data->hwirq); if (curr_cpu == target_cpu) - return IRQ_SET_MASK_OK_DONE; + ret = IRQ_SET_MASK_OK_DONE; + else { + /* steer MSI to the target CPU */ + data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + ret = IRQ_SET_MASK_OK; + } - /* steer MSI to the target CPU */ - data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + irq_data_update_effective_affinity(data, cpumask_of(target_cpu)); - return IRQ_SET_MASK_OK; + return ret; } static void iproc_msi_irq_compose_msi_msg(struct irq_data *data, -- 2.28.0
Re: [PATCH 2/3] PCI: iproc: Stop using generic config read/write functions
On Thu, 2020-07-30 at 11:09 -0500, Bjorn Helgaas wrote: > I think it would be better to have a warning once per device, so if > XYZ device has a problem and we look at the dmesg log, we would find a > single message for device XYZ as a hint. Would that reduce the > nuisance level enough? We would be OK with that. > So I think I did it wrong in fb2659230120 ("PCI: Warn on possible RW1C > corruption for sub-32 bit config writes"). Ratelimiting is the wrong > concept because what we want is a single warning per device, not a > limit on the similar messages for *all* devices, maybe something like > this: > > diff --git a/drivers/pci/access.c b/drivers/pci/access.c > index 79c4a2ef269a..e5f956b7e3b7 100644 > --- a/drivers/pci/access.c > +++ b/drivers/pci/access.c > @@ -160,9 +160,12 @@ int pci_generic_config_write32(struct pci_bus *bus, > unsigned int devfn, >* write happen to have any RW1C (write-one-to-clear) bits set, we >* just inadvertently cleared something we shouldn't have. >*/ > - dev_warn_ratelimited(>dev, "%d-byte config write to > %04x:%02x:%02x.%d offset %#x may corrupt adjacent RW1C bits\n", > + if (!(bus->unsafe_warn & (1 << devfn))) { > + dev_warn(>dev, "%d-byte config write to %04x:%02x:%02x.%d > offset %#x may corrupt adjacent RW1C bits\n", >size, pci_domain_nr(bus), bus->number, >PCI_SLOT(devfn), PCI_FUNC(devfn), where); > + bus->unsafe_warn |= 1 << devfn; > + } As I understand it, devfn is an 8-bit value with five bits of device and three bits of function. So (1 << devfn) is not going to fit in an 8-bit mask. Am I missing something here? (I do admit that my PCI knowledge is not great).
[PATCH 2/3] PCI: iproc: Stop using generic config read/write functions
The pci_generic_config_write32() function will give warning messages whenever writing less than 4 bytes at a time. As there is nothing we can do about this without changing the hardware, the message is just a nuisance. So instead of using the generic functions, use the functions that have already been written for reading/writing the config registers. Signed-off-by: Mark Tomlinson --- drivers/pci/controller/pcie-iproc.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 2c836eede42c..68ecd3050529 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -709,12 +709,13 @@ static int iproc_pcie_config_read32(struct pci_bus *bus, unsigned int devfn, { int ret; struct iproc_pcie *pcie = iproc_data(bus); + int busno = bus->number; iproc_pcie_apb_err_disable(bus, true); if (pcie->iproc_cfg_read) ret = iproc_pcie_config_read(bus, devfn, where, size, val); else - ret = pci_generic_config_read32(bus, devfn, where, size, val); + ret = iproc_pci_raw_config_read32(pcie, busno, devfn, where, size, val); iproc_pcie_apb_err_disable(bus, false); return ret; @@ -724,9 +725,11 @@ static int iproc_pcie_config_write32(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { int ret; + struct iproc_pcie *pcie = iproc_data(bus); + int busno = bus->number; iproc_pcie_apb_err_disable(bus, true); - ret = pci_generic_config_write32(bus, devfn, where, size, val); + ret = iproc_pci_raw_config_write32(pcie, busno, devfn, where, size, val); iproc_pcie_apb_err_disable(bus, false); return ret; -- 2.28.0
[PATCH 3/3] PCI: iproc: Set affinity mask on MSI interrupts
The core interrupt code expects the irq_set_affinity call to update the effective affinity for the interrupt. This was not being done, so update iproc_msi_irq_set_affinity() to do so. Signed-off-by: Mark Tomlinson --- drivers/pci/controller/pcie-iproc-msi.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index 3176ad3ab0e5..908475d27e0e 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -209,15 +209,20 @@ static int iproc_msi_irq_set_affinity(struct irq_data *data, struct iproc_msi *msi = irq_data_get_irq_chip_data(data); int target_cpu = cpumask_first(mask); int curr_cpu; + int ret; curr_cpu = hwirq_to_cpu(msi, data->hwirq); if (curr_cpu == target_cpu) - return IRQ_SET_MASK_OK_DONE; + ret = IRQ_SET_MASK_OK_DONE; + else { + /* steer MSI to the target CPU */ + data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + ret = IRQ_SET_MASK_OK; + } - /* steer MSI to the target CPU */ - data->hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq) + target_cpu; + irq_data_update_effective_affinity(data, cpumask_of(target_cpu)); - return IRQ_SET_MASK_OK; + return ret; } static void iproc_msi_irq_compose_msi_msg(struct irq_data *data, -- 2.28.0
[PATCH 1/3] PCI: iproc: Add bus number parameter to read/write functions
This makes the read/write functions more generic, allowing them to be used from other places. Signed-off-by: Mark Tomlinson --- drivers/pci/controller/pcie-iproc.c | 23 --- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 8c7f875acf7f..2c836eede42c 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -660,13 +660,13 @@ static void __iomem *iproc_pcie_bus_map_cfg_bus(struct pci_bus *bus, where); } -static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie, +static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie, int busno, unsigned int devfn, int where, int size, u32 *val) { void __iomem *addr; - addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3); + addr = iproc_pcie_map_cfg_bus(pcie, busno, devfn, where & ~0x3); if (!addr) { *val = ~0; return PCIBIOS_DEVICE_NOT_FOUND; @@ -680,14 +680,14 @@ static int iproc_pci_raw_config_read32(struct iproc_pcie *pcie, return PCIBIOS_SUCCESSFUL; } -static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie, +static int iproc_pci_raw_config_write32(struct iproc_pcie *pcie, int busno, unsigned int devfn, int where, int size, u32 val) { void __iomem *addr; u32 mask, tmp; - addr = iproc_pcie_map_cfg_bus(pcie, 0, devfn, where & ~0x3); + addr = iproc_pcie_map_cfg_bus(pcie, busno, devfn, where & ~0x3); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; @@ -793,7 +793,7 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie) } /* make sure we are not in EP mode */ - iproc_pci_raw_config_read32(pcie, 0, PCI_HEADER_TYPE, 1, _type); + iproc_pci_raw_config_read32(pcie, 0, 0, PCI_HEADER_TYPE, 1, _type); if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) { dev_err(dev, "in EP mode, hdr=%#02x\n", hdr_type); return -EFAULT; @@ -803,15 +803,16 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie) #define PCI_BRIDGE_CTRL_REG_OFFSET 0x43c #define PCI_CLASS_BRIDGE_MASK 0x00 #define 
PCI_CLASS_BRIDGE_SHIFT 8 - iproc_pci_raw_config_read32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET, + iproc_pci_raw_config_read32(pcie, 0, 0, PCI_BRIDGE_CTRL_REG_OFFSET, 4, ); class &= ~PCI_CLASS_BRIDGE_MASK; class |= (PCI_CLASS_BRIDGE_PCI << PCI_CLASS_BRIDGE_SHIFT); - iproc_pci_raw_config_write32(pcie, 0, PCI_BRIDGE_CTRL_REG_OFFSET, + iproc_pci_raw_config_write32(pcie, 0, 0, PCI_BRIDGE_CTRL_REG_OFFSET, 4, class); /* check link status to see if link is active */ - iproc_pci_raw_config_read32(pcie, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, + iproc_pci_raw_config_read32(pcie, 0, 0, + IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, 2, _status); if (link_status & PCI_EXP_LNKSTA_NLW) link_is_active = true; @@ -821,19 +822,19 @@ static int iproc_pcie_check_link(struct iproc_pcie *pcie) #define PCI_TARGET_LINK_SPEED_MASK 0xf #define PCI_TARGET_LINK_SPEED_GEN2 0x2 #define PCI_TARGET_LINK_SPEED_GEN1 0x1 - iproc_pci_raw_config_read32(pcie, 0, + iproc_pci_raw_config_read32(pcie, 0, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2, 4, _ctrl); if ((link_ctrl & PCI_TARGET_LINK_SPEED_MASK) == PCI_TARGET_LINK_SPEED_GEN2) { link_ctrl &= ~PCI_TARGET_LINK_SPEED_MASK; link_ctrl |= PCI_TARGET_LINK_SPEED_GEN1; - iproc_pci_raw_config_write32(pcie, 0, + iproc_pci_raw_config_write32(pcie, 0, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKCTL2, 4, link_ctrl); msleep(100); - iproc_pci_raw_config_read32(pcie, 0, + iproc_pci_raw_config_read32(pcie, 0, 0, IPROC_PCI_EXP_CAP + PCI_EXP_LNKSTA, 2, _status); if (link_status & PCI_EXP_LNKSTA_NLW) -- 2.28.0
[PATCH] ipv6: Support more than 32 MIFS
The function ip6mr_mfc_add() declared an array of ttls. If MAXMIFS is large, this would create a large stack frame. This is fixed, and made more efficient, by passing mf6cc_ifset to ip6mr_update_thresholds(). Signed-off-by: Mark Tomlinson --- As background to this patch, we have MAXMIFS set to 1025 in our kernel. This creates other issues apart from what this patch fixes, but this change does make the IPv4 and IPv6 code look more similar, and reduces total amount of code. Without the double handling of TTLs, I think it is also easier to understand. Hence I thought it could still become part of the main kernel. net/ipv6/ip6mr.c | 20 ++-- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1f4d20e97c07..7123849d201b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -836,7 +836,7 @@ static void ipmr_expire_process(struct timer_list *t) static void ip6mr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, - unsigned char *ttls) + struct if_set *ifset) { int vifi; @@ -845,9 +845,8 @@ static void ip6mr_update_thresholds(struct mr_table *mrt, memset(cache->mfc_un.res.ttls, 255, MAXMIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { - if (VIF_EXISTS(mrt, vifi) && - ttls[vifi] && ttls[vifi] < 255) { - cache->mfc_un.res.ttls[vifi] = ttls[vifi]; + if (VIF_EXISTS(mrt, vifi) && IF_ISSET(vifi, ifset)) { + cache->mfc_un.res.ttls[vifi] = 1; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; if (cache->mfc_un.res.maxvif <= vifi) @@ -1406,21 +1405,14 @@ void ip6_mr_cleanup(void) static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, struct mf6cctl *mfc, int mrtsock, int parent) { - unsigned char ttls[MAXMIFS]; struct mfc6_cache *uc, *c; struct mr_mfc *_uc; bool found; - int i, err; + int err; if (mfc->mf6cc_parent >= MAXMIFS) return -ENFILE; - memset(ttls, 255, MAXMIFS); - for (i = 0; i < MAXMIFS; i++) { - if (IF_ISSET(i, >mf6cc_ifset)) - ttls[i] = 1; - } - /* The entries are 
added/deleted only under RTNL */ rcu_read_lock(); c = ip6mr_cache_find_parent(mrt, >mf6cc_origin.sin6_addr, @@ -1429,7 +1421,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, if (c) { write_lock_bh(_lock); c->_c.mfc_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, >_c, ttls); + ip6mr_update_thresholds(mrt, >_c, >mf6cc_ifset); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; write_unlock_bh(_lock); @@ -1450,7 +1442,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; c->_c.mfc_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, >_c, ttls); + ip6mr_update_thresholds(mrt, >_c, >mf6cc_ifset); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; -- 2.27.0
[PATCH] i2c: mv64xxx: Add bus error recovery
This adds i2c bus recovery to the mv64xxx driver. Implement bus recovery to recover from SCL/SDA stuck low. This uses the generic recovery function, setting the clock/data lines as GPIO pins, and sending 9 clocks to try and recover the bus. Signed-off-by: Mark Tomlinson --- drivers/i2c/busses/i2c-mv64xxx.c | 77 +++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 829b8c98ae51..e58853ba3ef0 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,10 @@ struct mv64xxx_i2c_data { boolirq_clear_inverted; /* Clk div is 2 to the power n, not 2 to the power n + 1 */ boolclk_n_base_0; + struct pinctrl *pinctrl; + struct i2c_bus_recovery_inforinfo; + struct pinctrl_state*pin_default_state; + struct pinctrl_state*pin_gpio_state; }; static struct mv64xxx_i2c_regs mv64xxx_i2c_regs_mv64xxx = { @@ -325,7 +330,8 @@ mv64xxx_i2c_fsm(struct mv64xxx_i2c_data *drv_data, u32 status) drv_data->msg->flags); drv_data->action = MV64XXX_I2C_ACTION_SEND_STOP; mv64xxx_i2c_hw_init(drv_data); - drv_data->rc = -EIO; + i2c_recover_bus(_data->adapter); + drv_data->rc = -EAGAIN; } } @@ -563,6 +569,7 @@ mv64xxx_i2c_wait_for_completion(struct mv64xxx_i2c_data *drv_data) "time_left: %d\n", drv_data->block, (int)time_left); mv64xxx_i2c_hw_init(drv_data); + i2c_recover_bus(_data->adapter); } } else spin_unlock_irqrestore(_data->lock, flags); @@ -872,6 +879,69 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, } #endif /* CONFIG_OF */ +/* + * Switch to bit bang mode to prepare for i2c generic recovery. + */ +static void mv64xxx_i2c_prepare_recovery(struct i2c_adapter *adap) +{ + struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap); + + pinctrl_select_state(drv_data->pinctrl, drv_data->pin_gpio_state); +} + +/* + * Return to normal i2c operation following recovery. 
+ */ +static void mv64xxx_i2c_unprepare_recovery(struct i2c_adapter *adap) +{ + struct mv64xxx_i2c_data *drv_data = i2c_get_adapdata(adap); + + pinctrl_select_state(drv_data->pinctrl, drv_data->pin_default_state); +} + +static int mv64xxx_i2c_init_recovery_info(struct mv64xxx_i2c_data *drv_data, + struct platform_device *pd) +{ + struct i2c_bus_recovery_info *rinfo = _data->rinfo; + struct device *dev = >dev; + + drv_data->pinctrl = devm_pinctrl_get(dev); + if (!drv_data->pinctrl || IS_ERR(drv_data->pinctrl)) { + dev_err(dev, "can't get pinctrl, bus recovery not supported\n"); + return PTR_ERR(drv_data->pinctrl); + } + + drv_data->pin_default_state = pinctrl_lookup_state(drv_data->pinctrl, + PINCTRL_STATE_DEFAULT); + drv_data->pin_gpio_state = pinctrl_lookup_state(drv_data->pinctrl, + "gpio"); + rinfo->scl_gpiod = devm_gpiod_get(dev, "scl", + GPIOD_OUT_HIGH_OPEN_DRAIN); + rinfo->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_IN); + if (PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER || + PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + if (IS_ERR(rinfo->sda_gpiod) || + IS_ERR(rinfo->scl_gpiod) || + IS_ERR(drv_data->pin_default_state) || + IS_ERR(drv_data->pin_gpio_state)) { + dev_dbg(dev, "recovery information incomplete\n"); + return 0; + } + + dev_dbg(dev, "using scl-gpio %d and sda-gpio %d for recovery\n", + rinfo->scl_gpiod ? desc_to_gpio(rinfo->scl_gpiod) : -1, + rinfo->sda_gpiod ? desc_to_gpio(rinfo->sda_gpiod) : -1); + + rinfo->prepare_recovery = mv64xxx_i2c_prepare_recovery; + rinfo->unprepare_recovery = mv64xxx_i2c_unprepare_recovery; + rinfo->recover_bus = i2c_generic_scl_recovery; + drv_data->adapter.bus_recovery_info = rinfo; + + return 0; +} + static int mv64xxx_i2c_probe(struct platform_device *pd) { @@ -939,6 +1009,10 @@ mv64xxx_i2c_probe(struct platform_device *pd) mv64xxx_i2c_hw_init(drv_data); + rc = mv64xxx_i2c_init_recovery_info(drv_data, pd); + if (rc == -EPROBE_DEFER) + goto exit_reset
[PATCH v2] pinctrl: nsp: Set irq handler based on trig type
Rather than always using handle_simple_irq() as the gpio_irq_chip handler, set a more appropriate handler based on the IRQ trigger type requested. This is important for level triggered interrupts which need to be masked during handling. Also, fix the interrupt acknowledge so that it clears only one interrupt instead of all interrupts which are currently active. Finally there is no need to clear the interrupt during the interrupt handler, since the edge-triggered handler will do that for us. Signed-off-by: Mark Tomlinson --- Changes in v2: - Don't perform unnecessary acks. drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index bed0124388c0..a00a42a61a90 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -154,15 +154,9 @@ static irqreturn_t nsp_gpio_irq_handler(int irq, void *data) level &= readl(chip->base + NSP_GPIO_INT_MASK); int_bits = level | event; - for_each_set_bit(bit, _bits, gc->ngpio) { - /* -* Clear the interrupt before invoking the -* handler, so we do not leave any window -*/ - writel(BIT(bit), chip->base + NSP_GPIO_EVENT); + for_each_set_bit(bit, _bits, gc->ngpio) generic_handle_irq( irq_linear_revmap(gc->irq.domain, bit)); - } } return int_bits ? 
IRQ_HANDLED : IRQ_NONE; @@ -178,7 +172,7 @@ static void nsp_gpio_irq_ack(struct irq_data *d) trigger_type = irq_get_trigger_type(d->irq); if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) - nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val); + writel(val, chip->base + NSP_GPIO_EVENT); } /* @@ -262,6 +256,12 @@ static int nsp_gpio_irq_set_type(struct irq_data *d, unsigned int type) nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio, falling); nsp_set_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio, level_low); + + if (type & IRQ_TYPE_EDGE_BOTH) + irq_set_handler_locked(d, handle_edge_irq); + else + irq_set_handler_locked(d, handle_level_irq); + raw_spin_unlock_irqrestore(>lock, flags); dev_dbg(chip->dev, "gpio:%u level_low:%s falling:%s\n", gpio, @@ -691,7 +691,7 @@ static int nsp_gpio_probe(struct platform_device *pdev) girq->num_parents = 0; girq->parents = NULL; girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_simple_irq; + girq->handler = handle_bad_irq; } ret = devm_gpiochip_add_data(dev, gc, chip); -- 2.27.0
Re: [PATCH] pinctrl: nsp: Set irq handler based on trig type
On Tue, 2020-06-30 at 15:26 -0700, Ray Jui wrote: > - u32 trigger_type; > > > > - trigger_type = irq_get_trigger_type(d->irq); > > - if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) > > - nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val); > > + nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val); > > > I have a question here. I assume writing a bit to this register will > result in clearing that bit, is that true? > > Based on the driver, the 'nsp_gpio_irq_handler' seems to rely on > 'NSP_GPIO_EVENT' register to figure out which GPIO the interrupt is for. > And if so, and if this is cleared here that is invoked before the actual > IRQ handler, how does this work? It seems that this change masked another issue I was having. However, the original code is still wrong as using nsp_set_bit() will do a read/modify/write and clear all events regardless of what gpio is. I have found another issue, where I'm getting many more edge-triggered interrupts than I think I should be, so I'll sort that and send a v2 of this patch.
Re: [PATCH] pinctrl: initialise nsp-mux earlier.
On Tue, 2020-06-30 at 20:14 -0700, Florian Fainelli wrote: > Sorry, it looks like I made a mistake in my testing (or I was lucky), > > and this patch doesn't fix the issue. What is happening is: > > 1) nsp-pinmux driver is registered (arch_initcall). > > 2) nsp-gpio-a driver is registered (arch_initcall_sync). > > 3) of_platform_default_populate_init() is called (also at level > > arch_initcall_sync), which scans the device tree, adds the nsp-gpio-a > > device, runs its probe, and this returns -EPROBE_DEFER with the error > > message. > > 4) Only now nsp-pinmux device is probed. > > > > Changing the 'arch_initcall_sync' to 'device_initcall' in nsp-gpio-a > > ensures that the pinmux is probed first since > > of_platform_default_populate_init() will be called between the two > > register calls, and the error goes away. Is this change acceptable as a > > solution? > > If probe deferral did not work, certainly but it sounds like this is > being done just for the sake of eliminating a round of probe deferral, > is there a functional problem this is fixing? No, I'm just trying to prevent an "error" message appearing in syslog. > > The actual error message in syslog is: > > > > kern.err kernel: gpiochip_add_data_with_key: GPIOs 480..511 > > (1820.gpio) failed to register, -517 > > > > So an end user sees "err" and "failed", and doesn't know what "-517" > > means. > > How about this instead: > > diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c > index 4fa075d49fbc..10d9d0c17c9e 100644 > --- a/drivers/gpio/gpiolib.c > +++ b/drivers/gpio/gpiolib.c > @@ -1818,9 +1818,10 @@ int gpiochip_add_data_with_key(struct gpio_chip > *gc, void *data, > ida_simple_remove(_ida, gdev->id); > err_free_gdev: > /* failures here can mean systems won't boot... */ > - pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, > - gdev->base, gdev->base + gdev->ngpio - 1, > - gc->label ? 
: "generic", ret); > + if (ret != -EPROBE_DEFER) > + pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", > + __func__, gdev->base, gdev->base + gdev->ngpio - 1, > + gc->label ? : "generic", ret); > kfree(gdev); > return ret; > } > That was one of my thoughts too. I found someone had tried that earlier, but it was rejected: https://patchwork.ozlabs.org/project/linux-gpio/patch/1516566774-1786-1-git-send-email-da...@lechnology.com/
Re: [PATCH] pinctrl: initialise nsp-mux earlier.
On Tue, 2020-06-30 at 15:08 -0700, Ray Jui wrote: > May I know which GPIO driver you are referring to on NSP? Both the iProc > GPIO driver and the NSP GPIO driver are initialized at the level of > 'arch_initcall_sync', which is supposed to be after 'arch_initcall' used > here in the pinmux driver Sorry, it looks like I made a mistake in my testing (or I was lucky), and this patch doesn't fix the issue. What is happening is: 1) nsp-pinmux driver is registered (arch_initcall). 2) nsp-gpio-a driver is registered (arch_initcall_sync). 3) of_platform_default_populate_init() is called (also at level arch_initcall_sync), which scans the device tree, adds the nsp-gpio-a device, runs its probe, and this returns -EPROBE_DEFER with the error message. 4) Only now nsp-pinmux device is probed. Changing the 'arch_initcall_sync' to 'device_initcall' in nsp-gpio-a ensures that the pinmux is probed first since of_platform_default_populate_init() will be called between the two register calls, and the error goes away. Is this change acceptable as a solution? > > though the probe will succeed when the driver is re-initialised, the > > error can be scary to end users. To fix this, change the time the > > Scary to end users? I don't know about that. -EPROBE_DEFER was > introduced exactly for this purpose. Perhaps users need to learn what > -EPROBE_DEFER errno means? The actual error message in syslog is: kern.err kernel: gpiochip_add_data_with_key: GPIOs 480..511 (1820.gpio) failed to register, -517 So an end user sees "err" and "failed", and doesn't know what "-517" means.
[PATCH] pinctrl: initialise nsp-mux earlier.
The GPIO specified in the DTS file references the pinctrl, which is specified after the GPIO. If the GPIO is initialised before pinctrl, an error message for the -EPROBE_DEFER ends up in the kernel log. Even though the probe will succeed when the driver is re-initialised, the error can be scary to end users. To fix this, change the time the pinctrl is probed, so that it is always before the GPIO driver. Signed-off-by: Mark Tomlinson --- drivers/pinctrl/bcm/pinctrl-nsp-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c index f1d60a708815..7586949f83ec 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-mux.c @@ -639,4 +639,4 @@ static int __init nsp_pinmux_init(void) { return platform_driver_register(&nsp_pinmux_driver); } -arch_initcall(nsp_pinmux_init); +postcore_initcall(nsp_pinmux_init); -- 2.27.0
[PATCH] pinctrl: nsp: Set irq handler based on trig type
Rather than always using handle_simple_irq() as the gpio_irq_chip handler, set a more appropriate handler based on the IRQ trigger type requested. This is important for level triggered interrupts which need to be masked during handling. Also, always acknowledge the interrupt regardless of whether it is edge or level triggered. Signed-off-by: Mark Tomlinson --- drivers/pinctrl/bcm/pinctrl-nsp-gpio.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c index bed0124388c0..349fb384113e 100644 --- a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c @@ -174,11 +174,8 @@ static void nsp_gpio_irq_ack(struct irq_data *d) struct nsp_gpio *chip = gpiochip_get_data(gc); unsigned gpio = d->hwirq; u32 val = BIT(gpio); - u32 trigger_type; - trigger_type = irq_get_trigger_type(d->irq); - if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) - nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val); + nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val); } /* @@ -262,6 +259,12 @@ static int nsp_gpio_irq_set_type(struct irq_data *d, unsigned int type) nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio, falling); nsp_set_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio, level_low); + + if (type & IRQ_TYPE_EDGE_BOTH) + irq_set_handler_locked(d, handle_edge_irq); + else + irq_set_handler_locked(d, handle_level_irq); + raw_spin_unlock_irqrestore(&chip->lock, flags); dev_dbg(chip->dev, "gpio:%u level_low:%s falling:%s\n", gpio, @@ -691,7 +694,7 @@ static int nsp_gpio_probe(struct platform_device *pdev) girq->num_parents = 0; girq->parents = NULL; girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_simple_irq; + girq->handler = handle_bad_irq; } ret = devm_gpiochip_add_data(dev, gc, chip); -- 2.27.0
Re: [PATCH 4/5] spi: bcm-qspi: Make multiple data blocks interrupt-driven
On Mon, 2020-06-15 at 15:32 +0100, Mark Brown wrote: > On Mon, Jun 15, 2020 at 04:05:56PM +1200, Mark Tomlinson wrote: > > > When needing to send/receive data in small chunks, make this interrupt > > driven rather than waiting for a completion event for each small section > > of data. > > Again was this done for a reason and if so do we understand why doing > this from interrupt context is safe - how long can the interrupts be > when stuffing the FIFO from interrupt context? As I'm porting a Broadcom patch, I'm hoping someone else can add something to this. From the history it appears there was a hard limit (no small chunks), and this was changed to doing it in chunks with patch 345309fa7c0c92, apparently to improve performance. I believe this change further improves performance, but as the patch arrived without any documentation, I'm not certain. > > @@ -731,12 +733,14 @@ static inline u16 read_rxram_slot_u16(struct bcm_qspi > > *qspi, int slot) > > ((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8); > > } > > > > -static void read_from_hw(struct bcm_qspi *qspi, int slots) > > +static void read_from_hw(struct bcm_qspi *qspi) > > { > > Things might be clearer if this refactoring were split out into a > separate patch. Done. > > @@ -960,24 +966,21 @@ static int bcm_qspi_transfer_one(struct spi_master > > *master, > > struct spi_transfer *trans) > > { > > struct bcm_qspi *qspi = spi_master_get_devdata(master); > > - int slots; > > - unsigned long timeo = msecs_to_jiffies(100); > > + unsigned long timeo = msecs_to_jiffies(1000); > > That's a randomly chosen value - if we're now doing the entire transfer > then we should be trying to estimate the length of time the transfer > will take, for a very large transfer on a slow bus it's possible that > even a second won't be enough. > Again, the value came from Broadcom. Using the data length as an estimate sounds like a good idea. 
> > - complete(>mspi_done); > > + > > + read_from_hw(qspi); > > + > > + if (qspi->trans_pos.trans) { > > + write_to_hw(qspi); > > + } else { > > + complete(>mspi_done); > > + spi_finalize_current_transfer(qspi->master); > > + } > > + > > This is adding a spi_finalize_current_transfer() which we didn't have > before, and still leaving us doing cleanup work in the driver in another > thread. This is confused, the driver should only need to finalize the > transfer explicitly if it returned a timeout from transfer_one() but > nothing's changed there. I can remove the call to spi_finalize_current_transfer() from this patch. I'll try to check what does happen in the timeout case.
[PATCH 1/5] spi: bcm-qspi: Add support for setting BSPI clock
On iProc devices (unlike previous BCM SoCs) the clock rate of the SPI can be set. This patch adds the appropriate code for setting that. Reviewed-by: Callum Sinclair Reviewed-by: Chris Packham Signed-off-by: Mark Tomlinson --- drivers/spi/spi-bcm-qspi.c | 34 ++ 1 file changed, 34 insertions(+) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 681d09085175..8fc5b9b3757b 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -117,6 +117,13 @@ #define MSPI_MSPI_STATUS_SPIF BIT(0) +#define CRU_CTRL_REG 0x0 +#define QSPI_CLK_SEL_25M 0x00 +#define QSPI_CLK_SEL_50M 0x02 +#define QSPI_CLK_SEL_31M25 0x04 +#define QSPI_CLK_SEL_62M5 0x06 +#define QSPI_CLK_SEL_MASK 0x06 + #define INTR_BASE_BIT_SHIFT0x02 #define INTR_COUNT 0x07 @@ -170,6 +177,7 @@ enum base_type { MSPI, BSPI, CHIP_SELECT, + CRU_CTRL, BASEMAX, }; @@ -625,6 +633,7 @@ static void bcm_qspi_update_parms(struct bcm_qspi *qspi, static int bcm_qspi_setup(struct spi_device *spi) { struct bcm_qspi_parms *xp; + struct bcm_qspi *qspi = spi_master_get_devdata(spi->master); if (spi->bits_per_word > 16) return -EINVAL; @@ -639,6 +648,21 @@ static int bcm_qspi_setup(struct spi_device *spi) xp->speed_hz = spi->max_speed_hz; xp->mode = spi->mode; + if (qspi->base[CRU_CTRL]) { + u32 tmp = bcm_qspi_read(qspi, CRU_CTRL, CRU_CTRL_REG); + + /* Set BSPI clock rate */ + tmp &= ~QSPI_CLK_SEL_MASK; + if (spi->max_speed_hz >= 6250) + tmp |= QSPI_CLK_SEL_62M5; + else if (spi->max_speed_hz >= 5000) + tmp |= QSPI_CLK_SEL_50M; + else if (spi->max_speed_hz >= 3125) + tmp |= QSPI_CLK_SEL_31M25; + /* default is 25MHz */ + bcm_qspi_write(qspi, CRU_CTRL, CRU_CTRL_REG, tmp); + } + if (spi->bits_per_word) xp->bits_per_word = spi->bits_per_word; else @@ -1459,6 +1483,16 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi->soc_intc = NULL; } + /* iProc BSPI clock is set through CRU control */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cru_ctrl"); + if (res) { + qspi->base[CRU_CTRL] = 
devm_ioremap_resource(dev, res); + if (IS_ERR(qspi->base[CRU_CTRL])) { + ret = PTR_ERR(qspi->base[CRU_CTRL]); + goto qspi_probe_err; + } + } + ret = clk_prepare_enable(qspi->clk); if (ret) { dev_err(dev, "failed to prepare clock\n"); -- 2.27.0
[PATCH 4/5] spi: bcm-qspi: Make multiple data blocks interrupt-driven
When needing to send/receive data in small chunks, make this interrupt driven rather than waiting for a completion event for each small section of data. Reviewed-by: Callum Sinclair Reviewed-by: Chris Packham Signed-off-by: Mark Tomlinson --- drivers/spi/spi-bcm-qspi.c | 44 -- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index ce30ebf27f06..0cc51bcda300 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -200,12 +200,14 @@ struct bcm_qspi_dev_id { struct qspi_trans { struct spi_transfer *trans; int byte; + int slots; bool mspi_last_trans; }; struct bcm_qspi { struct platform_device *pdev; struct spi_master *master; + struct spi_device *spi_dev; struct clk *clk; u32 base_clk; u32 max_speed_hz; @@ -731,12 +733,14 @@ static inline u16 read_rxram_slot_u16(struct bcm_qspi *qspi, int slot) ((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8); } -static void read_from_hw(struct bcm_qspi *qspi, int slots) +static void read_from_hw(struct bcm_qspi *qspi) { struct qspi_trans tp; - int slot; + int slot, slots; bcm_qspi_disable_bspi(qspi); + tp = qspi->trans_pos; + slots = tp.slots; if (slots > MSPI_NUM_CDRAM) { /* should never happen */ @@ -744,8 +748,6 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots) return; } - tp = qspi->trans_pos; - for (slot = 0; slot < slots; slot++) { if (tp.trans->rx_buf) { if (tp.trans->bits_per_word <= 8) { @@ -803,11 +805,12 @@ static inline void write_cdram_slot(struct bcm_qspi *qspi, int slot, u32 val) } /* Return number of slots written */ -static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi) +static int write_to_hw(struct bcm_qspi *qspi) { struct qspi_trans tp; int slot = 0, tstatus = 0; u32 mspi_cdram = 0; + struct spi_device *spi = qspi->spi_dev; bcm_qspi_disable_bspi(qspi); tp = qspi->trans_pos; @@ -846,6 +849,9 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi) slot++; } + /* save slot number 
for read_from_hw() */ + qspi->trans_pos.slots = slot; + if (!slot) { dev_err(>pdev->dev, "%s: no data to send?", __func__); goto done; @@ -960,24 +966,21 @@ static int bcm_qspi_transfer_one(struct spi_master *master, struct spi_transfer *trans) { struct bcm_qspi *qspi = spi_master_get_devdata(master); - int slots; - unsigned long timeo = msecs_to_jiffies(100); + unsigned long timeo = msecs_to_jiffies(1000); if (!spi->cs_gpiod) bcm_qspi_chip_select(qspi, spi->chip_select); qspi->trans_pos.trans = trans; qspi->trans_pos.byte = 0; + qspi->spi_dev = spi; - while (qspi->trans_pos.byte < trans->len) { - reinit_completion(>mspi_done); + reinit_completion(>mspi_done); - slots = write_to_hw(qspi, spi); - if (!wait_for_completion_timeout(>mspi_done, timeo)) { - dev_err(>pdev->dev, "timeout waiting for MSPI\n"); - return -ETIMEDOUT; - } + write_to_hw(qspi); - read_from_hw(qspi, slots); + if (!wait_for_completion_timeout(>mspi_done, timeo)) { + dev_err(>pdev->dev, "timeout waiting for MSPI\n"); + return -ETIMEDOUT; } bcm_qspi_enable_bspi(qspi); @@ -1092,7 +1095,16 @@ static irqreturn_t bcm_qspi_mspi_l2_isr(int irq, void *dev_id) bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status); if (qspi->soc_intc) soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_DONE); - complete(>mspi_done); + + read_from_hw(qspi); + + if (qspi->trans_pos.trans) { + write_to_hw(qspi); + } else { + complete(>mspi_done); + spi_finalize_current_transfer(qspi->master); + } + return IRQ_HANDLED; } -- 2.27.0
[PATCH 2/5] spi: bcm-qspi: Improve debug reading SPI data
This patch prevents device debug when data is not read from hardware (i.e. when there is no receive buffer). Reviewed-by: Callum Sinclair Reviewed-by: Chris Packham Signed-off-by: Mark Tomlinson --- drivers/spi/spi-bcm-qspi.c | 31 --- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 8fc5b9b3757b..92e04d24359f 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -749,21 +749,22 @@ static void read_from_hw(struct bcm_qspi *qspi, int slots) tp = qspi->trans_pos; for (slot = 0; slot < slots; slot++) { - if (tp.trans->bits_per_word <= 8) { - u8 *buf = tp.trans->rx_buf; - - if (buf) - buf[tp.byte] = read_rxram_slot_u8(qspi, slot); - dev_dbg(>pdev->dev, "RD %02x\n", - buf ? buf[tp.byte] : 0x0); - } else { - u16 *buf = tp.trans->rx_buf; - - if (buf) - buf[tp.byte / 2] = read_rxram_slot_u16(qspi, - slot); - dev_dbg(>pdev->dev, "RD %04x\n", - buf ? buf[tp.byte / 2] : 0x0); + if (tp.trans->rx_buf) { + if (tp.trans->bits_per_word <= 8) { + u8 *buf = tp.trans->rx_buf; + + buf[tp.byte] = + read_rxram_slot_u8(qspi, slot); + dev_dbg(>pdev->dev, "RD %02x\n", + buf[tp.byte]); + } else { + u16 *buf = tp.trans->rx_buf; + + buf[tp.byte / 2] = + read_rxram_slot_u16(qspi, slot); + dev_dbg(>pdev->dev, "RD %04x\n", + buf[tp.byte / 2]); + } } update_qspi_trans_byte_count(qspi, , -- 2.27.0
[PATCH 0/5] Improvements to spi-bcm-qspi
This series of patches came from a single large Broadcom patch that implements drivers for a number of their integrated switch chips. Mostly this is just splitting the qspi driver into smaller parts and doesn't include much original from me. Mark Tomlinson (5): spi: bcm-qspi: Add support for setting BSPI clock spi: bcm-qspi: Improve debug reading SPI data spi: bcm-qspi: Do not split transfers into small chunks spi: bcm-qspi: Make multiple data blocks interrupt-driven spi: bcm-qspi: Improve interrupt handling drivers/spi/spi-bcm-qspi.c | 189 ++--- drivers/spi/spi-bcm-qspi.h | 5 +- 2 files changed, 115 insertions(+), 79 deletions(-) -- 2.27.0
[PATCH 3/5] spi: bcm-qspi: Do not split transfers into small chunks
Instead of splitting transfers into smaller parts, just perform the operation that the higher level asked for. Reviewed-by: Callum Sinclair Reviewed-by: Chris Packham Signed-off-by: Mark Tomlinson --- drivers/spi/spi-bcm-qspi.c | 69 +++--- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 92e04d24359f..ce30ebf27f06 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -78,8 +78,6 @@ #define BSPI_BPP_MODE_SELECT_MASK BIT(8) #define BSPI_BPP_ADDR_SELECT_MASK BIT(16) -#define BSPI_READ_LENGTH 256 - /* MSPI register offsets */ #define MSPI_SPCR0_LSB 0x000 #define MSPI_SPCR0_MSB 0x004 @@ -888,9 +886,9 @@ static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi, const struct spi_mem_op *op) { struct bcm_qspi *qspi = spi_master_get_devdata(spi->master); - u32 addr = 0, len, rdlen, len_words, from = 0; + u32 addr = 0, len, len_words, from = 0; int ret = 0; - unsigned long timeo = msecs_to_jiffies(100); + unsigned long timeo = msecs_to_jiffies(1500); struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc; if (bcm_qspi_bspi_ver_three(qspi)) @@ -925,47 +923,34 @@ static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi, * into RAF buffer read lengths */ len = op->data.nbytes; + reinit_completion(>bspi_done); + bcm_qspi_enable_bspi(qspi); + len_words = (len + 3) >> 2; + qspi->bspi_rf_op = op; + qspi->bspi_rf_op_status = 0; qspi->bspi_rf_op_idx = 0; + qspi->bspi_rf_op_len = len; + dev_dbg(>pdev->dev, "bspi xfr addr 0x%x len 0x%x", addr, len); - do { - if (len > BSPI_READ_LENGTH) - rdlen = BSPI_READ_LENGTH; - else - rdlen = len; - - reinit_completion(>bspi_done); - bcm_qspi_enable_bspi(qspi); - len_words = (rdlen + 3) >> 2; - qspi->bspi_rf_op = op; - qspi->bspi_rf_op_status = 0; - qspi->bspi_rf_op_len = rdlen; - dev_dbg(>pdev->dev, - "bspi xfr addr 0x%x len 0x%x", addr, rdlen); - bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr); - bcm_qspi_write(qspi, BSPI, 
BSPI_RAF_NUM_WORDS, len_words); - bcm_qspi_write(qspi, BSPI, BSPI_RAF_WATERMARK, 0); - if (qspi->soc_intc) { - /* -* clear soc MSPI and BSPI interrupts and enable -* BSPI interrupts. -*/ - soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_BSPI_DONE); - soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, true); - } - - /* Must flush previous writes before starting BSPI operation */ - mb(); - bcm_qspi_bspi_lr_start(qspi); - if (!wait_for_completion_timeout(>bspi_done, timeo)) { - dev_err(>pdev->dev, "timeout waiting for BSPI\n"); - ret = -ETIMEDOUT; - break; - } + bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr); + bcm_qspi_write(qspi, BSPI, BSPI_RAF_NUM_WORDS, len_words); + bcm_qspi_write(qspi, BSPI, BSPI_RAF_WATERMARK, 0); + if (qspi->soc_intc) { + /* +* clear soc MSPI and BSPI interrupts and enable +* BSPI interrupts. +*/ + soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_BSPI_DONE); + soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, true); + } - /* set msg return length */ - addr += rdlen; - len -= rdlen; - } while (len); + /* Must flush previous writes before starting BSPI operation */ + mb(); + bcm_qspi_bspi_lr_start(qspi); + if (!wait_for_completion_timeout(>bspi_done, timeo)) { + dev_err(>pdev->dev, "timeout waiting for BSPI\n"); + ret = -ETIMEDOUT; + } return ret; } -- 2.27.0
[PATCH 5/5] spi: bcm-qspi: Improve interrupt handling
Acknowledge interrupts correctly and add support for fifo-full interrupt, distinguishing it from the done interrupt. Reviewed-by: Callum Sinclair Reviewed-by: Chris Packham Signed-off-by: Mark Tomlinson --- drivers/spi/spi-bcm-qspi.c | 11 ++- drivers/spi/spi-bcm-qspi.h | 5 - 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 0cc51bcda300..3753eff8a154 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1123,6 +1123,8 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id) if (qspi->bspi_rf_op_len == 0) { qspi->bspi_rf_op = NULL; if (qspi->soc_intc) { + /* Ack BSPI done interrupt */ + soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_DONE); /* disable soc BSPI interrupt */ soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, false); @@ -1134,11 +1136,10 @@ static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id) bcm_qspi_bspi_lr_clear(qspi); else bcm_qspi_bspi_flush_prefetch_buffers(qspi); - } - - if (qspi->soc_intc) - /* clear soc BSPI interrupt */ - soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_DONE); + } else if (qspi->soc_intc) + /* Ack FIFO full interrupt */ + soc_intc->bcm_qspi_int_ack(soc_intc, + BSPI_FIFO_FULL); } status &= INTR_BSPI_LR_SESSION_DONE_MASK; diff --git a/drivers/spi/spi-bcm-qspi.h b/drivers/spi/spi-bcm-qspi.h index 01aec6460108..b68e275bc721 100644 --- a/drivers/spi/spi-bcm-qspi.h +++ b/drivers/spi/spi-bcm-qspi.h @@ -48,7 +48,8 @@ enum { MSPI_DONE = 0x1, BSPI_DONE = 0x2, BSPI_ERR = 0x4, - MSPI_BSPI_DONE = 0x7 + MSPI_BSPI_DONE = 0x7, + BSPI_FIFO_FULL = 0x8 }; struct bcm_qspi_soc_intc { @@ -84,6 +85,8 @@ static inline u32 get_qspi_mask(int type) return INTR_MSPI_DONE_MASK; case BSPI_DONE: return BSPI_LR_INTERRUPTS_ALL; + case BSPI_FIFO_FULL: + return INTR_BSPI_LR_FULLNESS_REACHED_MASK; case MSPI_BSPI_DONE: return QSPI_INTERRUPTS_ALL; case BSPI_ERR: -- 2.27.0
Re: [PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()
On 27/11/18 11:11 PM, Greg KH wrote: > On Tue, Nov 27, 2018 at 09:57:23AM +1300, Mark Tomlinson wrote: >> sysrq_do_reset() is called in softirq context, so it cannot call >> sync() directly. Instead, call orderly_reboot(), which creates a work >> item to run /sbin/reboot, or do emergency_sync and restart if the >> command fails. >> >> Signed-off-by: Mark Tomlinson >> --- >> drivers/tty/sysrq.c | 3 +-- >> 1 file changed, 1 insertion(+), 2 deletions(-) >> >> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c >> index ad1ee5d01b53..f2ca32c1ad7c 100644 >> --- a/drivers/tty/sysrq.c >> +++ b/drivers/tty/sysrq.c >> @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t) >> >> state->reset_requested = true; >> >> -ksys_sync(); >> -kernel_restart(NULL); >> +orderly_reboot(); > > Is this something new? Why haven't we had reports of this failing in > the past? Or has something changed recently to cause this to now be > needed? > > thanks, > > greg k-h > As far as I can tell, sysrq_do_reset() is only used when setting the "keyset" property in "/chosen/linux,sysrq-reset-seq" (which is what I want to do). No DTS file in the kernel tree sets this, so I don't know if anyone else is using this. I am wondering whether it ever worked. A reboot does occur after a kernel panic, so if the console is not available, it may appear to have worked. Pressing the standard 'sysrq' key followed by 'b'(reboot) instead calls sysrq_handle_reboot(), which does not call sync at all, but emergency_restart(). Also, the 's'(sync) key calls emergency_sync(), which performs the sync via a work queue.
Re: [PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()
On 27/11/18 11:11 PM, Greg KH wrote: > On Tue, Nov 27, 2018 at 09:57:23AM +1300, Mark Tomlinson wrote: >> sysrq_do_reset() is called in softirq context, so it cannot call >> sync() directly. Instead, call orderly_reboot(), which creates a work >> item to run /sbin/reboot, or do emergency_sync and restart if the >> command fails. >> >> Signed-off-by: Mark Tomlinson >> --- >> drivers/tty/sysrq.c | 3 +-- >> 1 file changed, 1 insertion(+), 2 deletions(-) >> >> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c >> index ad1ee5d01b53..f2ca32c1ad7c 100644 >> --- a/drivers/tty/sysrq.c >> +++ b/drivers/tty/sysrq.c >> @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t) >> >> state->reset_requested = true; >> >> -ksys_sync(); >> -kernel_restart(NULL); >> +orderly_reboot(); > > Is this something new? Why haven't we had reports of this failing in > the past? Or has something changed recently to cause this to now be > needed? > > thanks, > > greg k-h > As far as I can tell, sysrq_do_reset() is only used when setting the "keyset" property in "/chosen/linux,sysrq-reset-seq" (which is what I want to do). No DTS file in the kernel tree sets this, so I don't know if anyone else is using this. I am wondering whether it ever worked. A reboot does occur after a kernel panic, so if the console is not available, it may appear to have worked. Pressing the standard 'sysrq' key followed by 'b'(reboot) instead calls sysrq_handle_reboot(), which does not call sync at all, but emergency_restart(). Also, the 's'(sync) key calls emergency_sync(), which performs the sync via a work queue.
[PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()
sysrq_do_reset() is called in softirq context, so it cannot call sync() directly. Instead, call orderly_reboot(), which creates a work item to run /sbin/reboot, or do emergency_sync and restart if the command fails. Signed-off-by: Mark Tomlinson --- drivers/tty/sysrq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index ad1ee5d01b53..f2ca32c1ad7c 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t) state->reset_requested = true; - ksys_sync(); - kernel_restart(NULL); + orderly_reboot(); } static void sysrq_handle_reset_request(struct sysrq_state *state) -- 2.19.2
[PATCH RESEND] tty/sysrq: Do not call sync directly from sysrq_do_reset()
sysrq_do_reset() is called in softirq context, so it cannot call sync() directly. Instead, call orderly_reboot(), which creates a work item to run /sbin/reboot, or do emergency_sync and restart if the command fails. Signed-off-by: Mark Tomlinson --- drivers/tty/sysrq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index ad1ee5d01b53..f2ca32c1ad7c 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -660,8 +660,7 @@ static void sysrq_do_reset(struct timer_list *t) state->reset_requested = true; - ksys_sync(); - kernel_restart(NULL); + orderly_reboot(); } static void sysrq_handle_reset_request(struct sysrq_state *state) -- 2.19.2
[PATCH 1/1] JFFS2: Less locking when reading directory entries
At startup, reading a directory (for example to do an ls), requires finding information on every file. To support JFFS2 recovery from partially written blocks, the JFFS2 driver must scan all data blocks to verify the checksums are correct just to be able to correctly report the length of a file. This will take some time, and will be dependent on the amount of data on the filesystem. What makes this worse is that any path lookup will lock the dentry cache to add the new entry. The JFFS2 driver then spends time finding the file information (reading the entire file), before it returns the new dentry information allowing the cache to be unlocked. During this time, no other files in the same directory can be opened or even tested for existence. However, there is no need for the dentry cache to be locked for the scan of the file. The JFFS2 driver already locks the file, so the file will not be deleted or modified. It also ensures that if another process tries to scan the same file, the second process will be blocked and the scan only proceed once. To make the scan occur without locking the cache, a new vfs call has been added which allows a filesystem to scan the file, but not return anything. When the lookup occurs after this, the JFFS2 driver will find this information and can quickly return the filled-in dentry. 
Signed-off-by: Mark Tomlinson --- fs/jffs2/dir.c | 41 +-- fs/namei.c | 63 +++--- include/linux/fs.h | 1 + 3 files changed, 85 insertions(+), 20 deletions(-) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 1ba5c97..69c0ec4 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *); static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t); static int jffs2_rename (struct inode *, struct dentry *, struct inode *, struct dentry *); +static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name); const struct file_operations jffs2_dir_operations = { @@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations = { .create = jffs2_create, .lookup = jffs2_lookup, + .prescan = jffs2_prescan, .link = jffs2_link, .unlink = jffs2_unlink, .symlink = jffs2_symlink, @@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations = and we use the same hash function as the dentries. Makes this nice and simple */ -static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, - unsigned int flags) +/* The prescan function does not have a dentry to fill in, so create this common function + * which is just passed the name and the inode for the directory. + * This function is very similar to the original jffs2_lookup, except for the arguments + * and the fact that the dentry (now not passed) is not updated. 
+ */ +static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr *d_name) { struct jffs2_inode_info *dir_f; struct jffs2_full_dirent *fd = NULL, *fd_list; @@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, jffs2_dbg(1, "jffs2_lookup()\n"); - if (target->d_name.len > JFFS2_MAX_NAME_LEN) + if (d_name->len > JFFS2_MAX_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); dir_f = JFFS2_INODE_INFO(dir_i); @@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, mutex_lock(_f->sem); /* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */ - for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) { - if (fd_list->nhash == target->d_name.hash && + for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; fd_list = fd_list->next) { + if (fd_list->nhash == d_name->hash && (!fd || fd_list->version > fd->version) && - strlen(fd_list->name) == target->d_name.len && - !strncmp(fd_list->name, target->d_name.name, target->d_name.len)) { + strlen(fd_list->name) == d_name->len && + !strncmp(fd_list->name, d_name->name, d_name->len)) { fd = fd_list; } } @@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, if (IS_ERR(inode)) pr_warn("iget() failed for ino #%u\n", ino); } + return inode; +} + +/* Fill in an inode, and store the information in
[PATCH 0/1] JFFS2: Less locking when reading directory entries
I have posted this before, but have extended the patch into a few more functions. The intent of the code is as before -- to improve JFFS2 lookups by not locking i_mutex for long periods when files are not in cache. For our embedded environment, we see a *five second* improvement in boot time. This patch is an attempt to improve the speed of JFFS2 at startup. Our particular problem is that we have a 30MB file on NOR flash which takes about five seconds to read and test the data CRCs. During this time access to other files in the same directory is blocked, due to parent->d_inode->i_mutex being locked. This patch solves this problem by adding a 'pre-lookup' call down to JFFS2, which can be called without this mutex held. When the actual lookup is performed, the results are in JFFS2's cache, and the dentry can be filled in quickly. However, given that I do not have experience with Linux filesystem code, I can't be sure that this is a correct solution, or that there isn't a better way of achieving what I'm trying to do. I feel there must be a way to do this without creating a new VFS function call. I suspect other filesystems could benefit from this too, as a lot of them call the same d_splice_alias() function to fill in the dentry. JFFS2 already seems to have all the lower-level locks that are needed for this to work; I don't know if that's true in other filesystems which could be relying on the directory's i_mutex being locked. Because JFFS2 needs to walk the entire file, there are big gains to be made here; other filesystems may gain little to nothing. I'm not expecting that this patch will get applied as-is, but please let me know if there is any merit to it, whether it should work, and what still needs to be done if this is to be made part of the kernel. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1] JFFS2: Less locking when reading directory entries
At startup, reading a directory (for example to do an ls), requires finding information on every file. To support JFFS2 recovery from partially written blocks, the JFFS2 driver must scan all data blocks to verify the checksums are correct just to be able to correctly report the length of a file. This will take some time, and will be dependent on the amount of data on the filesystem. What makes this worse is that any path lookup will lock the dentry cache to add the new entry. The JFFS2 driver then spends time finding the file information (reading the entire file), before it returns the new dentry information allowing the cache to be unlocked. During this time, no other files in the same directory can be opened or even tested for existence. However, there is no need for the dentry cache to be locked for the scan of the file. The JFFS2 driver already locks the file, so the file will not be deleted or modified. It also ensures that if another process tries to scan the same file, the second process will be blocked and the scan only proceed once. To make the scan occur without locking the cache, a new vfs call has been added which allows a filesystem to scan the file, but not return anything. When the lookup occurs after this, the JFFS2 driver will find this information and can quickly return the filled-in dentry. 
Signed-off-by: Mark Tomlinson mark.tomlin...@alliedtelesis.co.nz --- fs/jffs2/dir.c | 41 +-- fs/namei.c | 63 +++--- include/linux/fs.h | 1 + 3 files changed, 85 insertions(+), 20 deletions(-) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 1ba5c97..69c0ec4 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -36,6 +36,7 @@ static int jffs2_rmdir (struct inode *,struct dentry *); static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t); static int jffs2_rename (struct inode *, struct dentry *, struct inode *, struct dentry *); +static void jffs2_prescan(struct inode *dir_i, struct qstr *d_name); const struct file_operations jffs2_dir_operations = { @@ -51,6 +52,7 @@ const struct inode_operations jffs2_dir_inode_operations = { .create = jffs2_create, .lookup = jffs2_lookup, + .prescan = jffs2_prescan, .link = jffs2_link, .unlink = jffs2_unlink, .symlink = jffs2_symlink, @@ -74,8 +76,12 @@ const struct inode_operations jffs2_dir_inode_operations = and we use the same hash function as the dentries. Makes this nice and simple */ -static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, - unsigned int flags) +/* The prescan function does not have a dentry to fill in, so create this common function + * which is just passed the name and the inode for the directory. + * This function is very similar to the original jffs2_lookup, except for the arguments + * and the fact that the dentry (now not passed) is not updated. 
+ */ +static struct inode *jffs2_lookup_common(struct inode *dir_i, struct qstr *d_name) { struct jffs2_inode_info *dir_f; struct jffs2_full_dirent *fd = NULL, *fd_list; @@ -84,7 +90,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, jffs2_dbg(1, "jffs2_lookup()\n"); - if (target->d_name.len > JFFS2_MAX_NAME_LEN) + if (d_name->len > JFFS2_MAX_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); dir_f = JFFS2_INODE_INFO(dir_i); @@ -92,11 +98,11 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, mutex_lock(&dir_f->sem); /* NB: The 2.2 backport will need to explicitly check for '.' and '..' here */ - for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= target->d_name.hash; fd_list = fd_list->next) { - if (fd_list->nhash == target->d_name.hash && + for (fd_list = dir_f->dents; fd_list && fd_list->nhash <= d_name->hash; fd_list = fd_list->next) { + if (fd_list->nhash == d_name->hash && (!fd || fd_list->version > fd->version) && - strlen(fd_list->name) == target->d_name.len && - !strncmp(fd_list->name, target->d_name.name, target->d_name.len)) { + strlen(fd_list->name) == d_name->len && + !strncmp(fd_list->name, d_name->name, d_name->len)) { fd = fd_list; } } @@ -108,6 +114,27 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, if (IS_ERR(inode)) pr_warn("iget() failed for ino #%u\n", ino); } + return inode; +} + +/* Fill in an inode, and store the information in cache. This allows a + * subsequent jffs2_lookup() call to proceed quickly, which is useful + * since the jffs2_lookup() call will have the directory entry cache + * locked. + */ +static void
[PATCH 0/1] JFFS2: Less locking when reading directory entries
I have posted this before, but have extended the patch into a few more functions. The intent of the code is as before -- to improve JFFS2 lookups by not locking i_mutex for long periods when files are not in cache. For our embedded environment, we see a *five second* improvement in boot time. This patch is an attempt to improve the speed of JFFS2 at startup. Our particular problem is that we have a 30MB file on NOR flash which takes about five seconds to read and test the data CRCs. During this time access to other files in the same directory is blocked, due to parent->d_inode->i_mutex being locked. This patch solves this problem by adding a 'pre-lookup' call down to JFFS2, which can be called without this mutex held. When the actual lookup is performed, the results are in JFFS2's cache, and the dentry can be filled in quickly. However, given that I do not have experience with Linux filesystem code, I can't be sure that this is a correct solution, or that there isn't a better way of achieving what I'm trying to do. I feel there must be a way to do this without creating a new VFS function call. I suspect other filesystems could benefit from this too, as a lot of them call the same d_splice_alias() function to fill in the dentry. JFFS2 already seems to have all the lower-level locks that are needed for this to work; I don't know if that's true in other filesystems which could be relying on the directory's i_mutex being locked. Because JFFS2 needs to walk the entire file, there are big gains to be made here; other filesystems may gain little to nothing. I'm not expecting that this patch will get applied as-is, but please let me know if there is any merit to it, whether it should work, and what still needs to be done if this is to be made part of the kernel. 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/