date:20171108

[PATCH net-next] net: mvpp2: fix GOP statistics loop start and stop conditions

2017-11-08 Thread Miquel Raynal

GOP statistics from all ports of one instance of the driver are gathered
with one work recalled in loop in a workqueue. The loop is started when
a port is up, and stopped when a port is down. This last condition is
obviously wrong.

Fix this by having a work per port. This way, starting and stopping it
when the port is up or down will be fine, while minimizing unnecessary
CPU usage.

Fixes: 118d6298f6f0 ("net: mvpp2: add ethtool GOP statistics")
Reported-by: Stefan Chulski 
Signed-off-by: Miquel Raynal 
---

Hi David,

This is the required fix for 118d6298f6f0 ("net: mvpp2: add ethtool GOP
statistics") that starts the statistics gathering per port, avoiding
the need to know if at least one port is still up or not before stopping
the loop. In the meantime it also avoids reading statistics of ports
that are down.

Thank you,
Miquèl


 drivers/net/ethernet/marvell/mvpp2.c | 62 +---
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c 
b/drivers/net/ethernet/marvell/mvpp2.c
index aa38bca597f2..89bc47d2ffac 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -885,9 +885,7 @@ struct mvpp2 {
/* Maximum number of RXQs per port */
unsigned int max_port_rxqs;
 
-   /* Workqueue to gather hardware statistics with its lock */
-   struct mutex gather_stats_lock;
-   struct delayed_work stats_work;
+   /* Workqueue to gather hardware statistics */
char queue_name[30];
struct workqueue_struct *stats_queue;
 };
@@ -955,6 +953,10 @@ struct mvpp2_port {
struct mvpp2_pcpu_stats __percpu *stats;
u64 *ethtool_stats;
 
+   /* Per-port work and its lock to gather hardware statistics */
+   struct mutex gather_stats_lock;
+   struct delayed_work stats_work;
+
phy_interface_t phy_interface;
struct device_node *phy_node;
struct phy *comphy;
@@ -4895,32 +4897,25 @@ static void mvpp2_ethtool_get_strings(struct net_device 
*netdev, u32 sset,
 static void mvpp2_gather_hw_statistics(struct work_struct *work)
 {
struct delayed_work *del_work = to_delayed_work(work);
-   struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work);
-   struct mvpp2_port *port;
+   struct mvpp2_port *port = container_of(del_work, struct mvpp2_port,
+  stats_work);
u64 *pstats;
-   int i, j;
-
-   mutex_lock(&priv->gather_stats_lock);
+   int i;
 
-   for (i = 0; i < priv->port_count; i++) {
-   if (!priv->port_list[i])
-   continue;
+   mutex_lock(&port->gather_stats_lock);
 
-   port = priv->port_list[i];
-   pstats = port->ethtool_stats;
-   for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_regs); j++)
-   *pstats++ += mvpp2_read_count(port,
- &mvpp2_ethtool_regs[j]);
-   }
+   pstats = port->ethtool_stats;
+   for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+   *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
 
/* No need to read again the counters right after this function if it
 * was called asynchronously by the user (ie. use of ethtool).
 */
-   cancel_delayed_work(&priv->stats_work);
-   queue_delayed_work(priv->stats_queue, &priv->stats_work,
+   cancel_delayed_work(&port->stats_work);
+   queue_delayed_work(port->priv->stats_queue, &port->stats_work,
   MVPP2_MIB_COUNTERS_STATS_DELAY);
 
-   mutex_unlock(&priv->gather_stats_lock);
+   mutex_unlock(&port->gather_stats_lock);
 }
 
 static void mvpp2_ethtool_get_stats(struct net_device *dev,
@@ -4928,13 +4923,15 @@ static void mvpp2_ethtool_get_stats(struct net_device 
*dev,
 {
struct mvpp2_port *port = netdev_priv(dev);
 
-   /* Update statistics for all ports, copy only those actually needed */
-   mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
+   /* Update statistics for the given port, then take the lock to avoid
+* concurrent accesses on the ethtool_stats structure during its copy.
+*/
+   mvpp2_gather_hw_statistics(&port->stats_work.work);
 
-   mutex_lock(&port->priv->gather_stats_lock);
+   mutex_lock(&port->gather_stats_lock);
memcpy(data, port->ethtool_stats,
   sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
-   mutex_unlock(&port->priv->gather_stats_lock);
+   mutex_unlock(&port->gather_stats_lock);
 }
 
 static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
@@ -7085,7 +7082,7 @@ static int mvpp2_open(struct net_device *dev)
mvpp22_init_rss(port);
 
/* Start hardware statistics gathering */
-   queue_delayed_work(priv->stats_queue, &priv->stats_work,
+   queue_delayed_work(priv->stats_queue, &por

Re: kernel BUG at net/key/af_key.c:LINE!

2017-11-08 Thread Dmitry Vyukov

On Wed, Nov 8, 2017 at 8:47 AM, Herbert Xu  wrote:
> On Tue, Oct 24, 2017 at 05:10:06PM +0200, Dmitry Vyukov wrote:
>> On Tue, Oct 24, 2017 at 5:08 PM, syzbot
>> 
>> wrote:
>> > Hello,
>> >
>> > syzkaller hit the following crash on
>> > 02a2b05395dde2f49eb67b51a5fbc6606943
>> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
>> > compiler: gcc (GCC) 7.1.1 20170620
>> > .config is attached
>> > Raw console output is attached.
>> > C reproducer is attached
>> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>> > for information about syzkaller reproducers
>>
>> This also happened on more recent commits, including net-next
>> 833e0e2f24fd0525090878f71e129a8a4cb8bf78 (Oct 10) with similar
>> signature:
>
> Unfortunately I cannot reproduce the crash with your reproducer.
> Does it always crash for you?
>
>> [ cut here ]
>> kernel BUG at net/key/af_key.c:2068!
>> invalid opcode:  [#1] SMP KASAN
>> Dumping ftrace buffer:
>>(ftrace buffer empty)
>> Modules linked in:
>> CPU: 1 PID: 11011 Comm: syz-executor1 Not tainted 4.14.0-rc4+ #80
>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>> BIOS Google 01/01/2011
>> task: 8801d4ecc1c0 task.stack: 8801c13f8000
>> RIP: 0010:pfkey_xfrm_policy2msg+0x209c/0x22b0 net/key/af_key.c:2068
>
> This shows that you have a xfrm policy that has a bogus family
> field in your policy database.  But it gives no clue as to how
> it got there.

Just triggered it within a second.
Are you using the provided config?
Also the repro needs to be compiled with -m32 (but it does not compile
without it due to missing __NR_mmap2, so I guess you passed -m32).

Re: kernel BUG at net/key/af_key.c:LINE!

2017-11-08 Thread Dmitry Vyukov

On Wed, Nov 8, 2017 at 8:59 AM, Dmitry Vyukov  wrote:
> On Wed, Nov 8, 2017 at 8:47 AM, Herbert Xu  
> wrote:
>> On Tue, Oct 24, 2017 at 05:10:06PM +0200, Dmitry Vyukov wrote:
>>> On Tue, Oct 24, 2017 at 5:08 PM, syzbot
>>> 
>>> wrote:
>>> > Hello,
>>> >
>>> > syzkaller hit the following crash on
>>> > 02a2b05395dde2f49eb67b51a5fbc6606943
>>> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
>>> > compiler: gcc (GCC) 7.1.1 20170620
>>> > .config is attached
>>> > Raw console output is attached.
>>> > C reproducer is attached
>>> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>>> > for information about syzkaller reproducers
>>>
>>> This also happened on more recent commits, including net-next
>>> 833e0e2f24fd0525090878f71e129a8a4cb8bf78 (Oct 10) with similar
>>> signature:
>>
>> Unfortunately I cannot reproduce the crash with your reproducer.
>> Does it always crash for you?
>>
>>> [ cut here ]
>>> kernel BUG at net/key/af_key.c:2068!
>>> invalid opcode:  [#1] SMP KASAN
>>> Dumping ftrace buffer:
>>>(ftrace buffer empty)
>>> Modules linked in:
>>> CPU: 1 PID: 11011 Comm: syz-executor1 Not tainted 4.14.0-rc4+ #80
>>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>>> BIOS Google 01/01/2011
>>> task: 8801d4ecc1c0 task.stack: 8801c13f8000
>>> RIP: 0010:pfkey_xfrm_policy2msg+0x209c/0x22b0 net/key/af_key.c:2068
>>
>> This shows that you have a xfrm policy that has a bogus family
>> field in your policy database.  But it gives no clue as to how
>> it got there.
>
> Just triggered it within a second.
> Are you using the provided config?
> Also the repro needs to be compiled with -m32 (but it does not compile
> without it due to missing __NR_mmap2, so I guess you passed -m32).


That was on linux-next:

commit 8b82a8a7ab53ee1a065ac69c835737a701f46b2e (HEAD, tag:
next-20171107, linux-next/master)
Author: Stephen Rothwell
Date:   Tue Nov 7 16:18:10 2017 +1100
Add linux-next specific files for 20171107

Re: linux-next: build warning after merge of the netfilter-next tree

2017-11-08 Thread Stephen Rothwell

Hi Dave,

On Tue, 7 Nov 2017 11:02:48 +1100 Stephen Rothwell  
wrote:
>
> Hi all,
> 
> After merging the netfilter-next tree, today's linux-next build (powerpc
> ppc64_defconfig) produced this warning:
> 
> net/netfilter/nf_conntrack_netlink.c:536:15: warning: 'ctnetlink_proto_size' 
> defined but not used [-Wunused-function]
>  static size_t ctnetlink_proto_size(const struct nf_conn *ct)
>^
> 
> Introduced by commit
> 
>   5caaed151a68 ("netfilter: conntrack: don't cache nlattr_tuple_size result 
> in nla_size")

I assume that this warning will now be in the net-next tree ...

-- 
Cheers,
Stephen Rothwell

Re: regression: UFO removal breaks kvm live migration

2017-11-08 Thread Willem de Bruijn

On Wed, Nov 8, 2017 at 4:49 PM, Jason Wang  wrote:
>
>
> On 2017年11月08日 15:26, David Miller wrote:
>>
>> From: Willem de Bruijn 
>> Date: Wed, 8 Nov 2017 12:36:26 +0900
>>
>>> On Tue, Nov 7, 2017 at 5:02 PM, Michal Kubecek  wrote:

 I didn't have time to think it through yet but perhaps we could allow
 setting TUN_F_UFO and ignore its value.
>>>
>>> If the feature is enabled guests may try to send UFO packets, which
>>> the host is no longer able to fragment.
>>>
>>> virtio_net_hdr_to_skb will drop the packets immediately based on
>>> gso_type and tun_get_user will return EINVAL.
>>>
>>> Still, perhaps that's preferable as migration will succeed and most
>>> guests won't ever try to send those packets in the first place.
>>
>> However, this would create the situation where there is no way
>> to properly probe for the actual presence of UFO support.
>
>
> I think we should not make any assumptions about how the guest will use the
> feature. So I could not come up with anything better than bringing it back
> partially for TAP; it looks like we only need to segment them in tun_get_user().

Live migration essentially expects that features can never be removed [1],
as feature bits are not renegotiated after migration. In the short term we'll
have to work around that, but in the long term that does not seem practical.

There already exist interfaces to renegotiate guest/host state at runtime,
including for offloads [2][3]. For newer guests, we should support a trigger
from the host to renegotiate offloads.

That won't help in the short term. I'm still reading up to see if there are
any other options besides reimplement or advertise-but-drop, such as
an implicit trigger that would make the guest renegotiate. It's unlikely, but
worth a look..

[1] 
https://lists.linuxfoundation.org/pipermail/virtualization/2014-November/028126.html
[2] 
https://lists.linuxfoundation.org/pipermail/virtualization/2013-April/023818.html
[3] https://patchwork.kernel.org/patch/9850785/

Re: [PATCH v3] scripts: add leaking_addresses.pl

2017-11-08 Thread Peter Zijlstra

On Tue, Nov 07, 2017 at 05:44:13PM -0500, Steven Rostedt wrote:
> On Tue, 7 Nov 2017 13:44:01 -0800
> Linus Torvalds  wrote:
> 
> > > Looking other places that stand out, it seems like
> > > /proc/lockdep_chains and /proc/lockdep (CONFIG_LOCKDEP=y) has a ton of
> > > %p usage. It's unclear to me if a hash is sufficient for meaningful
> > > debugging there?  
> > 
> > Maybe not, but that is also _so_ esoteric that I suspect the right fix
> > is to just make it root-only readable.
> 
> Also note, I don't believe anyone should be running a LOCKDEP
> configured kernel in a production (secured) environment. As it adds
> quite a bit of overhead. It's something you run on test environments to
> make sure it doesn't detect any possible deadlocks.
> 
> > 
> > I've never used it, we should check with people who have. I get the
> > feeling that this is purely for PeterZ debugging.
> 
> I've used it. But then again, I also debug lockdep ;-)
> 
> > 
> > The very first commit that introduced that code actually has a
> > 
> > (FIXME: should go into debugfs)
> > 
> > so I suspect it never should have been user-readable to begin with. I
> > guess it makes some things easier, but it really is *very* different
> > from things like profiling.
> 
> Want me to whip up a patch to move the file?

Fine by me; create /debug/lockdep/ for the 3 files or something like
that.

As to the actual addresses, they can be used to double check things are
in fact the same object (in case of name collisions), are in static
memory (as these things ought to be) etc.. But mostly they're not too
important.

And yes, as everybody says, LOCKDEP is a debug tool; you run this on your
(local) dev kernels, anything else is out of spec.

Re: [PATCH 04/31] nds32: Exception handling

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 
>  arch/nds32/mm/alignment.c   |  564 
> +++

> diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
> new file mode 100644
> index 000..05589e7
> --- /dev/null
> +++ b/arch/nds32/mm/alignment.c

> +static int mode = 0x3;
> +module_param(mode, int, S_IWUSR | S_IRUGO);

It's an interesting question how to best handle alignment faults, both in
kernel and user mode. While it can help for debugging to have the handler,
I'd argue that you are better off in the long run not fixing up the faults
automatically but to modify the code that triggers them instead.

How about making the faults disabled by default?

> +static int _do_unaligned_access(unsigned long entry, unsigned long addr,
> +   unsigned long type, struct pt_regs *regs)
> +{
> +   unsigned long inst;
> +   int ret = -EFAULT;
> +
> +   if (user_mode(regs)) {
> +   /* user mode */
> +   if (!va_present(current->mm, addr))
> +   return ret;
> +   } else {
> +   /* kernel mode */
> +   if (!va_kernel_present(addr))
> +   return ret;
> +   }

This looks racy, the address might be present when you get here, but not
later when you actually access it. I think what you need here is something
like ARM does with get32_unaligned_check() etc and their fixup tables.

> +   inst = get_inst(regs->ipc);
> +
> +   DEBUG(mode & 0x04, 1,
> + "Faulting Addr: 0x%08lx, PC: 0x%08lx [ 0x%08lx ]\n", addr,
> + regs->ipc, inst);
> +
> +   if ((user_mode(regs) && (mode & 0x01))
> +   || (!user_mode(regs) && (mode & 0x02))) {
> +
> +   mm_segment_t seg = get_fs();
> +
> +   set_fs(KERNEL_DS);
> +
> +   if (inst & 0x8000)
> +   ret = do_16((inst >> 16) & 0x, regs);
> +   else
> +   ret = do_32(inst, regs);
> +
> +   set_fs(seg);
> +   }
> +
> +   return ret;
> +}

Doesn't this allow user space to read all of kernel memory simply by
passing unaligned addresses?

> +static const struct file_operations fops = {
> +   .open = simple_open,
> +   .read = proc_alignment_read,
> +   .write = proc_alignment_write,
> +};

This should really be a sysctl rather than an open-coded procfs file,
for consistency with other architectures.

Please have a look at that interface on other architectures and pick
whatever the majority do.

 Arnd

Re: regression: UFO removal breaks kvm live migration

2017-11-08 Thread Jason Wang




On 2017年11月08日 17:08, Willem de Bruijn wrote:

On Wed, Nov 8, 2017 at 4:49 PM, Jason Wang  wrote:


On 2017年11月08日 15:26, David Miller wrote:

From: Willem de Bruijn 
Date: Wed, 8 Nov 2017 12:36:26 +0900


On Tue, Nov 7, 2017 at 5:02 PM, Michal Kubecek  wrote:

I didn't have time to think it through yet but perhaps we could allow
setting TUN_F_UFO and ignore its value.

If the feature is enabled guests may try to send UFO packets, which
the host is no longer able to fragment.

virtio_net_hdr_to_skb will drop the packets immediately based on
gso_type and tun_get_user will return EINVAL.

Still, perhaps that's preferable as migration will succeed and most
guests won't ever try to send those packets in the first place.

However, this would create the situation where there is no way
to properly probe for the actual presence of UFO support.


I think we should not make any assumptions about how the guest will use the
feature. So I could not come up with anything better than bringing it back
partially for TAP; it looks like we only need to segment them in tun_get_user().

Live migration essentially expects that features can never be removed [1],
as feature bits are not renegotiated after migration. In the short term we'll
have to work around that, but in the long term that does not seem practical.

There already exist interfaces to renegotiate guest/host state at runtime,
including for offloads [2][3]. For newer guests, we should support a trigger
from the host to renegotiate offloads.


I could not think of a real use case for this other than trying to 
workaround a bug.




That won't help in the short term. I'm still reading up to see if there are
any other options besides reimplement or advertise-but-drop, such as
an implicit trigger that would make the guest renegotiate. It's unlikely, but
worth a look..


Yes, this looks hard. And even if we can manage to do this, it looks like
overkill since it will impact all guests after migration.


Thanks



[1] 
https://lists.linuxfoundation.org/pipermail/virtualization/2014-November/028126.html
[2] 
https://lists.linuxfoundation.org/pipermail/virtualization/2013-April/023818.html
[3] https://patchwork.kernel.org/patch/9850785/

Re: kernel BUG at net/key/af_key.c:LINE!

2017-11-08 Thread Herbert Xu

On Tue, Oct 24, 2017 at 05:10:06PM +0200, Dmitry Vyukov wrote:
> On Tue, Oct 24, 2017 at 5:08 PM, syzbot
> 
> wrote:
> > Hello,
> >
> > syzkaller hit the following crash on
> > 02a2b05395dde2f49eb67b51a5fbc6606943
> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
> > compiler: gcc (GCC) 7.1.1 20170620
> > .config is attached
> > Raw console output is attached.
> > C reproducer is attached
> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
> > for information about syzkaller reproducers
> 
> This also happened on more recent commits, including net-next
> 833e0e2f24fd0525090878f71e129a8a4cb8bf78 (Oct 10) with similar
> signature:

Unfortunately I cannot reproduce the crash with your reproducer.
Does it always crash for you?

> [ cut here ]
> kernel BUG at net/key/af_key.c:2068!
> invalid opcode:  [#1] SMP KASAN
> Dumping ftrace buffer:
>(ftrace buffer empty)
> Modules linked in:
> CPU: 1 PID: 11011 Comm: syz-executor1 Not tainted 4.14.0-rc4+ #80
> Hardware name: Google Google Compute Engine/Google Compute Engine,
> BIOS Google 01/01/2011
> task: 8801d4ecc1c0 task.stack: 8801c13f8000
> RIP: 0010:pfkey_xfrm_policy2msg+0x209c/0x22b0 net/key/af_key.c:2068

This shows that you have a xfrm policy that has a bogus family
field in your policy database.  But it gives no clue as to how
it got there.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Re: [PATCH 00/31] Andes(nds32) Linux Kernel Port

2017-11-08 Thread David Howells

Greentime Hu  wrote:

> The build script and toolchain repositories are able to be found here:
>   https://github.com/andestech/build_script.git

Is arch support in upstream binutils and gcc?

David

[PATCHv2 1/1] bnx2x: fix slowpath null crash

2017-11-08 Thread Zhu Yanjun

When "NETDEV WATCHDOG: em4 (bnx2x): transmit queue 2 timed out" occurs,
BNX2X_SP_RTNL_TX_TIMEOUT is set. In the function bnx2x_sp_rtnl_task,
bnx2x_nic_unload and bnx2x_nic_load are executed to shut down and reopen
the NIC. In the function bnx2x_nic_load, the DMA memory allocation done by
bnx2x_alloc_mem fails. The message "bnx2x: [bnx2x_alloc_mem:8399(em4)]Can't
allocate memory" is printed and the variable slowpath is set to NULL.
When the NIC is shut down, the function bnx2x_nic_unload is called. In
the function bnx2x_nic_unload, the following functions are executed:
bnx2x_chip_cleanup
bnx2x_set_storm_rx_mode
bnx2x_set_q_rx_mode
bnx2x_set_q_rx_mode
bnx2x_config_rx_mode
bnx2x_set_rx_mode_e2
In the function bnx2x_set_rx_mode_e2, the variable slowpath is accessed
while it is NULL, and the crash occurs.
To fix this crash, the variable slowpath is checked before it is used. In
addition, in the function bnx2x_sp_rtnl_task, when the DMA memory allocation
fails, the NIC is shut down and opened one more time.

CC: Joe Jin 
CC: Junxiao Bi 
Signed-off-by: Zhu Yanjun 
Acked-by: Ariel Elior 
---
V1->v2
Changes: add Acker and remove unnecessary brackets
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c12b4d3..fbd302a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9332,7 +9332,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int 
unload_mode, bool keep_link)
/* Schedule the rx_mode command */
if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state))
set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state);
-   else
+   else if (bp->slowpath)
bnx2x_set_storm_rx_mode(bp);
 
/* Cleanup multicast configuration */
@@ -10271,8 +10271,15 @@ static void bnx2x_sp_rtnl_task(struct work_struct 
*work)
smp_mb();
 
bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
-   bnx2x_nic_load(bp, LOAD_NORMAL);
-
+   /*When ret value shows failure of allocation failure,
+*the nic is rebooted again. If open still fails, a error
+*message to notify the user.
+*/
+   if (bnx2x_nic_load(bp, LOAD_NORMAL) == -ENOMEM) {
+   bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
+   if (bnx2x_nic_load(bp, LOAD_NORMAL))
+   BNX2X_ERR("Open the NIC fails again!\n");
+   }
rtnl_unlock();
return;
}
-- 
2.7.4

Re: [PATCH 05/31] nds32: MMU definitions

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:

> +/*
> + * The DMA mask corresponding to the maximum bus address allocatable
> + * using GFP_DMA.  The default here places no restriction on DMA
> + * allocations.  This must be the smallest DMA mask in the system,
> + * so a successful GFP_DMA allocation will always satisfy this.
> + */
> +#ifndef ISA_DMA_THRESHOLD
> +#define ISA_DMA_THRESHOLD  (0xULL)
> +#endif

I see this one only in powerpc, I think it can be removed.

> +
> +/*
> + * Optional device DMA address remapping. Do _not_ use directly!
> + * We should really eliminate virt_to_bus() here - it's deprecated.
> + */
> +#define page_to_dma(dev, page) ((dma_addr_t)__virt_to_phys((unsigned 
> long)page_address(page)))
> +#define dma_to_virt(dev, addr) ((void *)__phys_to_virt(addr))
> +#define virt_to_dma(dev, addr) ((dma_addr_t)__virt_to_phys((unsigned 
> long)(addr)))

This looks like an older comment, I think we have eliminated them from the
mainline kernel in all drivers, so you should remove them here too.

> diff --git a/arch/nds32/include/asm/shmparam.h 
> b/arch/nds32/include/asm/shmparam.h
> new file mode 100644
> index 000..5679648
> --- /dev/null
> +++ b/arch/nds32/include/asm/shmparam.h

> +
> +/*
> + * This should be the size of the virtually indexed cache/ways,
> + * whichever is greater since the cache aliases every size/ways
> + * bytes.
> + */
> +#defineSHMLBA  (4 * PAGE_SIZE) /* attach addr a multiple of this */
> +#defineREALSHMLBA  SHMLBA

I don't see REALSHMLBA anywhere in the kernel, do you need it?

For SHMLBA, I think it should be defined as an absolute number, using
the maximum
that you might need for any possible value of PAGE_SIZE. Otherwise user space
has a much harder time figuring out what it should use.

Arnd

Re: [PATCH 1/2] net: macb: add of_phy_deregister_fixed_link to error paths

2017-11-08 Thread Michael Grzeschik

On Wed, Nov 08, 2017 at 01:22:57PM +0900, David Miller wrote:
> From: Michael Grzeschik 
> Date: Mon,  6 Nov 2017 12:10:04 +0100
> 
> > We add the call of_phy_deregister_fixed_link to all associated
> > error paths for memory clean up.
> > 
> > Signed-off-by: Michael Grzeschik 
> > ---
> >  drivers/net/ethernet/cadence/macb_main.c | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/drivers/net/ethernet/cadence/macb_main.c 
> > b/drivers/net/ethernet/cadence/macb_main.c
> > index 6df2cad61647a..2c2acd011329a 100644
> > --- a/drivers/net/ethernet/cadence/macb_main.c
> > +++ b/drivers/net/ethernet/cadence/macb_main.c
> > @@ -611,6 +611,8 @@ static int macb_mii_init(struct macb *bp)
> >  err_out_unregister_bus:
> > mdiobus_unregister(bp->mii_bus);
> >  err_out_free_mdiobus:
> > +   if ((np) && (of_phy_is_fixed_link(np)))
> 
> Please don't use so many parenthesis in your conditionals:
> 
>   if (np && of_phy_is_fixed_link(np))
> 
> is more than sufficient.
> 
> Please fix this in your entire set of patches.

There are only two patches and not even in one series.
I will resend them both with this one fixed and create
a series, as the second one depends on this one.

Thanks,
Michael

-- 
Pengutronix e.K.   | |
Industrial Linux Solutions | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature

Re: [PATCH 00/31] Andes(nds32) Linux Kernel Port

2017-11-08 Thread Greentime Hu

2017-11-08 16:32 GMT+08:00 David Howells :
> Greentime Hu  wrote:
>
>> The build script and toolchain repositories are able to be found here:
>>   https://github.com/andestech/build_script.git
>
> Is arch support in upstream binutils and gcc?

Yes, it is, but it only supports the ELF toolchain for now. We will
upstream the Linux toolchain in the next step.

Re: [PATCH 08/31] nds32: Cache and TLB routines

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:

> +#ifndef __NDS32_PROCFNS_H__
> +#define __NDS32_PROCFNS_H__
> +
> +#define CPU_NAME n13
> +
> +#ifdef __KERNEL__
> +
> +#ifdef __STDC__
> +#define cpu_fn(name,fn)   name##fn
> +#else
> +#define cpu_fn(name,fn)   name/**/fn
> +#endif
> +#define __cpu_fn(name,fn) cpu_fn(name,fn)
> +
> +#define cpu_proc_init  __cpu_fn( CPU_NAME, _proc_init)
> +#define cpu_proc_fin   __cpu_fn( CPU_NAME, _proc_fin)
> +#define cpu_do_idle__cpu_fn( CPU_NAME, _do_idle)
> +#define cpu_reset  __cpu_fn( CPU_NAME, _reset)
> +#define cpu_switch_mm  __cpu_fn( CPU_NAME, _switch_mm)

I see you have copied this from ARM. Do you actually need the same complexity,
with the ability to build either optimal code for a particular CPU or
a multi-CPU
version?

Most other architectures seem to have settled for doing just one of the two
models. How many CPU implementations do you expect to support that
differ in all of those functions?

  Arnd

Re: [PATCH 05/31] nds32: MMU definitions

2017-11-08 Thread Greentime Hu

2017-11-08 16:36 GMT+08:00 Arnd Bergmann :
> On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
>
>> +/*
>> + * The DMA mask corresponding to the maximum bus address allocatable
>> + * using GFP_DMA.  The default here places no restriction on DMA
>> + * allocations.  This must be the smallest DMA mask in the system,
>> + * so a successful GFP_DMA allocation will always satisfy this.
>> + */
>> +#ifndef ISA_DMA_THRESHOLD
>> +#define ISA_DMA_THRESHOLD  (0xULL)
>> +#endif
>
> I see this one only in powerpc, I think it can be removed.

Thanks. I will remove it in next version patch.

>
>> +
>> +/*
>> + * Optional device DMA address remapping. Do _not_ use directly!
>> + * We should really eliminate virt_to_bus() here - it's deprecated.
>> + */
>> +#define page_to_dma(dev, page) 
>> ((dma_addr_t)__virt_to_phys((unsigned long)page_address(page)))
>> +#define dma_to_virt(dev, addr) ((void *)__phys_to_virt(addr))
>> +#define virt_to_dma(dev, addr) 
>> ((dma_addr_t)__virt_to_phys((unsigned long)(addr)))
>
> This looks like an older comment, I think we have eliminated them from the
> mainline kernel in all drivers, so you should remove them here too.

Thanks. I will remove it in next version patch.

>> diff --git a/arch/nds32/include/asm/shmparam.h 
>> b/arch/nds32/include/asm/shmparam.h
>> new file mode 100644
>> index 000..5679648
>> --- /dev/null
>> +++ b/arch/nds32/include/asm/shmparam.h
>
>> +
>> +/*
>> + * This should be the size of the virtually indexed cache/ways,
>> + * whichever is greater since the cache aliases every size/ways
>> + * bytes.
>> + */
>> +#defineSHMLBA  (4 * PAGE_SIZE) /* attach addr a multiple of this */
>> +#defineREALSHMLBA  SHMLBA
>
> I don't see REALSHMLBA anywhere in the kernel, do you need it?
>
> For SHMLBA, I think it should be defined as an absolute number, using
> the maximum
> that you might need for any possible value of PAGE_SIZE. Otherwise user space
> has a much harder time figuring out what it should use.

Thanks. I will remove REALSHMLBA. I will consider using 8KB or a larger
page size in the next version of the patch.

Re: [PATCH 10/31] nds32: IRQ handling

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:

> +
> +#ifdef CONFIG_TRACE_IRQFLAGS
> +void notrace arch_trace_hardirqs_on(void)
> +{
> +   trace_hardirqs_on();
> +}
> +
> +void notrace arch_trace_hardirqs_off(void)
> +{
> +   trace_hardirqs_off();
> +}
> +#endif

I don't see those wrappers on other architectures, what do you need them for?

   Arnd

Re: [PATCH 11/31] nds32: Atomic operations

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 
> ---
>  arch/nds32/include/asm/futex.h|  116 
>  arch/nds32/include/asm/spinlock.h |  178 
> +
>  2 files changed, 294 insertions(+)
>  create mode 100644 arch/nds32/include/asm/futex.h
>  create mode 100644 arch/nds32/include/asm/spinlock.h


> diff --git a/arch/nds32/include/asm/spinlock.h 
> b/arch/nds32/include/asm/spinlock.h
> new file mode 100644
> index 000..dd5fc71
> --- /dev/null
> +++ b/arch/nds32/include/asm/spinlock.h
> @@ -0,0 +1,178 @@
> +
> +#define arch_spin_unlock_wait(lock) \
> +   do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)

This was removed from the other architectures in commit
952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific definitions")

Please remove this as well.

Palmer, I see riscv has the same thing, please also add a patch to your
tree to remove it.

> +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
> +
> +static inline void arch_spin_lock(arch_spinlock_t * lock)
> +{
> +   unsigned long tmp;
> +
> +   __asm__ __volatile__("1:\n"
> +"\tllw\t%0, [%1]\n"
> +"\tbnez\t%0, 1b\n"
> +"\tmovi\t%0, #0x1\n"
> +"\tscw\t%0, [%1]\n"
> +"\tbeqz\t%0, 1b\n"
> +:"=&r"(tmp)
> +:"r"(&lock->lock)
> +:"memory");

The coding style seems inconsistent here, the other inline asm uses real tabs
instead of \t, and 'asm volatile' is generally preferred over '__asm__
__volatile__'.

   Arnd

Re: [PATCH net-next 2/4] net: phy: sfp: Use correct endian for sfp->id.ext.options

2017-11-08 Thread Russell King - ARM Linux

On Tue, Nov 07, 2017 at 07:49:09PM -0800, Florian Fainelli wrote:
> The extended ID options 16-bit value is big-endian (and actually annotated as
> such), but we would be accessing it with our CPU endian, which would not
> allow the correct detection of whether the LOS signal is inverted or not.
> 
> Fixes: 73970055450e ("sfp: add SFP module support")
> Signed-off-by: Florian Fainelli 
> ---
>  drivers/net/phy/sfp.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
> index 942288aa9cdb..dfb28b269687 100644
> --- a/drivers/net/phy/sfp.c
> +++ b/drivers/net/phy/sfp.c
> @@ -355,7 +355,7 @@ static void sfp_sm_link_check_los(struct sfp *sfp)
>* SFP_OPTIONS_LOS_NORMAL are set?  For now, we assume
>* the same as SFP_OPTIONS_LOS_NORMAL set.
>*/
> - if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED)
> + if (be16_to_cpu(sfp->id.ext.options) & SFP_OPTIONS_LOS_INVERTED)

It would be more efficient to convert the constants to BE16 rather
than an indeterminate number to CPU endian.  The compiler can optimise
the constant.  Same for the other two hunks.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

Re: [PATCH net-next 3/4] net: phy: sfp: Separate enumerations and states

2017-11-08 Thread Russell King - ARM Linux

On Tue, Nov 07, 2017 at 07:49:10PM -0800, Florian Fainelli wrote:
> Create separate enumerations for the SFP physical state (computed from GPIOs),
> device state, module state, and actual state machine. This will make it easier
> to make sure the correct states are used, and also pretty print those to help
> debugging.

The compiler does no type checking of these, so I don't see how it
makes it any "easier to make sure the correct states are used".

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

[PATCH v2 2/2] net: macb: add of_node_put to error paths

2017-11-08 Thread Michael Grzeschik

We add the call of_node_put(bp->phy_node) to all associated error
paths for memory clean up.

Signed-off-by: Michael Grzeschik 
---
v2: removed extra of_node_put from macb_remove

 drivers/net/ethernet/cadence/macb_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c 
b/drivers/net/ethernet/cadence/macb_main.c
index b7846d6e9234e..0f24ca5a24b53 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -611,6 +611,7 @@ static int macb_mii_init(struct macb *bp)
 err_out_unregister_bus:
mdiobus_unregister(bp->mii_bus);
 err_out_free_mdiobus:
+   of_node_put(bp->phy_node);
if (np && of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
mdiobus_free(bp->mii_bus);
@@ -3554,6 +3555,7 @@ static int macb_probe(struct platform_device *pdev)
 err_out_unregister_mdio:
phy_disconnect(dev->phydev);
mdiobus_unregister(bp->mii_bus);
+   of_node_put(bp->phy_node);
if (np && of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
mdiobus_free(bp->mii_bus);
-- 
2.11.0

[PATCH v2 1/2] net: macb: add of_phy_deregister_fixed_link to error paths

2017-11-08 Thread Michael Grzeschik

We add the call of_phy_deregister_fixed_link to all associated
error paths for memory clean up.

Signed-off-by: Michael Grzeschik 
---
v2: removed extra parenthesis

drivers/net/ethernet/cadence/macb_main.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c 
b/drivers/net/ethernet/cadence/macb_main.c
index 6df2cad61647a..b7846d6e9234e 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -611,6 +611,8 @@ static int macb_mii_init(struct macb *bp)
 err_out_unregister_bus:
mdiobus_unregister(bp->mii_bus);
 err_out_free_mdiobus:
+   if (np && of_phy_is_fixed_link(np))
+   of_phy_deregister_fixed_link(np);
mdiobus_free(bp->mii_bus);
 err_out:
return err;
@@ -3552,6 +3554,8 @@ static int macb_probe(struct platform_device *pdev)
 err_out_unregister_mdio:
phy_disconnect(dev->phydev);
mdiobus_unregister(bp->mii_bus);
+   if (np && of_phy_is_fixed_link(np))
+   of_phy_deregister_fixed_link(np);
mdiobus_free(bp->mii_bus);
 
/* Shutdown the PHY if there is a GPIO reset */
@@ -3574,6 +3578,7 @@ static int macb_remove(struct platform_device *pdev)
 {
struct net_device *dev;
struct macb *bp;
+   struct device_node *np = pdev->dev.of_node;
 
dev = platform_get_drvdata(pdev);
 
@@ -3582,6 +3587,8 @@ static int macb_remove(struct platform_device *pdev)
if (dev->phydev)
phy_disconnect(dev->phydev);
mdiobus_unregister(bp->mii_bus);
+   if (np && of_phy_is_fixed_link(np))
+   of_phy_deregister_fixed_link(np);
dev->phydev = NULL;
mdiobus_free(bp->mii_bus);
 
-- 
2.11.0

[PATCH v2 0/2] net: macb: add error handling on probe and

2017-11-08 Thread Michael Grzeschik

This series adds more error handling to the macb driver.

Michael Grzeschik (2):
  net: macb: add of_phy_deregister_fixed_link to error paths
  net: macb: add of_node_put to error paths

 drivers/net/ethernet/cadence/macb_main.c | 9 +
 1 file changed, 9 insertions(+)

-- 
2.11.0

[net-next v2 1/1] tipc: improve link resiliency when rps is activated

2017-11-08 Thread Jon Maloy

Currently, the TIPC RPS dissector is based only on the incoming packets'
source node address, hence steering all traffic from a node to the same
core. We have seen that this makes the links vulnerable to starvation
and unnecessary resets when we turn down the link tolerance to very low
values.

To reduce the risk of this happening, we exempt probe and probe replies
packets from the convergence to one core per source node. Instead, we do
the opposite, - we try to diverge those packets across as many cores as
possible, by randomizing the flow selector key.

To make such packets identifiable to the dissector, we add a new
'is_keepalive' bit to word 0 of the LINK_PROTOCOL header. This bit is
set both for PROBE and PROBE_REPLY messages, and only for those.

It should be noted that these packets are not part of any flow anyway,
and only constitute a minuscule fraction of all packets sent across a
link. Hence, there is no risk that this will affect overall performance.

Acked-by: Ying Xue 
Signed-off-by: Jon Maloy 

---
v2: - Returning random key directly, as per feedback from David  Miller
- Renamed the 'is_monitor' bit to 'is_keepalive'
---
 include/net/flow_dissector.h | 12 -
 include/net/tipc.h   | 62 
 net/core/flow_dissector.c| 30 ++---
 net/tipc/link.c  | 26 +++
 net/tipc/msg.h   | 10 +++
 5 files changed, 108 insertions(+), 32 deletions(-)
 create mode 100644 include/net/tipc.h

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 22aba32..9a07477 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -84,11 +84,11 @@ struct flow_dissector_key_ipv6_addrs {
 };
 
 /**
- * struct flow_dissector_key_tipc_addrs:
- * @srcnode: source node address
+ * struct flow_dissector_key_tipc:
+ * @key: source node address combined with selector
  */
-struct flow_dissector_key_tipc_addrs {
-   __be32 srcnode;
+struct flow_dissector_key_tipc {
+   __be32 key;
 };
 
 /**
@@ -100,7 +100,7 @@ struct flow_dissector_key_addrs {
union {
struct flow_dissector_key_ipv4_addrs v4addrs;
struct flow_dissector_key_ipv6_addrs v6addrs;
-   struct flow_dissector_key_tipc_addrs tipcaddrs;
+   struct flow_dissector_key_tipc tipckey;
};
 };
 
@@ -192,7 +192,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
-   FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs 
*/
+   FLOW_DISSECTOR_KEY_TIPC, /* struct flow_dissector_key_tipc */
FLOW_DISSECTOR_KEY_ARP, /* struct flow_dissector_key_arp */
FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */
FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
diff --git a/include/net/tipc.h b/include/net/tipc.h
new file mode 100644
index 000..07670ec
--- /dev/null
+++ b/include/net/tipc.h
@@ -0,0 +1,62 @@
+/*
+ * include/net/tipc.h: Include file for TIPC message header routines
+ *
+ * Copyright (c) 2017 Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWAR

Re: [PATCH 08/31] nds32: Cache and TLB routines

2017-11-08 Thread Greentime Hu

2017-11-08 16:45 GMT+08:00 Arnd Bergmann :
> On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
>
>> +#ifndef __NDS32_PROCFNS_H__
>> +#define __NDS32_PROCFNS_H__
>> +
>> +#define CPU_NAME n13
>> +
>> +#ifdef __KERNEL__
>> +
>> +#ifdef __STDC__
>> +#define cpu_fn(name,fn)   name##fn
>> +#else
>> +#define cpu_fn(name,fn)   name/**/fn
>> +#endif
>> +#define __cpu_fn(name,fn) cpu_fn(name,fn)
>> +
>> +#define cpu_proc_init  __cpu_fn( CPU_NAME, _proc_init)
>> +#define cpu_proc_fin   __cpu_fn( CPU_NAME, _proc_fin)
>> +#define cpu_do_idle__cpu_fn( CPU_NAME, _do_idle)
>> +#define cpu_reset  __cpu_fn( CPU_NAME, _reset)
>> +#define cpu_switch_mm  __cpu_fn( CPU_NAME, _switch_mm)
>
> I see you have copied this from ARM. Do you actually need the same complexity,
> with the ability to build either optimal code for a particular CPU or
> a multi-CPU
> version?
>
> Most other architectures seem to have settled for doing just one of the two
> models. How many CPU implementations do you expect to support that
> differ in all of those functions?
>

I think we can simplify the implementations because we may not have that
many implementations. I will refine it in the next version patch.

Re: [PATCH 12/31] nds32: Device specific operations

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:

> +
> +#define ioremap(cookie,size)   __ioremap(cookie,size,0,1)
> +#define ioremap_nocache(cookie,size)   __ioremap(cookie,size,0,1)
> +#define iounmap(cookie)__iounmap(cookie)

> +#include 

asm-generic/io.h now provides an ioremap_nocache() helper along with
ioremap_uc/ioremap_wc/ioremap_wt, so I think you can remove the
ioremap_nocache definition here. You might also be able to remove
__ioremap and __iounmap, and only provide ioremap/iounmap, plus
the identity macro 'define ioremap ioremap'

> +void __iomem *__ioremap(unsigned long phys_addr, size_t size,
> +   unsigned long flags, unsigned long align)

The 'align' argument is unused here, and not used on other architectures
either.

> +{
> +   struct vm_struct *area;
> +   unsigned long addr, offset, last_addr;
> +   pgprot_t prot;
> +
> +   /* Don't allow wraparound or zero size */
> +   last_addr = phys_addr + size - 1;
> +   if (!size || last_addr < phys_addr)
> +   return NULL;
> +
> +   /*
> +* Mappings have to be page-aligned
> +*/
> +   offset = phys_addr & ~PAGE_MASK;
> +   phys_addr &= PAGE_MASK;
> +   size = PAGE_ALIGN(last_addr + 1) - phys_addr;
> +
> +   /*
> +* Ok, go for it..
> +*/
> +   area = get_vm_area(size, VM_IOREMAP);

Better use get_vm_area_caller here to have the ioremap areas show up
in a more useful form in /proc/vmallocinfo

Please also have a look at what you can do for memremap().

Since you have no cacheable version of ioremap_wb/wt, it will
return an uncached mapping all the time, which is not ideal.

 Arnd

Re: [PATCH 10/31] nds32: IRQ handling

2017-11-08 Thread Greentime Hu

2017-11-08 16:49 GMT+08:00 Arnd Bergmann :
> On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
>
>> +
>> +#ifdef CONFIG_TRACE_IRQFLAGS
>> +void notrace arch_trace_hardirqs_on(void)
>> +{
>> +   trace_hardirqs_on();
>> +}
>> +
>> +void notrace arch_trace_hardirqs_off(void)
>> +{
>> +   trace_hardirqs_off();
>> +}
>> +#endif
>
> I don't see those wrappers on other architectures, what do you need them for?

Thanks.

I will remove them in the next version patch.

Re: [PATCH 13/31] nds32: DMA mapping API

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:

> +static void consistent_sync(void *vaddr, size_t size, int direction)
> +{
> +   unsigned long start = (unsigned long)vaddr;
> +   unsigned long end = start + size;
> +
> +   switch (direction) {
> +   case DMA_FROM_DEVICE:   /* invalidate only */
> +   cpu_dma_inval_range(start, end);
> +   break;
> +   case DMA_TO_DEVICE: /* writeback only */
> +   cpu_dma_wb_range(start, end);
> +   break;
> +   case DMA_BIDIRECTIONAL: /* writeback and invalidate */
> +   cpu_dma_wbinval_range(start, end);
> +   break;
> +   default:
> +   BUG();
> +   }
> +}

> +
> +static void
> +nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
> + size_t size, enum dma_data_direction dir)
> +{
> +   consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
> +}
> +
> +static void
> +nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
> +size_t size, enum dma_data_direction dir)
> +{
> +   consistent_sync((void *)dma_to_virt(dev, handle), size, dir);
> +}

You do the same cache operations for _for_cpu and _for_device, which
usually works,
but is more expensive than you need. It's better to take the ownership into
account and only do what you need.

 Arnd

[PATCH] 6lowpan: Combine two condition checks into one statement in lowpan_xmit()

2017-11-08 Thread SF Markus Elfring

From: Markus Elfring 
Date: Wed, 8 Nov 2017 10:10:41 +0100

The same exception handling was used in an if branch of two
separate statements.

* Merge their condition checks into a single statement instead.

* Delete the local variable "ret" which became unnecessary
  with this refactoring.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
 net/ieee802154/6lowpan/tx.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index e6ff5128e61a..1b0757db2078 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -255,7 +255,7 @@ static int lowpan_header(struct sk_buff *skb, struct 
net_device *ldev,
 netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
 {
struct ieee802154_hdr wpan_hdr;
-   int max_single, ret;
+   int max_single;
u16 dgram_size, dgram_offset;
 
pr_debug("package xmit\n");
@@ -269,13 +269,8 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct 
net_device *ldev)
if (!skb)
return NET_XMIT_DROP;
 
-   ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset);
-   if (ret < 0) {
-   kfree_skb(skb);
-   return NET_XMIT_DROP;
-   }
-
-   if (ieee802154_hdr_peek(skb, &wpan_hdr) < 0) {
+   if (lowpan_header(skb, ldev, &dgram_size, &dgram_offset) < 0 ||
+   ieee802154_hdr_peek(skb, &wpan_hdr) < 0) {
kfree_skb(skb);
return NET_XMIT_DROP;
}
-- 
2.15.0

Re: [lldp-devel] Fwd: [PATCH RESEND] Fix segfault on "lldptool -t -i eth2 -V PFC -c enabled"

2017-11-08 Thread Sowmini Varadhan

On (11/08/17 08:02), Hannes Reinecke wrote:
> We tried to get the list maintainership moved to over to us by the time
> Intel decided to drop FCoE. But for some reasons this never worked out,
> so while the list is technically alive we don't have any means of
> managing it.
> Maybe we should be moving it to somewhere else.

I'd support that.. any thoughts on where to move it to?

> But yes, we _do_ take fixes for it.

I just tried setting up a git pull-request for this to Valentin's repo,
I'm not sure if I followed procedures correctly (sending text patches to 
a list comes more naturally to me, and I may have fat-fingered something)

To whom should I send this patch? 

We also have a couple of other patches in the pipeline that we are
testing, so setting up a mailing list would be welcomed!

--Sowmini

Re: [PATCH v2 1/2] net: macb: add of_phy_deregister_fixed_link to error paths

2017-11-08 Thread Nicolas Ferre

On 08/11/2017 at 09:56, Michael Grzeschik wrote:
> We add the call of_phy_deregister_fixed_link to all associated
> error paths for memory clean up.
> 
> Signed-off-by: Michael Grzeschik 
Acked-by: Nicolas Ferre 
> ---
> v2: removed extra parenthesis
> 
> drivers/net/ethernet/cadence/macb_main.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/net/ethernet/cadence/macb_main.c 
> b/drivers/net/ethernet/cadence/macb_main.c
> index 6df2cad61647a..b7846d6e9234e 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -611,6 +611,8 @@ static int macb_mii_init(struct macb *bp)
>  err_out_unregister_bus:
>   mdiobus_unregister(bp->mii_bus);
>  err_out_free_mdiobus:
> + if (np && of_phy_is_fixed_link(np))
> + of_phy_deregister_fixed_link(np);
>   mdiobus_free(bp->mii_bus);
>  err_out:
>   return err;
> @@ -3552,6 +3554,8 @@ static int macb_probe(struct platform_device *pdev)
>  err_out_unregister_mdio:
>   phy_disconnect(dev->phydev);
>   mdiobus_unregister(bp->mii_bus);
> + if (np && of_phy_is_fixed_link(np))
> + of_phy_deregister_fixed_link(np);
>   mdiobus_free(bp->mii_bus);
>  
>   /* Shutdown the PHY if there is a GPIO reset */
> @@ -3574,6 +3578,7 @@ static int macb_remove(struct platform_device *pdev)
>  {
>   struct net_device *dev;
>   struct macb *bp;
> + struct device_node *np = pdev->dev.of_node;
>  
>   dev = platform_get_drvdata(pdev);
>  
> @@ -3582,6 +3587,8 @@ static int macb_remove(struct platform_device *pdev)
>   if (dev->phydev)
>   phy_disconnect(dev->phydev);
>   mdiobus_unregister(bp->mii_bus);
> + if (np && of_phy_is_fixed_link(np))
> + of_phy_deregister_fixed_link(np);
>   dev->phydev = NULL;
>   mdiobus_free(bp->mii_bus);
>  
> 


-- 
Nicolas Ferre

Re: [PATCH v2 2/2] net: macb: add of_node_put to error paths

2017-11-08 Thread Nicolas Ferre

On 08/11/2017 at 09:56, Michael Grzeschik wrote:
> We add the call of_node_put(bp->phy_node) to all associated error
> paths for memory clean up.
> 
> Signed-off-by: Michael Grzeschik 
Acked-by: Nicolas Ferre 
> ---
> v2: removed extra of_node_put from macb_remove
> 
>  drivers/net/ethernet/cadence/macb_main.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/net/ethernet/cadence/macb_main.c 
> b/drivers/net/ethernet/cadence/macb_main.c
> index b7846d6e9234e..0f24ca5a24b53 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -611,6 +611,7 @@ static int macb_mii_init(struct macb *bp)
>  err_out_unregister_bus:
>   mdiobus_unregister(bp->mii_bus);
>  err_out_free_mdiobus:
> + of_node_put(bp->phy_node);
>   if (np && of_phy_is_fixed_link(np))
>   of_phy_deregister_fixed_link(np);
>   mdiobus_free(bp->mii_bus);
> @@ -3554,6 +3555,7 @@ static int macb_probe(struct platform_device *pdev)
>  err_out_unregister_mdio:
>   phy_disconnect(dev->phydev);
>   mdiobus_unregister(bp->mii_bus);
> + of_node_put(bp->phy_node);
>   if (np && of_phy_is_fixed_link(np))
>   of_phy_deregister_fixed_link(np);
>   mdiobus_free(bp->mii_bus);
> 


-- 
Nicolas Ferre

Re: [lldp-devel] Fwd: [PATCH RESEND] Fix segfault on "lldptool -t -i eth2 -V PFC -c enabled"

2017-11-08 Thread Valentin Vidic

On Wed, Nov 08, 2017 at 04:24:48AM -0500, Sowmini Varadhan wrote:
> I just tried setting up a git pull-request for this to Valentin's repo,
> I'm not sure if I followed procedures correctly (sending text patches to 
> a list comes more naturally to me, and I may have fat-fingered something)
> 
> To whom should I send this patch? 
> 
> We also have a couple of other patches in the pipeline that we are
> testing, so setting up a mailing list would be welcomed!

Right, but my repo is just for Debian packaging :)  We would need a
SUSE repo or email address where to send patches - I also have a few small
fixes for lldpad and fcoe waiting.

-- 
Valentin

Re: [PATCH 15/31] nds32: System calls handling

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:
> From: Greentime Hu 

> +#endif /* __ASM_NDS32_SYSCALLS_H */
> diff --git a/arch/nds32/include/asm/unistd.h b/arch/nds32/include/asm/unistd.h
> new file mode 100644
> index 000..b30adca
> --- /dev/null
> +++ b/arch/nds32/include/asm/unistd.h
> @@ -0,0 +1,21 @@

> +#define __ARCH_WANT_SYS_LLSEEK

This gets set from include/asm-generic/unistd.h if you include that file.

> +#define __ARCH_WANT_SYS_CLONE

This seems ok, though it would be nice to have the reverse logic and have
architectures opt-out of the generic version when they need to provide their
own, rather than having most architectures set it.

> +#define __ARCH_WANT_SYS_OLD_MMAP

I don't see why you need this, can it be dropped?

> diff --git a/arch/nds32/include/uapi/asm/unistd.h 
> b/arch/nds32/include/uapi/asm/unistd.h
> new file mode 100644
> index 000..01b466d
> --- /dev/null
> +++ b/arch/nds32/include/uapi/asm/unistd.h

> +#define __NR_ipc   (__NR_arch_specific_syscall + 2)
> +#define __NR_sysfs (__NR_arch_specific_syscall + 3)
> +#define __NR__llseek __NR_llseek



> +__SYSCALL(__NR_cacheflush, sys_cacheflush)
> +__SYSCALL(__NR_syscall, sys_syscall)
> +__SYSCALL(__NR_ipc, sys_ipc)
> +__SYSCALL(__NR_sysfs, sys_sysfs)
> +
> +__SYSCALL(__NR_fadvise64_64, sys_fadvise64_64_wrapper)
> +__SYSCALL(__NR_rt_sigreturn, sys_rt_sigreturn_wrapper)
> +__SYSCALL(__NR_mmap, sys_old_mmap)

Usually we handle those overrides by defining the macros in asm/unistd.h
before including the asm-generic version. Can you do that as well for
consistency?

I don't see a reason for sys_ipc, sys_sysfs or sys_old_mmap() here
in a new architecture. Can you drop those or explain why you need them?

> +/*
> + * Special system call wrappers
> + *
> + * $r0 = syscall number
> + * $r8 = syscall table
> + */
> +   .type   sys_syscall, #function
> +ENTRY(sys_syscall)
> +   addi$p1, $r0, #-__NR_syscalls
> +   bgtz$p1, 3f
> +   move$p1, $r0
> +   move$r0, $r1
> +   move$r1, $r2
> +   move$r2, $r3
> +   move$r3, $r4
> +   move$r4, $r5
> +! add for syscall 6 args
> +   lwi $r5, [$sp + #SP_OFFSET ]
> +   lwi $r5, [$r5]
> +! ~add for syscall 6 args
> +
> +   lw  $p1, [tbl+$p1<<2]
> +   jr  $p1
> +3: b   sys_ni_syscall
> +ENDPROC(sys_syscall)

Can you explain what this is used for?

> --- /dev/null
> +++ b/arch/nds32/kernel/sys_nds32.c
> +
> +long sys_mmap2(unsigned long addr, unsigned long len,
> +  unsigned long prot, unsigned long flags,
> +  unsigned long fd, unsigned long pgoff)
> +{
> +   if (pgoff & (~PAGE_MASK >> 12))
> +   return -EINVAL;
> +
> +   return sys_mmap_pgoff(addr, len, prot, flags, fd,
> + pgoff >> (PAGE_SHIFT - 12));
> +}
> +
> +asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset,
> +loff_t len)
> +{
> +   return sys_fadvise64_64(fd, offset, len, advice);
> +}

You should always use SYSCALL_DEFINE*() macros to define entry points
for your own syscalls in C code for consistency. I also wonder if we should
just move those two into common code, a lot of architectures need the first
one in particular.

Arnd

Re: Page allocator bottleneck

2017-11-08 Thread Mel Gorman

On Wed, Nov 08, 2017 at 02:42:04PM +0900, Tariq Toukan wrote:
> > > Hi all,
> > > 
> > > After leaving this task for a while doing other tasks, I got back to it 
> > > now
> > > and see that the good behavior I observed earlier was not stable.
> > > 
> > > Recall: I work with a modified driver that allocates a page (4K) per 
> > > packet
> > > (MTU=1500), in order to simulate the stress on page-allocator in 200Gbps
> > > NICs.
> > > 
> > 
> > There is almost nothing new in the data that hasn't been discussed before. The
> > suggestion to free on a remote per-cpu list would be expensive as it would
> > require per-cpu lists to have a lock for safe remote access.
>
> That's right, but each such lock will be significantly less congested than
> the buddy allocator lock.

That is not necessarily true if all the allocations and frees always happen
on the same CPUs. The contention will be equivalent to the zone lock.
Your point will only hold true if there are also heavy allocation streams
from other CPUs that are unrelated.

> In the flow in subject two cores need to
> synchronize (one allocates, one frees).
> We also need to evaluate the cost of acquiring and releasing the lock in the
> case of no congestion at all.
> 

If the per-cpu structures have a lock, there will be a light amount of
overhead. Nothing too severe, but it shouldn't be done lightly either.

> >  However,
> > I'd be curious if you could test the mm-pagealloc-irqpvec-v1r4 branch
> > https://git.kernel.org/pub/scm/linux/kernel/git/mel/linux.git .  It's an
> > unfinished prototype I worked on a few weeks ago. I was going to revisit
> > in about a month's time when 4.15-rc1 was out. I'd be interested in seeing
> > if it has a positive gain in normal page allocations without destroying
> > the performance of interrupt and softirq allocation contexts. The
> > interrupt/softirq context testing is crucial as that is something that
> > hurt us before when trying to improve page allocator performance.
> > 
> Yes, I will test that once I get back in office (after netdev conference and
> vacation).

Thanks.

> Can you please elaborate in a few words about the idea behind the prototype?
> Does it address page-allocator scalability issues, or only the rate of
> single core page allocations?

Short answer -- maybe. All scalability issues or rates of allocation are
context and workload dependent so the question is impossible to answer
for the general case.

Broadly speaking, the patch reintroduces the per-cpu lists being for !irq
context allocations again. The last time we did this, hard and soft IRQ
allocations went through the buddy allocator which couldn't scale and
the patch was reverted. With this patch, it goes through a very large
pagevec-like structure that is protected by a lock but the fast paths
for alloc/free are extremely simple operations so the lock hold times are
very small. Potentially, a development path is that the current per-cpu
allocator is replaced with pagevec-like structures that are dynamically
allocated which would also allow pages to be freed to remote CPU lists
(if we could detect when that is appropriate which is unclear). We could
also drain remote lists without using IPIs. The downside is that the memory
footprint of the allocator would be higher and the size could no longer
be tuned so there would need to be excellent justification for such a move.

I haven't posted the patches properly yet because mmotm is carrying too
many patches as it is and this patch indirectly depends on the contents. I
also didn't write memory hot-remove support which would be a requirement
before merging. I hadn't intended to put further effort into it until I
had some evidence the approach had promise. My own testing indicated it
worked but the drivers I was using for network tests did not allocate
intensely enough to show any major gain/loss.

-- 
Mel Gorman
SUSE Labs

Re: [PATCH net-next v2 2/2] hv_netvsc: hide warnings about uninitialized/missing rndis device

2017-11-08 Thread Vitaly Kuznetsov

Stephen Hemminger  writes:

> On Nov 2, 2017 19:35, "Vitaly Kuznetsov"  wrote:
>
>  Hyper-V hosts are known to send RNDIS messages even after we halt the
>  device in rndis_filter_halt_device(). Remove user visible messages
>  as they are not really useful.
>
>  Signed-off-by: Vitaly Kuznetsov 
>  ---
>  drivers/net/hyperv/rndis_filter.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
>  diff --git a/drivers/net/hyperv/rndis_filter.c 
> b/drivers/net/hyperv/rndis_filter.c
>  index 0648eebda829..8b1242b8d8ef 100644
>  --- a/drivers/net/hyperv/rndis_filter.c
>  +++ b/drivers/net/hyperv/rndis_filter.c
>  @@ -407,13 +407,13 @@ int rndis_filter_receive(struct net_device *ndev,
>
>  /* Make sure the rndis device state is initialized */
>  if (unlikely(!rndis_dev)) {
>  - netif_err(net_device_ctx, rx_err, ndev,
>  + netif_dbg(net_device_ctx, rx_err, ndev,
>  "got rndis message but no rndis device!\n");
>  return NVSP_STAT_FAIL;
>  }
>
>  if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
>  - netif_err(net_device_ctx, rx_err, ndev,
>  + netif_dbg(net_device_ctx, rx_err, ndev,
>  "got rndis message uninitialized\n");
>  return NVSP_STAT_FAIL;
>  }
>  --
>  2.13.6
>
>  ___
>  devel mailing list
>  de...@linuxdriverproject.org
>  http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
>
> This should never happen since host removal should be setting device
> down which disables NAPI. If this is not working correctly, that needs
> to be fixed (rather than silencing the message).

This happens between the point where we halt the RNDIS device in
rndis_filter_halt_device() and the NAPI shutdown from netvsc_device_remove(),
while the window is still open.

>
> Don't shoot the messenger

These messages are of no use for a random user: you change MTU on your
device and see 'got rndis message uninitialized' - what are you supposed
to do? I leave them at debugging level for us to actually debug.

-- 
  Vitaly

Re: [PATCH 16/31] nds32: VDSO support

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:

> --- /dev/null
> +++ b/arch/nds32/include/asm/vdso_datapage.h
> @@ -0,0 +1,51 @@

> +#ifndef __ASM_VDSO_DATAPAGE_H
> +#define __ASM_VDSO_DATAPAGE_H
> +
> +#ifdef __KERNEL__
> +
> +#ifndef __ASSEMBLY__
> +
> +struct vdso_data {
> +   bool cycle_count_down;  /* timer cyclye counter is decrease with time 
> */
> +   u32 cycle_count_offset; /* offset of timer cycle counter register */
> +   u32 seq_count;  /* sequence count - odd during updates */
> +   u32 xtime_coarse_sec;   /* coarse time */
> +   u32 xtime_coarse_nsec;
> +
> +   u32 wtm_clock_sec;  /* wall to monotonic offset */
> +   u32 wtm_clock_nsec;
> +   u32 xtime_clock_sec;/* CLOCK_REALTIME - seconds */
> +   u32 cs_mult;/* clocksource multiplier */
> +   u32 cs_shift;   /* Cycle to nanosecond divisor (power of two) 
> */
> +
> +   u64 cs_cycle_last;  /* last cycle value */
> +   u64 cs_mask;/* clocksource mask */
> +
> +   u64 xtime_clock_nsec;   /* CLOCK_REALTIME sub-ns base */
> +   u32 tz_minuteswest; /* timezone info for gettimeofday(2) */
> +   u32 tz_dsttime;
> +};

I need some insight from Deepa and Palmer here: to prepare for 64-bit
time_t in the
future, would it make sense to define the vdso to use 64-bit seconds numbers
consistently, and provide vdso symbols that return 64-bit times, having the
glibc convert that to normal timespec values, or should we leave it for now?

For the normal syscalls I think we are better off keeping things consistent
between architectures, but the vdso is architecture specific by definition, so
we may as well use 64-bit times there now (same for risc-v, which still
has time to modify this before the 4.15 release and glibc merge).

> +/*
> + * This controls what symbols we export from the DSO.
> + */
> +VERSION
> +{
> +   LINUX_2.6 {
> +   global:
> +   __kernel_rt_sigreturn;
> +   __vdso_gettimeofday;
> +   __vdso_clock_getres;
> +   __vdso_clock_gettime;
> +   local: *;
> +   };
> +}

I still struggle to understand how symbol versioning is supposed to work
in a vdso (as opposed to a library you compile against), but I think this should
use the version from the kernel that you plan to merge into, i.e. LINUX_4
or LINUX_4_16.

   Arnd

Re: [PATCH 18/31] nds32: Library functions

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:

> +#define get_user(x,p)  \
> +   ({  \
> +   const register typeof(*(p)) __user *__p asm("$r0") = (p);\
> +   register unsigned long __r2 asm("$r2"); \
> +   register int __e asm("$r0");\
> +   switch (sizeof(*(__p))) {   \
> +   case 1: \
> +   __get_user_x(__r2, __p, __e, 1, "$lp"); \
> +   break;  \
> +   case 2: \
> +   __get_user_x(__r2, __p, __e, 2, "$lp"); \
> +   break;  \
> +   case 4: \
> +   __get_user_x(__r2, __p, __e, 4, "$lp"); \
> +   break;  \
> +   case 8: \
> +   __get_user_x(__r2, __p, __e, 8, "$r3", "$lp");  \
> +   break;  \
> +   default: __e = __get_user_bad(); break; \
> +   }   \
> +   x = (typeof(*(p))) __r2;\
> +   __e;\
> +   })

Something looks odd here: __get_user_bad above looks like it is meant
to provide a link-time error

> +__get_user_bad_8:
> +   move$r3, #0
> +__get_user_bad:
> +   move$r2, #0
> +   move$r0, #-EFAULT
> +   ret
> +

but here you actually have a symbol with that name, it gets turned
into a runtime error. I think the first one needs to get renamed to
actually work as expected and force the link error in built-in code
(it works in modules since __get_user_bad is not exported).

> +
> +__put_user_bad:
> +   move$r0, #-EFAULT
> +   ret
> +

same here.

  Arnd

Re: [PATCH 03/31] nds32: Support early_printk

2017-11-08 Thread Tobias Klauser

On 2017-11-08 at 06:54:51 +0100, Greentime Hu  wrote:
> From: Greentime Hu 
> 
> Signed-off-by: Rick Chen 
> Signed-off-by: Greentime Hu 
> ---
>  arch/nds32/kernel/early_printk.c |  124 
> ++
>  1 file changed, 124 insertions(+)
>  create mode 100644 arch/nds32/kernel/early_printk.c
> 
> diff --git a/arch/nds32/kernel/early_printk.c 
> b/arch/nds32/kernel/early_printk.c
> new file mode 100644
> index 000..269c3cd
> --- /dev/null
> +++ b/arch/nds32/kernel/early_printk.c

Could this be implemented using earlycon (the 8250 driver already supports
it) instead of duplicating functionality in arch/nds32?  See e.g. the
nios2 port for how this could be done, specifically commit e118c3fec9c0
("nios2: remove custom early console implementation").

Re: [PATCH 20/31] nds32: L2 cache support

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 

> +
> +/* This is defined for head.S to use due to device tree is not yet built. */
> +#define L2CC_PA_BASE   0x90F0

This looks problematic, since it prevents you from using the same head.S for
multiple SoCs that have different L2 controllers or that have them at different
addresses.

What does head.S actually do to the L2CC? Could the boot protocol require
that to be done by the boot loader before entering the kernel instead?

 Arnd

Re: [PATCH] ARM: dts: add phy-reset property for rk3066a-rayeager emac

2017-11-08 Thread Chris Zhong


Hi Florian Fainelli


On 2017年11月08日 02:26, Florian Fainelli wrote:

On 11/07/2017 01:51 AM, Chris Zhong wrote:


On 2017年11月07日 15:54, Vladimir Zapolskiy wrote:

Hello Chris,

On 11/07/2017 04:49 AM, Chris Zhong wrote:

The ethernet phy of rk3066a-rayeager has a reset pin, which is controlled by
GPIO1_D6; this pin should be pulled down and then pulled up to reset the phy.
Add a phy-reset property to the emac node so that the phy can be reset when
the emac powers on.

for PHY reset there are properties 'reset-gpios' and 'reset-delay-us',
please reference to Documentation/devicetree/bindings/net/mdio.txt

Can you try to reuse them instead of adding new custom properties?

This phy-reset is from Documentation/devicetree/bindings/net/arc_emac.txt.
And copy from arch/arm/boot/dts/rk3036-kylin.dts.
Can we just use these properties, they are not new.

Because it already exists does not mean it's correct, in fact, it is not
at all because it places the reset property for a PHY into the MAC node,
which is just not what this is, what we should be using instead is the
following patch series:

http://patchwork.ozlabs.org/project/netdev/list/?series=9267

http://patchwork.ozlabs.org/patch/828499/
http://patchwork.ozlabs.org/patch/828505/
http://patchwork.ozlabs.org/patch/828501/
http://patchwork.ozlabs.org/patch/828502/
Okay, this series works for me, and I will push a new version patch 
following it,

with a reset-gpios property under phy node.
And hope this series can be applied soon.

Thanks
Chris Zhong



As a side question, which is mainly addressed to Sergei and Roger,
I don't quite understand why PHY properties were initially added to
MAC/MDIO bus device tree nodes, in my opinion they must be moved under
PHY device tree nodes.

--
With best wishes,
Vladimir


Signed-off-by: Chris Zhong 
---

   arch/arm/boot/dts/rk3066a-rayeager.dts | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/rk3066a-rayeager.dts
b/arch/arm/boot/dts/rk3066a-rayeager.dts
index 570157f..6064a0a 100644
--- a/arch/arm/boot/dts/rk3066a-rayeager.dts
+++ b/arch/arm/boot/dts/rk3066a-rayeager.dts
@@ -173,6 +173,8 @@
   pinctrl-0 = <&emac_xfer>, <&emac_mdio>, <&rmii_rst>;
   phy = <&phy0>;
   phy-supply = <&vcc_rmii>;
+phy-reset-gpios = <&gpio1 RK_PD6 GPIO_ACTIVE_LOW>; /* PHY_RST */
+phy-reset-duration = <10>; /* millisecond */
   status = "okay";
 phy0: ethernet-phy@0 {

Re: [PATCH 23/31] nds32: Device tree support

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 
> ---
>  arch/nds32/boot/dts/Makefile   |8 ++
>  arch/nds32/boot/dts/ae3xx.dts  |   55 
>  arch/nds32/boot/dts/ag101p.dts |   60 
> 
>  arch/nds32/kernel/devtree.c|   45 ++
>  4 files changed, 168 insertions(+)
>  create mode 100644 arch/nds32/boot/dts/Makefile
>  create mode 100644 arch/nds32/boot/dts/ae3xx.dts
>  create mode 100644 arch/nds32/boot/dts/ag101p.dts
>  create mode 100644 arch/nds32/kernel/devtree.c
>
> diff --git a/arch/nds32/boot/dts/Makefile b/arch/nds32/boot/dts/Makefile
> new file mode 100644
> index 000..d31faa8
> --- /dev/null
> +++ b/arch/nds32/boot/dts/Makefile
> @@ -0,0 +1,8 @@
> +ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
> +BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_NDS32_BUILTIN_DTB)).dtb.o
> +else
> +BUILTIN_DTB :=
> +endif
> +obj-$(CONFIG_OF) += $(BUILTIN_DTB)

For new architectures, I think it's better to not support built-in dtb
but instead require the
boot loader to be aware of device trees.

> +clean-files := *.dtb *.dtb.S
> diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
> new file mode 100644
> index 000..b6c85dc
> --- /dev/null
> +++ b/arch/nds32/boot/dts/ae3xx.dts
> @@ -0,0 +1,55 @@
> +/dts-v1/;
> +/ {
> +   compatible = "nds32 ae3xx";

ae3xx looks like a wildcard name for multiple boards. Please always
have compatible
names without wildcards. You usually also want to list both the SoC
and the board
here.

> +   #address-cells = <1>;
> +   #size-cells = <1>;
> +   interrupt-parent = <&intc>;
> +
> +   chosen {
> +   bootargs = "console=ttyS0,38400n8 
> earlyprintk=uart8250-32bit,0xf030 debug loglevel=7";
> +   };

Remove the earlyprintk option from the bootargs here, regular boards
should never rely
on earlyprintk. The "earlycon" support in the uart drivers works
almost as well (it starts
slightly later in the boot process), and it will pick up the uart from
the chosen/stdout-path
property.

> +   if (!params || !early_init_dt_scan(params)) {
> +   pr_crit("\n"
> +   "Error: invalid device tree blob at (virtual address 
> 0x%p)\n"
> +   "The dtb must be 8-byte aligned and must not exceed 8 
> KB in size\n"
> +   "\nPlease check your bootloader.", params);

What is the 8KB limit for the dtb for? This sounds really limiting
once you get to
more complex SoCs.

   Arnd

Re: [PATCH 24/31] nds32: Miscellaneous header files

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:
> +
> +static inline void __delay(unsigned long loops)
> +{
> +   __asm__ __volatile__(".align 2\n"
> +"1:\n"
> +"\taddi\t%0, %0, -1\n"
> +"\tbgtz\t%0, 1b\n"
> +:"=r"(loops)
> +:"0"(loops));
> +}

Does the architecture define a high-resolution clock source? If yes,
then it's better
to use that to get exact timing than to rely on the loop calibration.

> +/*
> + * This file is generally used by user-level software, so you need to
> + * be a little careful about namespace pollution etc.  Also, we cannot
> + * assume GCC is being used.
> + */
> +
> +typedef unsigned short __kernel_mode_t;
> +#define __kernel_mode_t __kernel_mode_t
> +
> +typedef unsigned short __kernel_ipc_pid_t;
> +#define __kernel_ipc_pid_t __kernel_ipc_pid_t
> +
> +typedef unsigned short __kernel_uid_t;
> +typedef unsigned short __kernel_gid_t;
> +#define __kernel_uid_t __kernel_uid_t
> +
> +typedef unsigned short __kernel_old_dev_t;
> +#define __kernel_old_dev_t __kernel_old_dev_t
> +
> +#include 

I don't understand why you would want to override any of those.
Changing them unfortunately means rebuilding all of your user
space, but I think it would be better to do that now than to suffer
from this later on.

  Arnd

[RFC PATCH -tip 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events

2017-11-08 Thread Masami Hiramatsu

Hi,

This series introduces new trace events which allow users to
trace the network congestion window etc. via ftrace or perftools,
and removes the jprobe users (tcp_probe/dccp_probe/sctp_probe).
With this, the series removes all register_jprobe users from the
kernel tree.

So following example in

https://wiki.linuxfoundation.org/networking/tcpprobe

 # modprobe tcp_probe port=5001
 # cat /proc/net/tcpprobe >/tmp/data.out &
 # pid=$!
 # iperf -c otherhost
 # kill $pid

will be changed as below;

 # cd /sys/kernel/debug/tracing
 # echo 1 > events/tcp/tcp_probe/enable
 # echo "sport == 5001 || dport == 5001"  > events/tcp/tcp_probe/filter
 # tail -f trace_pipe > /tmp/data.out &
 # pid=$!
 # iperf -c otherhost
 # kill $pid

And it outputs logs like below:

# tracer: nop
#
#  _-=> irqs-off
# / _=> need-resched
#| / _---=> hardirq/softirq
#|| / _--=> preempt-depth
#||| / delay
#   TASK-PID   CPU#  TIMESTAMP  FUNCTION
#  | |   |      | |
  -0 [000] ..s2  1089.238049: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=37 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28960
  -0 [000] ..s2  1090.156938: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=37 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28992
  -0 [000] ..s2  1091.333729: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=38 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28992
  -0 [000] ..s2  1092.300330: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=37 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28992
  -0 [000] ..s2  1095.044739: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=36 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28992
  -0 [000] ..s2  1096.573825: tcp_probe: 
src=[:::192.168.139.2]:5001 dest=[:::192.168.139.1]:56256 mark=0x0 
length=32 snd_nxt=0xee4abe9c snd_una=0xee4abe9c snd_cwnd=10 ssthresh=2147483647 
snd_wnd=29312 srtt=478 rcv_wnd=28992

I need your feedback on this change, e.g. on the output formatting.
Also, these events need more testing by someone who can set up
DCCP and SCTP; since those are special protocols, I have
no environment to test them.

Steve, I also wrote a hack in the sctp_probe event. Because
it needs to record several events at once, the sctp_probe_path
events are called from the assignment code. This means the
ring-buffer write will be called recursively (the reserve-commit
pair will be nested). As far as I can see, that seems OK,
but I need your review too.

Thank you,

---

Masami Hiramatsu (6):
  net: tcp: Add trace events for TCP congestion window tracing
  net: tcp: Remove TCP probe module
  net: sctp: Add SCTP ACK tracking trace event
  net: sctp: Remove debug SCTP probe module
  net: dccp: Add DCCP sendmsg trace event
  net: dccp: Remove dccpprobe module


 include/trace/events/sctp.h |   96 ++
 include/trace/events/tcp.h  |   96 ++
 net/Kconfig |   17 --
 net/core/net-traces.c   |1 
 net/dccp/Kconfig|   17 --
 net/dccp/Makefile   |2 
 net/dccp/probe.c|  203 -
 net/dccp/proto.c|5 +
 net/dccp/trace.h|  105 +++
 net/ipv4/Makefile   |1 
 net/ipv4/tcp_input.c|4 +
 net/ipv4/tcp_probe.c|  301 ---
 net/sctp/Kconfig|   12 --
 net/sctp/Makefile   |3 
 net/sctp/probe.c|  244 ---
 net/sctp/sm_statefuns.c |5 +
 16 files changed, 312 insertions(+), 800 deletions(-)
 create mode 100644 include/trace/events/sctp.h
 create mode 100644 include/trace/events/tcp.h
 delete mode 100644 net/dccp/probe.c
 create mode 100644 net/dccp/trace.h
 delete mode 100644 net/ipv4/tcp_probe.c
 delete mode 100644 net/sctp/probe.c

--
Masami Hiramatsu (Linaro)

[RFC PATCH -tip 2/6] net: tcp: Remove TCP probe module

2017-11-08 Thread Masami Hiramatsu

Remove TCP probe module since jprobe has been deprecated.
That function is now replaced by tcp/tcp_probe trace-event.
You can use it via ftrace or perftools.

Signed-off-by: Masami Hiramatsu 
---
 net/Kconfig  |   17 ---
 net/ipv4/Makefile|1 
 net/ipv4/tcp_probe.c |  301 --
 3 files changed, 319 deletions(-)
 delete mode 100644 net/ipv4/tcp_probe.c

diff --git a/net/Kconfig b/net/Kconfig
index 9dba2715919d..efe930db3c08 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -336,23 +336,6 @@ config NET_PKTGEN
  To compile this code as a module, choose M here: the
  module will be called pktgen.
 
-config NET_TCPPROBE
-   tristate "TCP connection probing"
-   depends on INET && PROC_FS && KPROBES
-   ---help---
-   This module allows for capturing the changes to TCP connection
-   state in response to incoming packets. It is used for debugging
-   TCP congestion avoidance modules. If you don't understand
-   what was just said, you don't need it: say N.
-
-   Documentation on how to use TCP connection probing can be found
-   at:
-   
- 
http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
-
-   To compile this code as a module, choose M here: the
-   module will be called tcp_probe.
-
 config NET_DROP_MONITOR
tristate "Network packet drop alerting service"
depends on INET && TRACEPOINTS
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c6c8ad1d4b6d..47a0a6649a9d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -43,7 +43,6 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
-obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
deleted file mode 100644
index 697f4c67b2e3..
--- a/net/ipv4/tcp_probe.c
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * tcpprobe - Observe the TCP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-
-MODULE_AUTHOR("Stephen Hemminger ");
-MODULE_DESCRIPTION("TCP cwnd snooper");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.1");
-
-static int port __read_mostly;
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-static unsigned int bufsize __read_mostly = 4096;
-MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
-module_param(bufsize, uint, 0);
-
-static unsigned int fwmark __read_mostly;
-MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
-module_param(fwmark, uint, 0);
-
-static int full __read_mostly;
-MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd 
changes)");
-module_param(full, int, 0);
-
-static const char procname[] = "tcpprobe";
-
-struct tcp_log {
-   ktime_t tstamp;
-   union {
-   struct sockaddr raw;
-   struct sockaddr_in  v4;
-   struct sockaddr_in6 v6;
-   }   src, dst;
-   u16 length;
-   u32 snd_nxt;
-   u32 snd_una;
-   u32 snd_wnd;
-   u32 rcv_wnd;
-   u32 snd_cwnd;
-   u32 ssthresh;
-   u32 srtt;
-};
-
-static struct {
-   spinlock_t  lock;
-   wait_queue_head_t wait;
-   ktime_t start;
-   u32 lastcwnd;
-
-   unsigned long   head, tail;
-   struct tcp_log  *log;
-} tcp_probe;
-
-static inline int tcp_probe_used(void)
-{
-   return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1);
-}
-
-static inline int tcp_probe_avail(void)
-{
-   return bufsize - tcp_probe_used() - 1;
-}
-
-#define tcp_probe_copy_fl_to_si4(inet, si4, mem)   \
-   do {\
-   si4.sin_family = AF_INET;   \
-   si4.sin_port = inet->inet_##mem##port;  \

[RFC PATCH -tip 4/6] net: sctp: Remove debug SCTP probe module

2017-11-08 Thread Masami Hiramatsu

Remove SCTP probe module since jprobe has been deprecated.
That function is now replaced by sctp/sctp_probe and
sctp/sctp_probe_path trace-events.
You can use it via ftrace or perftools.

Signed-off-by: Masami Hiramatsu 
---
 net/sctp/Kconfig  |   12 ---
 net/sctp/Makefile |3 -
 net/sctp/probe.c  |  244 -
 3 files changed, 259 deletions(-)
 delete mode 100644 net/sctp/probe.c

diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index d9c04dc1b3f3..c740b189d4ba 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,18 +37,6 @@ menuconfig IP_SCTP
 
 if IP_SCTP
 
-config NET_SCTPPROBE
-   tristate "SCTP: Association probing"
-depends on PROC_FS && KPROBES
----help---
-This module allows for capturing the changes to SCTP association
-state in response to incoming packets. It is used for debugging
-SCTP congestion control algorithms. If you don't understand
-what was just said, you don't need it: say N.
-
-To compile this code as a module, choose M here: the
-module will be called sctp_probe.
-
 config SCTP_DBG_OBJCNT
bool "SCTP: Debug object counts"
depends on PROC_FS
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 8c434af3e68f..28961220b582 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -4,7 +4,6 @@
 #
 
 obj-$(CONFIG_IP_SCTP) += sctp.o
-obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
 obj-$(CONFIG_INET_SCTP_DIAG) += sctp_diag.o
 
 sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
@@ -15,8 +14,6 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
  output.o input.o debug.o stream.o auth.o \
  offload.o
 
-sctp_probe-y := probe.o
-
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
deleted file mode 100644
index 1280f85a598d..
--- a/net/sctp/probe.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * sctp_probe - Observe the SCTP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger 
- *
- * Modified for SCTP from Stephen Hemminger's code
- * Copyright (C) 2010, Wei Yongjun 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-
-MODULE_SOFTDEP("pre: sctp");
-MODULE_AUTHOR("Wei Yongjun ");
-MODULE_DESCRIPTION("SCTP snooper");
-MODULE_LICENSE("GPL");
-
-static int port __read_mostly = 0;
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-static unsigned int fwmark __read_mostly = 0;
-MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
-module_param(fwmark, uint, 0);
-
-static int bufsize __read_mostly = 64 * 1024;
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
-module_param(bufsize, int, 0);
-
-static int full __read_mostly = 1;
-MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd 
changes)");
-module_param(full, int, 0);
-
-static const char procname[] = "sctpprobe";
-
-static struct {
-   struct kfifo  fifo;
-   spinlock_tlock;
-   wait_queue_head_t wait;
-   struct timespec64 tstart;
-} sctpw;
-
-static __printf(1, 2) void printl(const char *fmt, ...)
-{
-   va_list args;
-   int len;
-   char tbuf[256];
-
-   va_start(args, fmt);
-   len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
-   va_end(args);
-
-   kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
-   wake_up(&sctpw.wait);
-}
-
-static int sctpprobe_open(struct inode *inode, struct file *file)
-{
-   kfifo_reset(&sctpw.fifo);
-   ktime_get_ts64(&sctpw.tstart);
-
-   return 0;
-}
-
-static ssize_t sctpprobe_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
-   int error = 0, cnt = 0;
-   unsigned char *tbuf;
-
-   if (!buf)
-   return -EINVAL;
-
-   if (len == 0)
-   return 0;
-
-   tbuf = vmalloc(len);
-   if (!tbuf)
-   return -ENOMEM;
-
-   error = wait_event_interruptible(sctpw.wait,
-

[RFC PATCH -tip 5/6] net: dccp: Add DCCP sendmsg trace event

2017-11-08 Thread Masami Hiramatsu

Add DCCP sendmsg trace event (dccp/dccp_probe) for
replacing dccpprobe. User can trace this event via
ftrace or perftools.

Signed-off-by: Masami Hiramatsu 
---
 net/dccp/proto.c |5 +++
 net/dccp/trace.h |  105 ++
 2 files changed, 110 insertions(+)
 create mode 100644 net/dccp/trace.h

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b68168fcc06a..77a8a737c4f3 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -38,6 +38,9 @@
 #include "dccp.h"
 #include "feat.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
@@ -756,6 +759,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t len)
int rc, size;
long timeo;
 
+   trace_dccp_probe(sk, len);
+
if (len > dp->dccps_mss_cache)
return -EMSGSIZE;
 
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
new file mode 100644
index ..aa01321a6c37
--- /dev/null
+++ b/net/dccp/trace.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dccp
+
+#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DCCP_H
+
+#include 
+#include "dccp.h"
+#include "ccids/ccid3.h"
+#include 
+
+TRACE_EVENT(dccp_probe,
+
+   TP_PROTO(struct sock *sk, size_t size),
+
+   TP_ARGS(sk, size),
+
+   TP_STRUCT__entry(
+   /* sockaddr_in6 is always bigger than sockaddr_in */
+   __array(__u8, saddr, sizeof(struct sockaddr_in6))
+   __array(__u8, daddr, sizeof(struct sockaddr_in6))
+   __field(__u16, sport)
+   __field(__u16, dport)
+   __field(__u16, size)
+   __field(__u16, tx_s)
+   __field(__u32, tx_rtt)
+   __field(__u32, tx_p)
+   __field(__u32, tx_x_calc)
+   __field(__u64, tx_x_recv)
+   __field(__u64, tx_x)
+   __field(__u32, tx_t_ipi)
+   ),
+
+   TP_fast_assign(
+   const struct inet_sock *inet = inet_sk(sk);
+   struct ccid3_hc_tx_sock *hc = NULL;
+
+   if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
+   hc = ccid3_hc_tx_sk(sk);
+
+   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+   if (sk->sk_family == AF_INET) {
+   struct sockaddr_in *v4 = (void *)__entry->saddr;
+
+   v4->sin_family = AF_INET;
+   v4->sin_port = inet->inet_sport;
+   v4->sin_addr.s_addr = inet->inet_saddr;
+   v4 = (void *)__entry->daddr;
+   v4->sin_family = AF_INET;
+   v4->sin_port = inet->inet_dport;
+   v4->sin_addr.s_addr = inet->inet_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+   } else if (sk->sk_family == AF_INET6) {
+   struct sockaddr_in6 *v6 = (void *)__entry->saddr;
+
+   v6->sin6_family = AF_INET6;
+   v6->sin6_port = inet->inet_sport;
+   v6->sin6_addr = inet6_sk(sk)->saddr;
+   v6 = (void *)__entry->daddr;
+   v6->sin6_family = AF_INET6;
+   v6->sin6_port = inet->inet_dport;
+   v6->sin6_addr = sk->sk_v6_daddr;
+#endif
+   }
+
+   /* For filtering use */
+   __entry->sport = ntohs(inet->inet_sport);
+   __entry->dport = ntohs(inet->inet_dport);
+
+   __entry->size = size;
+   if (hc) {
+   __entry->tx_s = hc->tx_s;
+   __entry->tx_rtt = hc->tx_rtt;
+   __entry->tx_p = hc->tx_p;
+   __entry->tx_x_calc = hc->tx_x_calc;
+   __entry->tx_x_recv = hc->tx_x_recv >> 6;
+   __entry->tx_x = hc->tx_x >> 6;
+   __entry->tx_t_ipi = hc->tx_t_ipi;
+   } else {
+   __entry->tx_s = 0;
+   memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi -
+  (void *)&__entry->tx_rtt +
+  sizeof(__entry->tx_t_ipi));
+   }
+   ),
+
+   TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d "
+ "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d",
+ __entry->saddr, __entry->daddr, __entry->size,
+ __entry->tx_s, __entry->tx_rtt, __entry->tx_p,
+ __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x,
+ __entry->tx_t_ipi)
+);
+
+#endif /* _TRACE_DCCP_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define

[RFC PATCH -tip 6/6] net: dccp: Remove dccpprobe module

2017-11-08 Thread Masami Hiramatsu

Remove DCCP probe module since jprobe has been deprecated.
That function is now replaced by dccp/dccp_probe trace-event.
You can use it via ftrace or perftools.

Signed-off-by: Masami Hiramatsu 
---
 net/dccp/Kconfig  |   17 
 net/dccp/Makefile |2 -
 net/dccp/probe.c  |  203 -
 3 files changed, 222 deletions(-)
 delete mode 100644 net/dccp/probe.c

diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 8c0ef71bed2f..b270e84d9c13 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -39,23 +39,6 @@ config IP_DCCP_DEBUG
 
  Just say N.
 
-config NET_DCCPPROBE
-   tristate "DCCP connection probing"
-   depends on PROC_FS && KPROBES
-   ---help---
-   This module allows for capturing the changes to DCCP connection
-   state in response to incoming packets. It is used for debugging
-   DCCP congestion avoidance modules. If you don't understand
-   what was just said, you don't need it: say N.
-
-   Documentation on how to use DCCP connection probing can be found
-   at:
-   
- 
http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe
-
-   To compile this code as a module, choose M here: the
-   module will be called dccp_probe.
-
 
 endmenu
 
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2e7b56097bc4..9d0383d2f277 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -21,9 +21,7 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
 dccp_ipv6-y := ipv6.o
 
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
-obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
 
 dccp-$(CONFIG_SYSCTL) += sysctl.o
 
 dccp_diag-y := diag.o
-dccp_probe-y := probe.o
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
deleted file mode 100644
index 3d3fda05b32d..
--- a/net/dccp/probe.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * dccp_probe - Observe the DCCP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger 
- *
- * Modified for DCCP from Stephen Hemminger's code
- * Copyright (C) 2006, Ian McDonald 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "dccp.h"
-#include "ccid.h"
-#include "ccids/ccid3.h"
-
-static int port;
-
-static int bufsize = 64 * 1024;
-
-static const char procname[] = "dccpprobe";
-
-static struct {
-   struct kfifo  fifo;
-   spinlock_tlock;
-   wait_queue_head_t wait;
-   struct timespec64 tstart;
-} dccpw;
-
-static void printl(const char *fmt, ...)
-{
-   va_list args;
-   int len;
-   struct timespec64 now;
-   char tbuf[256];
-
-   va_start(args, fmt);
-   getnstimeofday64(&now);
-
-   now = timespec64_sub(now, dccpw.tstart);
-
-   len = sprintf(tbuf, "%lu.%06lu ",
- (unsigned long) now.tv_sec,
- (unsigned long) now.tv_nsec / NSEC_PER_USEC);
-   len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
-   va_end(args);
-
-   kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
-   wake_up(&dccpw.wait);
-}
-
-static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
-   const struct inet_sock *inet = inet_sk(sk);
-   struct ccid3_hc_tx_sock *hc = NULL;
-
-   if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
-   hc = ccid3_hc_tx_sk(sk);
-
-   if (port == 0 || ntohs(inet->inet_dport) == port ||
-   ntohs(inet->inet_sport) == port) {
-   if (hc)
-   printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n",
-  &inet->inet_saddr, ntohs(inet->inet_sport),
-  &inet->inet_daddr, ntohs(inet->inet_dport), size,
-  hc->tx_s, hc->tx_rtt, hc->tx_p,
-  hc->tx_x_calc, hc->tx_x_recv >> 6,
-  hc->tx_x >> 6, hc->tx_t_ipi);
-   else
-   printl("%pI4:%u %pI4:%u %d\n",
-  &inet->inet_saddr, ntohs(inet->inet_sport),
-  &inet->inet_daddr, ntohs(inet->inet_dport),
-

Re: [PATCH 25/31] nds32: defconfig

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 
> ---
>  arch/nds32/configs/ae3xx_defconfig  |  110 
> +++
>  arch/nds32/configs/ag101p_defconfig |  109 ++

Are those two incompatible? I would recommend starting without board
specific defconfig
files, it just gets messy once you get more than a few machines you
want to support.

> diff --git a/arch/nds32/configs/ae3xx_defconfig 
> b/arch/nds32/configs/ae3xx_defconfig
> new file mode 100644
> index 000..14d49a3
> --- /dev/null
> +++ b/arch/nds32/configs/ae3xx_defconfig
> @@ -0,0 +1,110 @@
> +CONFIG_CROSS_COMPILE="nds32le-linux-"
> +CONFIG_SYSVIPC=y
> +CONFIG_POSIX_MQUEUE=y
> +CONFIG_HIGH_RES_TIMERS=y
> +CONFIG_BSD_PROCESS_ACCT=y
> +CONFIG_BSD_PROCESS_ACCT_V3=y
> +CONFIG_IKCONFIG=y
> +CONFIG_IKCONFIG_PROC=y
> +CONFIG_LOG_BUF_SHIFT=14
> +CONFIG_NAMESPACES=y
> +CONFIG_USER_NS=y
> +CONFIG_RELAY=y
> +CONFIG_BLK_DEV_INITRD=y
> +CONFIG_SYSCTL_SYSCALL=y
> +CONFIG_KALLSYMS_ALL=y
> +CONFIG_EMBEDDED=y

You usually don't want to select 'CONFIG_EMBEDDED' for regular
builds; it should only be needed to disable options that are
usually considered essential.

> +CONFIG_FB=y
> +# CONFIG_VGA_CONSOLE is not set
> +CONFIG_FRAMEBUFFER_CONSOLE=y

You have a framebuffer console here, but no framebuffer driver?

> +CONFIG_MMC=y
> +CONFIG_RTC_CLASS=y
> +# CONFIG_RTC_HCTOSYS is not set
> +CONFIG_CLKSRC_ATCPIT100=y
> +CONFIG_EXT2_FS=y

Maybe use EXT4, not EXT2, in the defconfig?

   Arnd

[RFC PATCH -tip 3/6] net: sctp: Add SCTP ACK tracking trace event

2017-11-08 Thread Masami Hiramatsu

Add SCTP ACK tracking trace event to trace the changes of SCTP
association state in response to incoming packets.
It is used for debugging SCTP congestion control algorithms,
and will replace sctp_probe module.

Note that this event is a bit tricky. Since it consists of 2
events (sctp_probe and sctp_probe_path), you have to enable
both events as below.

  # cd /sys/kernel/debug/tracing
  # echo 1 > events/sctp/sctp_probe/enable
  # echo 1 > events/sctp/sctp_probe_path/enable

Or, you can enable all the events under sctp.

  # echo 1 > events/sctp/enable

Since sctp_probe_path event is always invoked from sctp_probe
event, you can not see any output if you only enable
sctp_probe_path.


Signed-off-by: Masami Hiramatsu 
---
 include/trace/events/sctp.h |   96 +++
 net/sctp/sm_statefuns.c |5 ++
 2 files changed, 101 insertions(+)
 create mode 100644 include/trace/events/sctp.h

diff --git a/include/trace/events/sctp.h b/include/trace/events/sctp.h
new file mode 100644
index ..32c2dc72311e
--- /dev/null
+++ b/include/trace/events/sctp.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sctp
+
+#if !defined(_TRACE_SCTP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCTP_H
+
+#include 
+#include 
+
+TRACE_EVENT(sctp_probe_path,
+
+   TP_PROTO(struct sctp_transport *sp,
+const struct sctp_association *asoc),
+
+   TP_ARGS(sp, asoc),
+
+   TP_STRUCT__entry(
+   __field(__u64, asoc)
+   __field(__u32, primary)
+   __array(__u8, ipaddr, sizeof(union sctp_addr))
+   __field(__u32, state)
+   __field(__u32, cwnd)
+   __field(__u32, ssthresh)
+   __field(__u32, flight_size)
+   __field(__u32, partial_bytes_acked)
+   __field(__u32, pathmtu)
+   ),
+
+   TP_fast_assign(
+   __entry->asoc = (__u64)asoc;
+   __entry->primary = (sp == asoc->peer.primary_path);
+   memcpy(__entry->ipaddr, &sp->ipaddr, sizeof(union sctp_addr));
+   __entry->state = sp->state;
+   __entry->cwnd = sp->cwnd;
+   __entry->ssthresh = sp->ssthresh;
+   __entry->flight_size = sp->flight_size;
+   __entry->partial_bytes_acked = sp->partial_bytes_acked;
+   __entry->pathmtu = sp->pathmtu;
+   ),
+
+   TP_printk("asoc=%#llx%s ipaddr=%pISpc state=%u cwnd=%u ssthresh=%u "
+ "flight_size=%u partial_bytes_acked=%u pathmtu=%u",
+ __entry->asoc, __entry->primary ? "(*)" : "",
+ __entry->ipaddr, __entry->state, __entry->cwnd,
+ __entry->ssthresh, __entry->flight_size,
+ __entry->partial_bytes_acked, __entry->pathmtu)
+);
+
+TRACE_EVENT(sctp_probe,
+
+   TP_PROTO(const struct sctp_endpoint *ep,
+const struct sctp_association *asoc,
+struct sctp_chunk *chunk),
+
+   TP_ARGS(ep, asoc, chunk),
+
+   TP_STRUCT__entry(
+   __field(__u64, asoc)
+   __field(__u32, mark)
+   __field(__u16, bind_port)
+   __field(__u16, peer_port)
+   __field(__u32, pathmtu)
+   __field(__u32, rwnd)
+   __field(__u16, unack_data)
+   ),
+
+   TP_fast_assign(
+   struct sctp_transport *sp;
+   struct sk_buff *skb = chunk->skb;
+
+   __entry->asoc = (__u64)asoc;
+   __entry->mark = skb->mark;
+   __entry->bind_port = ep->base.bind_addr.port;
+   __entry->peer_port = asoc->peer.port;
+   __entry->pathmtu = asoc->pathmtu;
+   __entry->rwnd = asoc->peer.rwnd;
+   __entry->unack_data = asoc->unack_data;
+
+   list_for_each_entry(sp, &asoc->peer.transport_addr_list,
+   transports) {
+   trace_sctp_probe_path(sp, asoc);
+   }
+   ),
+
+   TP_printk("asoc=%#llx mark=%#x bind_port=%d peer_port=%d pathmtu=%d "
+ "rwnd=%u unack_data=%d",
+ __entry->asoc, __entry->mark, __entry->bind_port,
+ __entry->peer_port, __entry->pathmtu, __entry->rwnd,
+ __entry->unack_data)
+);
+
+#endif /* _TRACE_SCTP_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8f8ccded13e4..c5f92b2cc5c3 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,6 +59,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 static struct sctp_packet *sctp_abort_pkt_new(
struct net *net,
const struct sctp_endpoint *ep,
@@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net 
*net,
s

[RFC PATCH -tip 1/6] net: tcp: Add trace events for TCP congestion window tracing

2017-11-08 Thread Masami Hiramatsu

This adds an event to trace TCP stat variables with a
slightly intrusive trace event. It uses the ftrace/perf
event log buffer to trace that state, so there is no need to
prepare a dedicated ring buffer or custom user apps.

User can use ftrace to trace this event as below;

  # cd /sys/kernel/debug/tracing
  # echo 1 > events/tcp/tcp_probe/enable
  (run workloads)
  # cat trace

Signed-off-by: Masami Hiramatsu 
---
 include/trace/events/tcp.h |   96 
 net/core/net-traces.c  |1 
 net/ipv4/tcp_input.c   |4 ++
 3 files changed, 101 insertions(+)
 create mode 100644 include/trace/events/tcp.h

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index ..b8969bbceb38
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+TRACE_EVENT(tcp_probe,
+
+   TP_PROTO(struct sock *sk, struct sk_buff *skb),
+
+   TP_ARGS(sk, skb),
+
+   TP_STRUCT__entry(
+   /* sockaddr_in6 is always bigger than sockaddr_in */
+   __array(__u8, saddr, sizeof(struct sockaddr_in6))
+   __array(__u8, daddr, sizeof(struct sockaddr_in6))
+   __field(__u16, sport)
+   __field(__u16, dport)
+   __field(__u32, mark)
+   __field(__u16, length)
+   __field(__u32, snd_nxt)
+   __field(__u32, snd_una)
+   __field(__u32, snd_cwnd)
+   __field(__u32, ssthresh)
+   __field(__u32, snd_wnd)
+   __field(__u32, srtt)
+   __field(__u32, rcv_wnd)
+   ),
+
+   TP_fast_assign(
+   const struct tcp_sock *tp = tcp_sk(sk);
+   const struct inet_sock *inet = inet_sk(sk);
+
+   memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+   memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+   if (sk->sk_family == AF_INET) {
+   struct sockaddr_in *v4 = (void *)__entry->saddr;
+
+   v4->sin_family = AF_INET;
+   v4->sin_port = inet->inet_sport;
+   v4->sin_addr.s_addr = inet->inet_saddr;
+   v4 = (void *)__entry->daddr;
+   v4->sin_family = AF_INET;
+   v4->sin_port = inet->inet_dport;
+   v4->sin_addr.s_addr = inet->inet_daddr;
+#if IS_ENABLED(CONFIG_IPV6)
+   } else if (sk->sk_family == AF_INET6) {
+   struct sockaddr_in6 *v6 = (void *)__entry->saddr;
+
+   v6->sin6_family = AF_INET6;
+   v6->sin6_port = inet->inet_sport;
+   v6->sin6_addr = inet6_sk(sk)->saddr;
+   v6 = (void *)__entry->daddr;
+   v6->sin6_family = AF_INET6;
+   v6->sin6_port = inet->inet_dport;
+   v6->sin6_addr = sk->sk_v6_daddr;
+#endif
+   }
+
+   /* For filtering use */
+   __entry->sport = ntohs(inet->inet_sport);
+   __entry->dport = ntohs(inet->inet_dport);
+   __entry->mark = skb->mark;
+
+   __entry->length = skb->len;
+   __entry->snd_nxt = tp->snd_nxt;
+   __entry->snd_una = tp->snd_una;
+   __entry->snd_cwnd = tp->snd_cwnd;
+   __entry->snd_wnd = tp->snd_wnd;
+   __entry->rcv_wnd = tp->rcv_wnd;
+   __entry->ssthresh = tcp_current_ssthresh(sk);
+   __entry->srtt = tp->srtt_us >> 3;
+   ),
+
+   TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x "
+ "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u "
+ "rcv_wnd=%u",
+ __entry->saddr, __entry->daddr, __entry->mark,
+ __entry->length, __entry->snd_nxt, __entry->snd_una,
+ __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd,
+ __entry->srtt, __entry->rcv_wnd)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 71f209542364..2e84d642c03f 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9a0b3c5ffa46..da8b342b95ce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -76,6 +76,7 @@
 #include 
 #include 
 #include 
+#include 
 
 int sysctl_tcp_fack __read_mostly;
 int sysctl_tcp_max_reordering __read_mostly = 300;
@@ -5356,6 +5357,9 @@ void tcp_rcv_established(struct sock *s

RE: [PATCH 11/31] nds32: Atomic operations

2017-11-08 Thread vincentc

> -Original Message-
> From: arndbergm...@gmail.com [mailto:arndbergm...@gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Wednesday, November 08, 2017 4:54 PM
> To: Greentime Hu
> Cc: Greentime Ying-Han Hu(胡英漢); Linux Kernel Mailing List; linux-arch;
> Thomas Gleixner; Jason Cooper; Marc Zyngier; Rob Herring; Networking;
> Vincent Ren-Wei Chen(陳人維); Palmer Dabbelt
> Subject: Re: [PATCH 11/31] nds32: Atomic operations
>
> On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu 
> wrote:
> > From: Greentime Hu 
> >
> > Signed-off-by: Vincent Chen 
> > Signed-off-by: Greentime Hu 
> > ---
> >  arch/nds32/include/asm/futex.h|  116 
> >  arch/nds32/include/asm/spinlock.h |  178
> > +
> >  2 files changed, 294 insertions(+)
> >  create mode 100644 arch/nds32/include/asm/futex.h  create mode
> 100644
> > arch/nds32/include/asm/spinlock.h
>
>
> > diff --git a/arch/nds32/include/asm/spinlock.h
> > b/arch/nds32/include/asm/spinlock.h
> > new file mode 100644
> > index 000..dd5fc71
> > --- /dev/null
> > +++ b/arch/nds32/include/asm/spinlock.h
> > @@ -0,0 +1,178 @@
> > +
> > +#define arch_spin_unlock_wait(lock) \
> > +   do { while (arch_spin_is_locked(lock)) cpu_relax(); } while
> > +(0)
>
> This was removed from the other architectures in commit
> 952111d7db02 ("arch: Remove spin_unlock_wait() arch-specific definitions")
>
> Please remove this as well.
>
Dear Arnd:

I will remove them in the next version patch.


> Palmer, I see riscv has the same thing, please also add a patch to your tree 
> to
> remove it.
>
> > +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
> > +
> > +static inline void arch_spin_lock(arch_spinlock_t * lock) {
> > +   unsigned long tmp;
> > +
> > +   __asm__ __volatile__("1:\n"
> > +"\tllw\t%0, [%1]\n"
> > +"\tbnez\t%0, 1b\n"
> > +"\tmovi\t%0, #0x1\n"
> > +"\tscw\t%0, [%1]\n"
> > +"\tbeqz\t%0, 1b\n"
> > +:"=&r"(tmp)
> > +:"r"(&lock->lock)
> > +:"memory");
>
> The coding style seems inconsistent here, the other inline asm uses real tabs
> instead of \t, and 'asm volatile' is generally preferred over '__asm__
> __volatile__'.
>
>Arnd

OK, I will modify it in the next version patch.

Thanks

Best regards
Vincent
CONFIDENTIALITY NOTICE:

This e-mail (and its attachments) may contain confidential and legally 
privileged information or information protected from disclosure. If you are not 
the intended recipient, you are hereby notified that any disclosure, copying, 
distribution, or use of the information contained herein is strictly 
prohibited. In this case, please immediately notify the sender by return 
e-mail, delete the message (and any accompanying documents) and destroy all 
printed hard copies. Thank you for your cooperation.

Copyright ANDES TECHNOLOGY CORPORATION - All Rights Reserved.

Is there a race between __mod_timer() and del_timer()?

2017-11-08 Thread David Howells

Is there a race between the optimisation for networking code in __mod_timer()
and del_timer() - or, at least, a race that matters?

Consider:

CPU A   CPU B
=== ===
[timer X is active]
==>__mod_timer(X)
if (timer_pending(timer))
[Take the true path]
-- IRQ --   ==>del_timer(X)
<==
if (timer->expires == expires)
[Take the true path]
<==return 1
[timer X is not active]

There's no locking to prevent this, but __mod_timer() returns without
restarting the timer.  I'm not sure this is a problem exactly, however, since
del_timer() *was* issued, and could've deleted the timer after __mod_timer()
returned.

A couple of possible alleviations:

 (1) Recheck timer_pending() before returning from __mod_timer().

 (2) Set timer->expires to jiffies in del_timer() - but since there's nothing
 preventing the optimisation in __mod_timer() from occurring concurrently
 with del_timer(), this probably won't help.

I think it might just be best to put a note in the comments in __mod_timer().

Thoughts?

David

Re: [PATCH 5/7] scripts/leaking_addresses: add emailing results

2017-11-08 Thread Petr Mladek

On Wed 2017-11-08 14:37:37, Tobin C. Harding wrote:
> Developers may not have the time (or inclination) to investigate script
> output. This information is, however, useful. If we add functionality to
> the script to email results for further investigation.
> 
> Add --send-report flag to email scan results (to Tobin C. Harding).

I am not sure that it is wise to make spamming one person
so easy ;-)

It might make sense to add some more information into
the message. For example:

+ uname -a
+ whether the log was generated using root access

Also people might feel more comfortable if this feature:

 + prints the message
 + prints where it is being sent
 + asks yes/no before doing so


>  scripts/leaking_addresses.pl | 42 ++
>  1 file changed, 42 insertions(+)
>  mode change 100755 => 100644 scripts/leaking_addresses.pl
   

> diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl
> old mode 100755
> new mode 100644

I guess that this was not intended.

Best Regards,
Petr

Re: [PATCH 26/31] nds32: Build infrastructure

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:55 AM, Greentime Hu  wrote:

> diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
> new file mode 100644
> index 000..112f470
> --- /dev/null
> +++ b/arch/nds32/Kconfig
> @@ -0,0 +1,107 @@
> +#
> +# For a description of the syntax of this configuration file,
> +# see Documentation/kbuild/kconfig-language.txt.
> +#
> +
> +config NDS32
> +def_bool y
> +   select ARCH_HAS_RAW_COPY_USER
> +   select ARCH_WANT_FRAME_POINTERS if FTRACE
> +   select ARCH_WANT_IPC_PARSE_VERSION
> +   select CLKSRC_MMIO
> +   select CLONE_BACKWARDS
> +   select TIMER_OF
> +   select FRAME_POINTER
> +   select GENERIC_ATOMIC64
> +   select GENERIC_CPU_DEVICES
> +   select GENERIC_CLOCKEVENTS
> +   select GENERIC_IOMAP
> +   select GENERIC_IRQ_CHIP
> +   select GENERIC_IRQ_PROBE
> +   select GENERIC_IRQ_SHOW
> +   select GENERIC_STRNCPY_FROM_USER
> +   select GENERIC_STRNLEN_USER
> +   select GENERIC_TIME_VSYSCALL
> +   select HAVE_ARCH_TRACEHOOK
> +   select HAVE_GENERIC_IOMAP

You normally don't want HAVE_GENERIC_IOMAP, at least unless the CPU
has special instructions to trigger PCI I/O port access.

> +   select HAVE_DEBUG_KMEMLEAK
> +   select HAVE_IDE

You certainly don't want HAVE_IDE

> +   select HAVE_MEMBLOCK
> +   select HAVE_MEMBLOCK_NODE_MAP
> +   select HAVE_UID16

HAVE_UID16 shouldn't be used on new architectures, as mentioned in the
comment about asm/posix_types.h

> +   select HAVE_REGS_AND_STACK_ACCESS_API
> +   select IRQ_DOMAIN
> +   select LOCKDEP_SUPPORT
> +   select MODULES_USE_ELF_REL
> +   select MODULES_USE_ELF_RELA

I would think that you use either MODULES_USE_ELF_REL or
MODULES_USE_ELF_RELA, but not both.

> +   select OF
> +   select OF_EARLY_FLATTREE
> +   select OLD_SIGACTION
> +   select OLD_SIGSUSPEND3

What are the OLD_SIG* ones for? It sounds like something you shouldn't
need, although I'm not familiar with them.

> +   select NO_IOPORT_MAP
> +   select RTC_LIB
> +   select THREAD_INFO_IN_TASK
> +   select SYS_SUPPORTS_APM_EMULATION

I don't see what SYS_SUPPORTS_APM_EMULATION gains you.

> +config GENERIC_CALIBRATE_DELAY
> +   def_bool y

It's better to avoid the delay loop completely and skip the calibration,
if your hardware allows.

> +
> +config NDS32_BUILTIN_DTB
> +string "Builtin DTB"
> +default ""
> +   help
> + User can use it to specify the dts of the SoC

Better leave this up to the boot loader.

> +config ALIGNMENT_TRAP
> +   tristate "Kernel support unaligned access handling"
> +   default y
> +   help
> + Andes processors cannot fetch/store information which is not
> + naturally aligned on the bus, i.e., a 4 byte fetch must start at an
> + address divisible by 4. On 32-bit Andes processors, these 
> non-aligned
> + fetch/store instructions will be emulated in software if you say
> + here, which has a severe performance impact. This is necessary for
> + correct operation of some network protocols. With an IP-only
> + configuration it is safe to say N, otherwise say Y.

Which network protocols are you referring to?

> +config HIGHMEM
> +   bool "High Memory Support"
> +   depends on MMU && CPU_CACHE_NONALIASING
> +   help
> + The address space of Andes processors is only 4 Gigabytes large
> + and it has to accommodate user address space, kernel address
> + space as well as some memory mapped IO. That means that, if you
> + have a large amount of physical memory and/or IO, not all of the
> + memory can be "permanently mapped" by the kernel. The physical
> + memory that is not permanently mapped is called "high memory".
> +
> + Depending on the selected kernel/user memory split, minimum
> + vmalloc space and actual amount of RAM, you may not need this
> + option which should result in a slightly faster kernel.
> +
> + If unsure, say N.

Generally speaking, highmem support is a mess, and it's better to avoid it.

I see that the two device tree files you have list 1GB of memory. Do you think
that is a common configuration for actual products? Do you expect any to
have more than 1GB (or more than 4GB) in the future, or is that the upper
end of the scale?

If 1GB is a reasonable upper bound, then you could change the vmsplit
to give slightly less address space to user space and have 1GB of direct-mapped
kernel memory plus 256MB of vmalloc space reserved for the kernel,
and completely avoid highmem.

> +config MEMORY_START
> +   hex "Physical memory start address"
> +   default "0x"
> +   help
> + Physical memory start address, you may modify it if it is porting to
> + a new SoC with different start address.
> +endmenu

On ARM, we found options like this to be rather problematic since it

Re: [PATCH 00/31] Andes(nds32) Linux Kernel Port

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 9:41 AM, Greentime Hu  wrote:
> 2017-11-08 16:32 GMT+08:00 David Howells :
>> Greentime Hu  wrote:
>>
>>> The build script and toolchain repositories are able to be found here:
>>>   https://github.com/andestech/build_script.git
>>
>> Is arch support in upstream binutils and gcc?
>
> Yes, it is but only supporting elf-toolchain now. We will do the
> upstream for Linux-toolchain in the next step.

Is the Linux toolchain only required for building user space, or do
you also need it
for building kernels? It's often possible to build kernels with a bare
toolchain, and
that helps a lot for automated testing.

  Arnd

Re: Is there a race between __mod_timer() and del_timer()?

2017-11-08 Thread David Howells

David Howells  wrote:

> I think it might just be best to put a note in the comments in __mod_timer().

How about the attached?

David
---
commit d538c734f9bf885292b88a81a06c5efee528d70d
Author: David Howells 
Date:   Wed Nov 8 10:20:27 2017 +

Add a comment into __mod_timer() noting a possible race with del_timer()

Add a comment into __mod_timer() noting a possible race with del_timer() in
which the 'common optimization' case could leave the timer unstarted if
del_timer() happens between the timer_pending() check and the timer
expiration check.

Signed-off-by: David Howells 

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index f2674a056c26..e0ac4486529c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -949,6 +949,9 @@ __mod_timer(struct timer_list *timer, unsigned long 
expires, bool pending_only)
 * The downside of this optimization is that it can result in
 * larger granularity than you would get from adding a new
 * timer with this expiry.
+*
+* Note that if del_timer() happens whilst we're just here, we
+* will return with the timer unstarted.
 */
if (timer->expires == expires)
return 1;

Re: [PATCH 00/31] Andes(nds32) Linux Kernel Port

2017-11-08 Thread Arnd Bergmann

On Wed, Nov 8, 2017 at 6:54 AM, Greentime Hu  wrote:
> This patchset adds core architecture support to Linux for Andestech's
> N13, N15, D15, N10, D10 processor cores.
>
> Based on the 16/32-bit AndeStar RISC-like architecture, we designed the
> configurable AndesCore series of embedded processor families. AndesCores
> range from highly performance-efficient small-footprint cores for
> microcontrollers and deeply-embedded applications to 1GHz+ cores running
> Linux, covering general-purpose N-series cores for a wide range of computing
> need, DSP-capable D-series cores for digital signal control,
> instruction-extensible E-series cores for application-specific acceleration,
> and secure S-series cores for best protection of the most valuable.

I looked at the entire patch series now and commented on anything I noticed
that could be improved, overall this looks very nice, great work!

Most of my comments are about tiny details that are easy to address.

I see two areas that need to be looked at carefully, and that may take a
few more rounds to get right:

- In the user space ABI, you have a couple of things that differ from the
  normal asm-generic definitions, i.e. a few syscall entry points and some
  types in asm/posix-types.h. I guess you did that to remain compatible
  with an older glibc port, but IMHO this compatibility should be broken
  in favor of having the standard ABI before the port gets merged.

- For the boot interface, you need to clearly define what can be expected
  and what cannot. This involves the presence of the l2cc, the physical
  memory address, the built-in dtb, and probably a few more things I
  missed. For long-term maintainability, you probably want to ensure that
  you can build a kernel that runs on as much diverse hardware as possible.

   Arnd

RE: [PATCH 0/9] use network namespace for iSCSI control interfaces

2017-11-08 Thread David Laight

From: Chris Leech
> Sent: 07 November 2017 22:45
> 
> I've posted these changes to allow iSCSI management within a container
> using a network namespace to the SCSI and Open-iSCSI lists, but seeing
> as it's not really SCSI/block related I'm casting a wider net looking
> for reviews.

I didn't spot you acquiring and releasing references to the namespace.
(I might have missed it, the relevant patch is difficult to read).

If the sockets are created in the context of the process whose namespace
you are using you don't need it, but given the hooks and callbacks
I'm not at all sure that is obviously true.

David

Re: Is there a race between __mod_timer() and del_timer()?

2017-11-08 Thread Thomas Gleixner

On Wed, 8 Nov 2017, David Howells wrote:

> Is there a race between the optimisation for networking code in __mod_timer()
> and del_timer() - or, at least, a race that matters?
> 
> Consider:
> 
>   CPU A   CPU B
>   === ===
>   [timer X is active]
>   ==>__mod_timer(X)
>   if (timer_pending(timer))
>   [Take the true path]
>   -- IRQ --   ==>del_timer(X)
>   <==
>   if (timer->expires == expires)
>   [Take the true path]
>   <==return 1
>   [timer X is not active]
> 
> There's no locking to prevent this, but __mod_timer() returns without
> restarting the timer.  I'm not sure this is a problem exactly, however, since
> del_timer() *was* issued, and could've deleted the timer after __mod_timer()
> returned.

Correct, if two CPUs fiddle with the same timer concurrently then there is
no guaranteed outcome.

> A couple of possible alleviations:
> 
>  (1) Recheck timer_pending() before returning from __mod_timer().

That's just adding more instructions into that code path for a dubious
value.

>  (2) Set timer->expires to jiffies in del_timer() - but since there's nothing
>  preventing the optimisation in __mod_timer() from occurring concurrently
>  with del_timer(), this probably won't help.

Right.

> I think it might just be best to put a note in the comments in __mod_timer().

Agreed.

Thanks,

tglx

Re: [PATCH 4/7] scripts/leaking_addresses: add reporting

2017-11-08 Thread Petr Mladek

On Wed 2017-11-08 14:37:36, Tobin C. Harding wrote:
> Currently script just dumps all results found. Potentially, this risks
> loosing single results among multiple duplicate results. We need some
> way of restricting duplicates to assist users of the script. It would
> also be nice if we got a report instead of raw results.
> 
> Duplicates can be defined in various ways, instead of trying to find a
> single perfect solution we can present the user with various options to
> display the output. Doing so will typically lead to users wanting to
> view the output multiple times. Currently we scan the kernel each time,
> this is slow and unnecessary. We can expedite the process by writing the
> results to file for subsequent viewing.
> 
> Add sub-commands `scan` and `format`. Display output as a report instead
> of raw results. Add --raw flag to view raw results. Save results to
> file. For subsequent calls to `format` parse output file instead of
> re-scanning.
> 
> Signed-off-by: Tobin C. Harding 
> ---
>  scripts/leaking_addresses.pl | 201 
> ---
>  1 file changed, 187 insertions(+), 14 deletions(-)
> 
> diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl
> index 719ed0aaede7..4c31e935319b 100755
> --- a/scripts/leaking_addresses.pl
> +++ b/scripts/leaking_addresses.pl
> @@ -21,14 +21,19 @@ use File::Spec;
>  use Cwd 'abs_path';
>  use Term::ANSIColor qw(:constants);
>  use Getopt::Long qw(:config no_auto_abbrev);
> +use File::Spec::Functions 'catfile';
>  
>  my $P = $0;
>  my $V = '0.01';
>  
> -# Directories to scan.
> +# Directories to scan (we scan `dmesg` also).
>  my @DIRS = ('/proc', '/sys');
>  
>  # Command line options.
> +my $output = "scan.out";

The hard-coded filename and its use are dangerous. Nobody expects that
this kind of script writes/re-writes a file on the system.

> +my $suppress_dmesg = 0;
> +my $squash_by_path = 0;
> +my $raw = 0;
>  my $help = 0;
>  my $debug = 0;
>  
> @@ -70,21 +75,34 @@ sub help
>   my ($exitcode) = @_;
>  
>   print << "EOM";
> -Usage: $P [OPTIONS]
> +
> +Usage: $P COMMAND [OPTIONS]
>  Version: $V
>  
> +Commands:
> +
> + scanScan the kernel (savesg raw results to file and runs `format`).
> + format  Parse results file and format output.

The later formatting/filtering might be useful but the use
of the hard-coded file is strange. Also it is a pity that
the script does not do anything useful out of the box.

I suggest removing the commands and doing the scan out of the box.
It should not store anything on the disk by default.

Then we could define following options:

-o, --output=  Store raw results into file for later formatting.
-i, --input=   Read raw result from file and just format them.

Well, it is still somehow non-intuitive. It might help to
be more explicit:

-o, --output-raw=
-i, --input-raw=


>  Options:
>  
> - -d, --debugDisplay debugging output.
> - -h, --help, --version  Display this help and exit.
> + -o, --output=  Raw results output file, used for later 
> formatting.
> + --suppress-dmesg Do not show dmesg results.
> + --squash-by-path Show one result per unique path.

I would personally also add an option for the default mode:

--squash-by-filename Show one result per unique filename
 (default).

In fact, I would personally use --squash-by-path or even --raw by
default. These provide easy to understand output. While the
--squash-by-filename mode has pretty good results but
it is quite non-intuitive.

Best Regards,
Petr

[PATCH net-next] net: dsa: lan9303: Fix lan9303_alr_del_port()

2017-11-08 Thread Egil Hjelmeland

Fix embarrassing bug in lan9303_alr_del_port(): Instead of zeroing
entr->mac_addr, I destroyed the next cache entry. Affected .port_fdb_del and
.port_mdb_del.

Fixes: 0620427ea0d6 ("net: dsa: lan9303: Add fdb/mdb manipulation")
Signed-off-by: Egil Hjelmeland 
---
 drivers/net/dsa/lan9303-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 320651a57c6f..c0910aebc037 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -702,7 +702,7 @@ static int lan9303_alr_del_port(struct lan9303 *chip, const 
u8 *mac, int port)
 
entr->port_map &= ~BIT(port);
if (entr->port_map == 0) /* zero means its free again */
-   eth_zero_addr(&entr->port_map);
+   eth_zero_addr(entr->mac_addr);
lan9303_alr_set_entry(chip, mac, entr->port_map, entr->stp_override);
 
return 0;
-- 
2.11.0

Re: [PATCH] net/tcp: track all ipv4/tcp state transition in tcp_set_state

2017-11-08 Thread Yafang Shao

2017-11-08 14:51 GMT+08:00 David Miller :
> From: Yafang Shao 
> Date: Tue,  7 Nov 2017 18:36:28 +0800
>
>> When I hooked the function tcp_set_state with kprobe to track the ipv4/tcp
>> state transistion, I found state transition from TCP_LISTEN to TCP_SYN_RECV
>> is missed.
>>
>> I think it is better to use the helper to do state transition instead of
>> assigning the state to sk_state directly.
>> Then we can monitor the whole tcp lifespans with kprobe or ftrace easily.
>>
>> Signed-off-by: Yafang Shao 
>
> This is really heavy handed and excessive for these cases.
>
> They don't have to handle any of the issues dealt with in
> tcp_set_state().
>
> I would prefer if you made a special helper to net/tcp.h which did:
>
> static inline void __tcp_set_state(struct sock *sk, int state)
> {
> trace_tcp_set_state(sk, sk->sk_state, state);
> sk->sk_state = state;
> }

Good idea!
I will try to implement it.

Thanks
Yafang

[PATCH net-next] net: dsa: lan9303: Documentation: Add missing word "Mbps"

2017-11-08 Thread Egil Hjelmeland

Signed-off-by: Egil Hjelmeland 
---
 Documentation/networking/dsa/lan9303.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/dsa/lan9303.txt 
b/Documentation/networking/dsa/lan9303.txt
index ec28683d107d..144b02b95207 100644
--- a/Documentation/networking/dsa/lan9303.txt
+++ b/Documentation/networking/dsa/lan9303.txt
@@ -1,9 +1,9 @@
 LAN9303 Ethernet switch driver
 ==
 
-The LAN9303 is a three port 10/100 ethernet switch with integrated phys for the
-two external ethernet ports. The third port is an RMII/MII interface to a host
-master network interface (e.g. fixed link).
+The LAN9303 is a three port 10/100 Mbps ethernet switch with integrated phys 
for
+the two external ethernet ports. The third port is an RMII/MII interface to a
+host master network interface (e.g. fixed link).
 
 
 Driver details
-- 
2.11.0

Re: [kernel-hardening] Re: [PATCH resend 2/2] userns: control capabilities of some user namespaces

2017-11-08 Thread महेश बंडेवार

Sorry folks, I was traveling and it seems like a lot happened on this thread. :p

I will try to respond to a few of these comments selectively -

> The thing that makes me hesitate with this set is that it is a
> permanent new feature to address what (I hope) is a temporary
> problem.
I agree this is permanent new feature but it's not solving a temporary
problem. It's impossible to assess what and when new vulnerability
that could show up. I think Daniel summed it up appropriately in his
response

> Seems like there are two naive ways to do it, the first being to just
> look at all code under ns_capable() plus code called from there.  It
> seems like looking at the result of that could be fruitful.
This is really hard. The main issue is that there were features designed
and developed before user-ns days with an assumption that unprivileged
users will never get certain capabilities which only root user gets.
Now that is not true anymore with user-ns creation with mapping root
for any process. Also at the same time blocking user-ns creation for
everyone is a big-hammer which is not needed too. So it's not that easy
to just perform a code-walk-through and correct those decisions now.

> It seems to me that the existing control in
> /proc/sys/kernel/unprivileged_userns_clone might be the better duct tape
> in that case.
This solution is essentially blocking unprivileged users from using
the user-namespaces entirely. This is not really a solution that can
work. The solution that this patch-set adds allows unprivileged users
to create user-namespaces. Actually the proposed solution is more
fine-grained approach than the unprivileged_userns_clone solution
since you can selectively block capabilities rather than completely
blocking the functionality.

> I meant each task has a perm_cap_bset next to the cap_bset.  So task
> p1 (if it has privilege) can drop CAP_SYS_ADMIN from perm_cap_bset,
> p2 (if it has privilege) can drop CAP_NET_ADMIN.  When p1 creates a
> new user_ns, that init task has its cap_bset set to all caps but
> CAP_SYS_ADMIN.
>
> I think for simplicity perm_cap_bset would *only* affect the filling
> of cap_bset at user namespace creation.  So if you wanted to drop a
> capability from your own cap_bset as well, you'd have to do that
> separately.
My original intention is to reduce the attack surface when
vulnerabilities are discovered / published, but I don't see how this
is solving that issue. Also the reason to have sysctl is to have
simplistic control across the board to contain the situation. If that
is not addressed then we might need some other solution on top of
this.

Re: [PATCH net-next V2 3/3] tun: add eBPF based queue selection method

2017-11-08 Thread Jason Wang




On 2017年11月08日 14:43, Michael S. Tsirkin wrote:

On Wed, Nov 08, 2017 at 02:28:53PM +0900, Jason Wang wrote:


On 2017年11月04日 08:56, Willem de Bruijn wrote:

On Fri, Nov 3, 2017 at 5:56 PM, Willem de Bruijn
 wrote:

On Tue, Oct 31, 2017 at 7:32 PM, Jason Wang  wrote:

This patch introduces an eBPF based queue selection method based on
the flow steering policy ops. Userspace could load an eBPF program
through TUNSETSTEERINGEBPF. This gives much more flexibility compared
to simple but hard coded policy in kernel.

Signed-off-by: Jason Wang 
---
+static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+{
+   struct bpf_prog *prog;
+   u32 fd;
+
+   if (copy_from_user(&fd, data, sizeof(fd)))
+   return -EFAULT;
+
+   prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);

If the idea is to allow guests to pass BPF programs down to the host,
you may want to define a new program type that is more restrictive than
socket filter.

The external functions allowed for socket filters (sk_filter_func_proto)
are relatively few (compared to, say, clsact), but may still leak host
information to a guest. More importantly, guest security considerations
limits how we can extend socket filters later.

Unless the idea is for the hypervisor to prepare the BPF based on a
limited set of well defined modes that the guest can configure. Then
socket filters are fine, as the BPF is prepared by a regular host process.

Yes, I think the idea is to let qemu build a BPF program now.

Passing eBPF program from guest to host is interesting, but an obvious issue
is how to deal with the accessing of map.

Thanks

Fundamentally, I suspect the way to solve it is to allow
the program to specify "should be offloaded to host".

And then it would access the host map rather than the guest map.


This looks like a big extension.



Then add some control path API for guest to poke at the host map.


Actually, as Willem said, we can even forbid using map through a type, 
but this will lose lots of flexibility.




It's not that there's anything special about the host map -
it's just separate from the guest - so if we wanted to
do something that can work on bare-metal we could -
just do something like a namespace and put all host
maps there. But I'm not sure it's worth the complexity.

Cc Aaron who wanted to look at this.



Maybe the first step is to let classic BPF be passed from the guest and 
consider eBPF on top.


Thanks

Re: [PATCH net-next 1/4] net: phy: sfp: Do not reject soldered down modules

2017-11-08 Thread Russell King - ARM Linux

On Tue, Nov 07, 2017 at 07:49:08PM -0800, Florian Fainelli wrote:
> The SFP module identification code in sfp_sm_mod_probe() will reject SFF
> modules soldered down because they have an identifier of 0x2, while the code
> currently checks for 0x3 only (SFP_PHYS_ID_SFP), update that.
> 
> Signed-off-by: Florian Fainelli 
> ---
>  drivers/net/phy/sfp.c | 5 +++--
>  include/linux/sfp.h   | 1 +
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
> index e381811e5f11..942288aa9cdb 100644
> --- a/drivers/net/phy/sfp.c
> +++ b/drivers/net/phy/sfp.c
> @@ -463,8 +463,9 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
>vendor, part, rev, sn, date);
>  
>   /* We only support SFP modules, not the legacy GBIC modules. */
> - if (sfp->id.base.phys_id != SFP_PHYS_ID_SFP ||
> - sfp->id.base.phys_ext_id != SFP_PHYS_EXT_ID_SFP) {
> + if ((sfp->id.base.phys_id != SFP_PHYS_ID_SFP &&
> +  sfp->id.base.phys_id != SFP_PHYS_ID_SFF) ||
> +  sfp->id.base.phys_ext_id != SFP_PHYS_EXT_ID_SFP) {

I'd prefer that we do something like the patch I sent a couple of nights
ago, having a separate compatible for the SFF modules (since they have
no insert signal as SFF is soldered in place) and use that to decide
which phys_id we accept here.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up

Re: regression: UFO removal breaks kvm live migration

2017-11-08 Thread David Miller

From: Jason Wang 
Date: Wed, 8 Nov 2017 17:25:48 +0900

> On 2017年11月08日 17:08, Willem de Bruijn wrote:
>> That won't help in the short term. I'm still reading up to see if
>> there are
>> any other options besides reimplement or advertise-but-drop, such as
>> an implicit trigger that would make the guest renegotiate. It's
>> unlikely, but
>> worth a look..
> 
> Yes, this looks hard. And even if we can manage to do this, it looks
> like overkill since it will impact all guests after migration.

Like Willem I would much prefer "advertise-but-drop" if it works.

In the long term feature renegotiation triggers are a must.

There is no way for us to remove features otherwise.  In my opinion
this will even make migrations more powerful.

Fwd: [PATCH net-next v6 3/3] act_vlan: VLAN action rewrite to use RCU lock/unlock and update

2017-11-08 Thread Manish Kurup

Hi Dave,

On Tue, Nov 7, 2017 at 7:07 PM, David Miller  wrote:
>
> From: Alexander Duyck 
> Date: Tue, 7 Nov 2017 08:54:20 -0800
>
> > Are we really going to be so strict about the reverse xmas-tree that
> > we won't allow for assignment w/ variable declaration because the
> > dependency order won't fit into that format?
>
> Yes.
>
> > Last I knew this kind of setup was an exception to the reverse
> > xmas-tree layout requirement because in this case 'p' relies on 'v' so
> > we can't reorder these without having to kick the assignment of 'p'
> > off onto a line by itself.
>
> Please just declare the variable naked without the assignment and do
> the assignment down in the code.

I have a changeset that I had made to incorporate the reverse xmas
tree, doing the very thing you talk about, above.
The only reason I did not send it out is because it made more than
minimal changes, especially how the 'opt' struct is defined.

I will make the changes and send the review around once more.

Thanks,

Re: [PATCH 5/7] scripts/leaking_addresses: add emailing results

2017-11-08 Thread Greg KH

On Wed, Nov 08, 2017 at 11:16:43AM +0100, Petr Mladek wrote:
> On Wed 2017-11-08 14:37:37, Tobin C. Harding wrote:
> > Developers may not have the time (or inclination) to investigate script
> > output. This information is, however, useful. If we add functionality to
> > the script to email results for further investigation.
> > 
> > Add --send-report flag to email scan results (to Tobin C. Harding).
> 
> > I am not sure that it is wise to make spamming one person
> so easy ;-)

I agree, I would strongly discourage this, as you will end up getting
reports from really old kernels for the next 20+ years.  We have seen
that happen for every time we have added a "report this to foo@baz" in a
kernel log message.

If you _really_ want to do this, at least point it at a mailing list.

thanks,

greg k-h

[PATCH net] net: hns3: Updates MSI/MSI-X alloc/free APIs(depricated) to new APIs

2017-11-08 Thread Salil Mehta

This patch migrates the HNS3 driver code from use of deprecated PCI
MSI/MSI-X interrupt vector allocation/free APIs to new common APIs.

Signed-off-by: Salil Mehta 
Suggested-by: Christoph Hellwig 
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c| 108 +++--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h|  15 ++-
 2 files changed, 42 insertions(+), 81 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c1cdbfd..09fa068 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -885,14 +885,14 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
if (hnae3_dev_roce_supported(hdev)) {
-   hdev->num_roce_msix =
+   hdev->num_roce_msi =
hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
   HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
 
/* PF should have NIC vectors and Roce vectors,
 * NIC vectors are queued before Roce vectors.
 */
-   hdev->num_msi = hdev->num_roce_msix  + HCLGE_ROCE_VECTOR_OFFSET;
+   hdev->num_msi = hdev->num_roce_msi  + HCLGE_ROCE_VECTOR_OFFSET;
} else {
hdev->num_msi =
hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
@@ -1835,7 +1835,7 @@ static int hclge_init_roce_base_info(struct hclge_vport 
*vport)
struct hnae3_handle *roce = &vport->roce;
struct hnae3_handle *nic = &vport->nic;
 
-   roce->rinfo.num_vectors = vport->back->num_roce_msix;
+   roce->rinfo.num_vectors = vport->back->num_roce_msi;
 
if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors ||
vport->back->num_msi_left == 0)
@@ -1853,67 +1853,46 @@ static int hclge_init_roce_base_info(struct hclge_vport 
*vport)
return 0;
 }
 
-static int hclge_init_msix(struct hclge_dev *hdev)
+static int hclge_init_msi(struct hclge_dev *hdev)
 {
struct pci_dev *pdev = hdev->pdev;
-   int ret, i;
-
-   hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi,
- sizeof(struct msix_entry),
- GFP_KERNEL);
-   if (!hdev->msix_entries)
-   return -ENOMEM;
-
-   hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
-  sizeof(u16), GFP_KERNEL);
-   if (!hdev->vector_status)
-   return -ENOMEM;
+   int vectors;
+   int i;
 
-   for (i = 0; i < hdev->num_msi; i++) {
-   hdev->msix_entries[i].entry = i;
-   hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+   vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+   PCI_IRQ_MSI | PCI_IRQ_MSIX);
+   if (vectors < 0) {
+   dev_err(&pdev->dev, "failed to allocate MSI/MSI-X vectors %d\n",
+   vectors);
+   return vectors;
}
+   if (vectors < hdev->num_msi)
+   dev_warn(&hdev->pdev->dev,
+"could not alloc(=%d) all requested(=%d) MSI/MSI-X\n",
+hdev->num_msi, vectors);
 
-   hdev->num_msi_left = hdev->num_msi;
-   hdev->base_msi_vector = hdev->pdev->irq;
+   hdev->num_msi = vectors;
+   hdev->num_msi_left = vectors;
+   hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = hdev->base_msi_vector +
-   HCLGE_ROCE_VECTOR_OFFSET;
-
-   ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries,
-   hdev->num_msi, hdev->num_msi);
-   if (ret < 0) {
-   dev_info(&hdev->pdev->dev,
-"MSI-X vector alloc failed: %d\n", ret);
-   return ret;
-   }
-
-   return 0;
-}
-
-static int hclge_init_msi(struct hclge_dev *hdev)
-{
-   struct pci_dev *pdev = hdev->pdev;
-   int vectors;
-   int i;
+HCLGE_ROCE_VECTOR_OFFSET;
 
hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
   sizeof(u16), GFP_KERNEL);
-   if (!hdev->vector_status)
+   if (!hdev->vector_status) {
+   pci_free_irq_vectors(pdev);
return -ENOMEM;
+   }
 
for (i = 0; i < hdev->num_msi; i++)
hdev->vector_status[i] = HCLGE_INVALID_VPORT;
 
-   vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI);
-   if (vectors < 0) {
-   dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors);
-   return -EINVAL;
+   hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi,
+

[PATCH net-next v10 1/3] act_vlan: Change stats update to use per-core stats

2017-11-08 Thread Manish Kurup

The VLAN action maintains one set of stats across all cores, and uses a
spinlock to synchronize updates to it from the same. Changed this to use a
per-CPU stats context instead.
This change will result in better performance.

Acked-by: Jamal Hadi Salim 
Acked-by: Jiri Pirko 
Signed-off-by: Manish Kurup 
---
 net/sched/act_vlan.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 115fc33..8a35efe 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,9 +30,10 @@ static int tcf_vlan(struct sk_buff *skb, const struct 
tc_action *a,
int err;
u16 tci;
 
-   spin_lock(&v->tcf_lock);
tcf_lastuse_update(&v->tcf_tm);
-   bstats_update(&v->tcf_bstats, skb);
+   bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
+
+   spin_lock(&v->tcf_lock);
action = v->tcf_action;
 
/* Ensure 'data' points at mac_header prior calling vlan manipulating
@@ -85,7 +86,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct 
tc_action *a,
 
 drop:
action = TC_ACT_SHOT;
-   v->tcf_qstats.drops++;
+   qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+
 unlock:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
@@ -172,7 +174,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr 
*nla,
 
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
-&act_vlan_ops, bind, false);
+&act_vlan_ops, bind, true);
if (ret)
return ret;
 
-- 
2.7.4

[PATCH net-next v10 0/3] net_sched actions: act_vlan now uses RCU

2017-11-08 Thread Manish Kurup

This commit consists of 3 patches:

patch1 (1/3):
The VLAN action maintains one set of stats across all cores, and uses a
spinlock to synchronize updates to it from the same. Changed this to use a
per-CPU stats context instead.
This change will result in better performance.

patch2 (2/3):
Modified netronome nfp flower action to use VLAN helper functions instead
of accessing/referencing TC act_vlan private structures directly. 

patch3 (3/3):
Using a spinlock in the VLAN action causes performance issues when the VLAN
action is used on multiple cores. Rewrote the VLAN action to use RCU read
locking for reads and updates instead.
All functions now use an RCU dereferenced pointer to access the VLAN action
context. Modified helper functions used by other modules, to use the RCU as
opposed to directly accessing the structure.

As part of this review, there were some changes suggested by reviewers.
I have incorporated all the changes that were requested.

Here're the changes:
v2: Fixed all helper functions to use RCU (rtnl_dereference) - Eric, Jamal
v2: Fixed indentation, extra line nits - Jamal, Jiri
v2: Moved rcu_head to the end of the struct - Jiri
v2: Re-formatted locals to reverse-christmas-tree - Jiri
v2: Removed mismatched spin_lock() - Cong
v2: Removed spin_lock_bh() in tcf_vlan_init, rtnl_dereference() should
suffice - Cong, Jiri
v4: Modified the nfp flower action code to use the VLAN helper functions
instead of referencing the structure directly. Isolated this into a
separate patch - Pieter Jansen
v5: Got rid of the unlikely() for the allocation case - Simon Horman
v6: Had forgotten cleanup functions for RCU alloc, added them - Dave Miller
v7: Re-formatted more locals to reverse-christmas-tree - Pieter V
v8: Reverted reverse-christmas-tree(v7), not required when dependencies
make it difficult to implement - Alexander D
v9: Cover letter subject change - Jamal
v10: Re-formatted locals in v7 back to using reverse xmas tree - Dave M

Reviewed-by: Pieter Jansen van Vuuren 
Acked-by: Jamal Hadi Salim 
Acked-by: Jiri Pirko 
Signed-off-by: Manish Kurup 

Manish Kurup (3):
  act_vlan: Change stats update to use per-core stats
  nfp flower action: Modified to use VLAN helper functions
  act_vlan: VLAN action rewrite to use RCU lock/unlock and update

 drivers/net/ethernet/netronome/nfp/flower/action.c |  5 +-
 include/net/tc_act/tc_vlan.h   | 46 ---
 net/sched/act_vlan.c   | 94 ++
 3 files changed, 101 insertions(+), 44 deletions(-)

-- 
2.7.4

[PATCH net-next v10 2/3] nfp flower action: Modified to use VLAN helper functions

2017-11-08 Thread Manish Kurup

Modified netronome nfp flower action to use VLAN helper functions instead
of accessing/referencing TC act_vlan private structures directly.

Reviewed-by: Pieter Jansen van Vuuren 
Signed-off-by: Manish Kurup 
---
 drivers/net/ethernet/netronome/nfp/flower/action.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c 
b/drivers/net/ethernet/netronome/nfp/flower/action.c
index de64ced..c1c595f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -58,7 +58,6 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
 const struct tc_action *action)
 {
size_t act_size = sizeof(struct nfp_fl_push_vlan);
-   struct tcf_vlan *vlan = to_vlan(action);
u16 tmp_push_vlan_tci;
 
push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN;
@@ -67,8 +66,8 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
 
tmp_push_vlan_tci =
-   FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) |
-   FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) |
+   FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) |
+   FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) |
NFP_FL_PUSH_VLAN_CFI;
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
 }
-- 
2.7.4

[PATCH net-next v10 3/3] act_vlan: VLAN action rewrite to use RCU lock/unlock and update

2017-11-08 Thread Manish Kurup

Using a spinlock in the VLAN action causes performance issues when the VLAN
action is used on multiple cores. Rewrote the VLAN action to use RCU read
locking for reads and updates instead.
All functions now use an RCU dereferenced pointer to access the VLAN action
context. Modified helper functions used by other modules, to use the RCU as
opposed to directly accessing the structure.

Acked-by: Jamal Hadi Salim 
Acked-by: Jiri Pirko 
Signed-off-by: Manish Kurup 
---
 include/net/tc_act/tc_vlan.h | 46 ++-
 net/sched/act_vlan.c | 88 +---
 2 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index c2090df..22ae260 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -13,12 +13,17 @@
 #include 
 #include 
 
+struct tcf_vlan_params {
+   int   tcfv_action;
+   u16   tcfv_push_vid;
+   __be16tcfv_push_proto;
+   u8tcfv_push_prio;
+   struct rcu_head   rcu;
+};
+
 struct tcf_vlan {
struct tc_actioncommon;
-   int tcfv_action;
-   u16 tcfv_push_vid;
-   __be16  tcfv_push_proto;
-   u8  tcfv_push_prio;
+   struct tcf_vlan_params __rcu *vlan_p;
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
@@ -33,22 +38,45 @@ static inline bool is_tcf_vlan(const struct tc_action *a)
 
 static inline u32 tcf_vlan_action(const struct tc_action *a)
 {
-   return to_vlan(a)->tcfv_action;
+   u32 tcfv_action;
+
+   rcu_read_lock();
+   tcfv_action = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_action;
+   rcu_read_unlock();
+
+   return tcfv_action;
 }
 
 static inline u16 tcf_vlan_push_vid(const struct tc_action *a)
 {
-   return to_vlan(a)->tcfv_push_vid;
+   u16 tcfv_push_vid;
+
+   rcu_read_lock();
+   tcfv_push_vid = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_vid;
+   rcu_read_unlock();
+
+   return tcfv_push_vid;
 }
 
 static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
 {
-   return to_vlan(a)->tcfv_push_proto;
+   __be16 tcfv_push_proto;
+
+   rcu_read_lock();
+   tcfv_push_proto = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_proto;
+   rcu_read_unlock();
+
+   return tcfv_push_proto;
 }
 
 static inline u8 tcf_vlan_push_prio(const struct tc_action *a)
 {
-   return to_vlan(a)->tcfv_push_prio;
-}
+   u8 tcfv_push_prio;
 
+   rcu_read_lock();
+   tcfv_push_prio = rcu_dereference(to_vlan(a)->vlan_p)->tcfv_push_prio;
+   rcu_read_unlock();
+
+   return tcfv_push_prio;
+}
 #endif /* __NET_TC_VLAN_H */
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 8a35efe..f9e6b80 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -26,6 +26,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct 
tc_action *a,
struct tcf_result *res)
 {
struct tcf_vlan *v = to_vlan(a);
+   struct tcf_vlan_params *p;
int action;
int err;
u16 tci;
@@ -33,24 +34,27 @@ static int tcf_vlan(struct sk_buff *skb, const struct 
tc_action *a,
tcf_lastuse_update(&v->tcf_tm);
bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb);
 
-   spin_lock(&v->tcf_lock);
-   action = v->tcf_action;
-
/* Ensure 'data' points at mac_header prior calling vlan manipulating
 * functions.
 */
if (skb_at_tc_ingress(skb))
skb_push_rcsum(skb, skb->mac_len);
 
-   switch (v->tcfv_action) {
+   rcu_read_lock();
+
+   action = READ_ONCE(v->tcf_action);
+
+   p = rcu_dereference(v->vlan_p);
+
+   switch (p->tcfv_action) {
case TCA_VLAN_ACT_POP:
err = skb_vlan_pop(skb);
if (err)
goto drop;
break;
case TCA_VLAN_ACT_PUSH:
-   err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
-   (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
+   err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid |
+   (p->tcfv_push_prio << VLAN_PRIO_SHIFT));
if (err)
goto drop;
break;
@@ -69,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct 
tc_action *a,
goto drop;
}
/* replace the vid */
-   tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+   tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
-   if (v->tcfv_push_prio) {
+   if (p->tcfv_push_prio) {
tci &= ~VLAN_PRIO_MASK;
-   tci |= v->tcfv_push_prio << VLAN_PRIO

Re: [kernel-hardening] [PATCH v4] scripts: add leaking_addresses.pl

2017-11-08 Thread Michael Ellerman

"Tobin C. Harding"  writes:
> Currently we are leaking addresses from the kernel to user space. This
> script is an attempt to find some of those leakages. Script parses
> `dmesg` output and /proc and /sys files for hex strings that look like
> kernel addresses.
>
> Only works for 64 bit kernels, the reason being that kernel addresses
> on 64 bit kernels have 'ffff' as the leading bit pattern making grepping
> possible.

That doesn't work super well on other architectures :D

I don't speak perl but presumably you can check the arch somehow and
customise the regex?

...
> +# Return _all_ non false positive addresses from $line.
> +sub extract_addresses
> +{
> +my ($line) = @_;
> +my $address = '\b(0x)?[[:xdigit:]]{12}\b';

On 64-bit powerpc (ppc64/ppc64le) we'd want:

+my $address = '\b(0x)?[89abcdef]00[[:xdigit:]]{13}\b';


> +# Do not parse these files (absolute path).
> +my @skip_parse_files_abs = ('/proc/kmsg',
> + '/proc/kcore',
> + '/proc/fs/ext4/sdb1/mb_groups',
> + '/proc/1/fd/3',
> + '/sys/kernel/debug/tracing/trace_pipe',
> + '/sys/kernel/security/apparmor/revision');

Can you add:

  /sys/firmware/devicetree

and/or /proc/device-tree (which is a symlink to the above).

We should also start restricting access to that because it may have
potentially interesting physical addresses in it, but that will break
existing tools, so it will need to be opt-in and done over time.

cheers

Re: [PATCH net-next v6 3/3] act_vlan: VLAN action rewrite to use RCU lock/unlock and update

2017-11-08 Thread Manish Kurup

Hi Dave,

On Wed, Nov 8, 2017 at 6:40 AM, Manish Kurup  wrote:
> Hi Dave,
>
> On Tue, Nov 7, 2017 at 7:07 PM, David Miller  wrote:
>>
>> From: Alexander Duyck 
>> Date: Tue, 7 Nov 2017 08:54:20 -0800
>>
>> > Are we really going to be so strict about the reverse xmas-tree that
>> > we won't allow for assignment w/ variable declaration because the
>> > dependency order won't fit into that format?
>>
>> Yes.
>>
>> > Last I knew this kind of setup was an exception to the reverse
>> > xmas-tree layout requirement because in this case 'p' relies on 'v' so
>> > we can't reorder these without having to kick the assignment of 'p'
>> > off onto a line by itself.
>>
>> Please just declare the variable naked without the assignment and do
>> the assignment down in the code.
>
> I have a changeset that I had made to incorporate the reverse xmas tree,
> doing the very thing you talk about, above.
> The only reason I did not send it out is because it made more than minimal
> changes, especially how the 'opt' struct is defined.
>
> I will make the changes and send the review around once more.
>
> Thanks,
>
I have made the required changes, and sent the review around once more
(v10). Please let me know if this looks OK.

Thanks!

-Manish

Re: [PATCH net-next 1/2] net: add support for Cavium PTP coprocessor

2017-11-08 Thread Aleksey Makarov




On 11/07/2017 10:49 PM, David Daney wrote:

On 11/07/2017 11:07 AM, Aleksey Makarov wrote:

From: Radoslaw Biernacki 

This patch adds support for the Precision Time Protocol
Clocks and Timestamping hardware found on Cavium ThunderX
processors.

Signed-off-by: Radoslaw Biernacki 
Signed-off-by: Aleksey Makarov 
---
  drivers/net/ethernet/cavium/Kconfig |  13 +
  drivers/net/ethernet/cavium/Makefile    |   1 +
  drivers/net/ethernet/cavium/common/Makefile |   1 +
  drivers/net/ethernet/cavium/common/cavium_ptp.c | 353 
  drivers/net/ethernet/cavium/common/cavium_ptp.h |  78 ++
  5 files changed, 446 insertions(+)
  create mode 100644 drivers/net/ethernet/cavium/common/Makefile
  create mode 100644 drivers/net/ethernet/cavium/common/cavium_ptp.c
  create mode 100644 drivers/net/ethernet/cavium/common/cavium_ptp.h


[...]

+
+/* The Cavium PTP can *only* be found in SoCs containing the ThunderX
+ * ARM64 CPU implementation.  All accesses to the device registers on this
+ * platform are implicitly strongly ordered with respect to memory
+ * accesses.


I believe that is not correct.  I/O register accesses are implicitly
ordered with respect to other I/O register accesses.  However, with
respect to memory accesses, no ordering is imposed.  Therefore, one
must be very careful not to introduce subtile memory ordering bugs
with these things when using the unordered versions.


I will fix it in the next version.

Thank you
Aleksey Makarov


+ * So writeq_relaxed() and readq_relaxed() are safe to use with
+ * no memory barriers in this driver.  The readq()/writeq() functions add
+ * explicit ordering operation which in this case are redundant, and only
+ * add overhead.



Also it should be noted that on production silicon, the performance difference between 
the "relaxed" variant and the normal variant of read*/write* is often 
negligible.



+ */
+
+static u64 cavium_ptp_reg_read(struct cavium_ptp *clock, u64 offset)
+{
+    return readq_relaxed(clock->reg_base + offset);
+}
+
+static void cavium_ptp_reg_write(struct cavium_ptp *clock, u64 offset, u64 val)
+{
+    writeq_relaxed(val, clock->reg_base + offset);
+}
+


Are the PTP register accesses really so much in the hot path that using the 
relaxed variants can be measured here?  If not, would it make the driver look 
cleaner to remove these and just use readq/writeq calls directly  in the body 
of the driver?

David.

RE: mlx5 broken affinity

2017-11-08 Thread David Laight

From: Sagi Grimberg
> Sent: 08 November 2017 07:28
...
> > Why would you give the user a knob to destroy what you carefully optimized?
> 
> Well, looks like someone relies on this knob, the question is if he is
> > doing something better for his workload. I don't know, it's really up to
> the user to say.

Maybe the user wants to ensure that nothing except some very specific
processing happens on some (or most) of the cpu cores.

If the expected overall ethernet data rate isn't exceptionally large
is there any reason to allocate a queue (etc) for every cpu?

David

Re: [1/3] rtlwifi: fix uninitialized rtlhal->last_suspend_sec time

2017-11-08 Thread Kalle Valo

Arnd Bergmann  wrote:

> We set rtlhal->last_suspend_sec to an uninitialized stack variable,
> but unfortunately gcc never warned about this, I only found it
> while working on another patch. I opened a gcc bug for this.
> 
> Presumably the value of rtlhal->last_suspend_sec is not all that
> important, but it does get used, so we probably want the
> patch backported to stable kernels.
> 
> Cc: sta...@vger.kernel.org
> Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82839
> Signed-off-by: Arnd Bergmann 
> Acked-by: Larry Finger 

3 patches applied to wireless-drivers-next.git, thanks.

3f2a162fab15 rtlwifi: fix uninitialized rtlhal->last_suspend_sec time
3c92d5517af8 rtlwifi: use ktime_get_real_seconds() for suspend time
ac978dc79a91 rtlwifi: drop unused ppsc->last_wakeup_time

-- 
https://patchwork.kernel.org/patch/10043505/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: rtlwifi: remove redundant pointer tid_data

2017-11-08 Thread Kalle Valo

Colin Ian King  wrote:

> From: Colin Ian King 
> 
> tid_data is assigned but never read, hence it is redundant
> and can be removed. Cleans up clang warning:
> 
> drivers/net/wireless/realtek/rtlwifi/base.c:1581:2: warning: Value
> stored to 'tid_data' is never read
> 
> Signed-off-by: Colin Ian King 

Patch applied to wireless-drivers-next.git, thanks.

82e730e521ce rtlwifi: remove redundant pointer tid_data

-- 
https://patchwork.kernel.org/patch/10040293/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: rtlwifi: remove redundant initialization to cfg_cmd

2017-11-08 Thread Kalle Valo

Colin Ian King  wrote:

> From: Colin Ian King 
> 
> cfg_cmd is initialized to zero and this value is never read, instead
> it is over-written in the start of a do-while loop. Remove the
> redundant initialization. Cleans up clang warning:
> 
> drivers/net/wireless/realtek/rtlwifi/core.c:1750:22: warning: Value
> stored to 'cfg_cmd' during its initialization is never read
> 
> Signed-off-by: Colin Ian King 
> Acked-by: Larry Finger 

Patch applied to wireless-drivers-next.git, thanks.

f80ead1cd5fa rtlwifi: remove redundant initialization to cfg_cmd

-- 
https://patchwork.kernel.org/patch/10041685/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: iwlegacy: remove redundant pointer sta_priv

2017-11-08 Thread Kalle Valo

Colin Ian King  wrote:

> From: Colin Ian King 
> 
> Pointer sta_priv is assigned but never read, hence it is redundant
> and can be removed. Cleans up clang warning:
> 
> drivers/net/wireless/intel/iwlegacy/4965-rs.c:2163:2: warning: Value
> stored to 'sta_priv' is never read
> 
> Signed-off-by: Colin Ian King 

Patch applied to wireless-drivers-next.git, thanks.

6c6e25311312 iwlegacy: remove redundant pointer sta_priv

-- 
https://patchwork.kernel.org/patch/10036047/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: orinoco_usb: remove redundant pointer dev

2017-11-08 Thread Kalle Valo

Colin Ian King  wrote:

> From: Colin Ian King 
> 
> The pointer dev is assigned but never read, hence it is redundant
> and can be removed. Cleans up clang warning:
> 
> drivers/net/wireless/intersil/orinoco/orinoco_usb.c:1468:2: warning:
> Value stored to 'dev' is never read
> 
> Signed-off-by: Colin Ian King 

Patch applied to wireless-drivers-next.git, thanks.

9b741b2a3148 orinoco_usb: remove redundant pointer dev

-- 
https://patchwork.kernel.org/patch/10040113/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: regression: UFO removal breaks kvm live migration

2017-11-08 Thread Jason Wang

On 2017年11月08日 20:32, David Miller wrote:

From: Jason Wang 
Date: Wed, 8 Nov 2017 17:25:48 +0900

On 2017年11月08日 17:08, Willem de Bruijn wrote:

That won't help in the short term. I'm still reading up to see if
there are
any other options besides reimplement or advertise-but-drop, such as
an implicit trigger that would make the guest renegotiate. It's
unlikely, but
worth a look..

Yes, this looks hard. And even if we can manage to do this, it looks
an overkill since it will impact all guest after migration.

Like Willem I would much prefer "advertise-but-drop" if it works.

This makes migration work but all guest UFO traffic will stall.

In the long term feature renegotiation triggers are a must.

There is no way for us to remove features otherwise.

We can remove if we don't break userspace(guest).

In my opinion
this will even make migrations more powerful.

But this does not help guests running an old kernel version that
still thinks UFO works.

Thanks

Re: zd1201: remove unused variable framelen

2017-11-08 Thread Kalle Valo

Colin Ian King  wrote:

> From: Colin Ian King 
> 
> Variable framelen is assigned but never read, hence it is redundant
> and can be removed. Cleans up clang warning:
> 
> drivers/net/wireless/zydas/zd1201.c:234:3: warning: Value stored
> to 'framelen' is never read
> 
> Signed-off-by: Colin Ian King 

Patch applied to wireless-drivers-next.git, thanks.

03e40f1e7680 zd1201: remove unused variable framelen

-- 
https://patchwork.kernel.org/patch/10047145/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

Re: regression: UFO removal breaks kvm live migration

2017-11-08 Thread David Miller

From: Jason Wang 
Date: Wed, 8 Nov 2017 21:53:27 +0900

> On 2017年11月08日 20:32, David Miller wrote:
>> From: Jason Wang 
>> Date: Wed, 8 Nov 2017 17:25:48 +0900
>>
>>> On 2017年11月08日 17:08, Willem de Bruijn wrote:
 That won't help in the short term. I'm still reading up to see if
 there are
 any other options besides reimplement or advertise-but-drop, such as
 an implicit trigger that would make the guest renegotiate. It's
 unlikely, but
 worth a look..
>>> Yes, this looks hard. And even if we can manage to do this, it looks
>>> an overkill since it will impact all guest after migration.
>> Like Willem I would much prefer "advertise-but-drop" if it works.
> 
> This makes migration work but all guest UFO traffic will stall.

The idea is that the sender will resend and it will be smaller and
thus non-UFO.

Re: KASAN: use-after-free Read in worker_thread (2)

2017-11-08 Thread Dmitry Vyukov

On Wed, Nov 8, 2017 at 1:58 PM, syzbot

wrote:
> Hello,
>
> syzkaller hit the following crash on
> 7dfaa7bc99498da1c6c4a48bee8d2d5265161a8c
> git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/master
> compiler: gcc (GCC) 7.1.1 20170620
> .config is attached
> Raw console output is attached.
>
> Unfortunately, I don't have any reproducer for this bug yet.
>


I guess this is more about kcmsock.c rather than workqueue.c. +kcm maintainers.


> ==
> BUG: KASAN: use-after-free in worker_thread+0x15bb/0x1990
> kernel/workqueue.c:2245
> Read of size 8 at addr 8801c3a74110 by task kworker/u4:6/3515
>
> CPU: 1 PID: 3515 Comm: kworker/u4:6 Not tainted 4.14.0-rc7+ #112
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x194/0x257 lib/dump_stack.c:53
>  print_address_description+0x73/0x250 mm/kasan/report.c:252
>  kasan_report_error mm/kasan/report.c:351 [inline]
>  kasan_report+0x25b/0x340 mm/kasan/report.c:409
>  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
>  worker_thread+0x15bb/0x1990 kernel/workqueue.c:2245
>  kthread+0x35e/0x430 kernel/kthread.c:231
>  ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432
>
> Allocated by task 31482:
>  save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>  set_track mm/kasan/kasan.c:459 [inline]
>  kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
>  kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
>  kmem_cache_alloc+0x12e/0x760 mm/slab.c:3562
>  kmem_cache_zalloc include/linux/slab.h:657 [inline]
>  kcm_attach net/kcm/kcmsock.c:1394 [inline]
>  kcm_attach_ioctl net/kcm/kcmsock.c:1460 [inline]
>  kcm_ioctl+0x2d1/0x1610 net/kcm/kcmsock.c:1695
>  sock_do_ioctl+0x65/0xb0 net/socket.c:961
>  sock_ioctl+0x2c2/0x440 net/socket.c:1058
>  vfs_ioctl fs/ioctl.c:46 [inline]
>  do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
>  SYSC_ioctl fs/ioctl.c:701 [inline]
>  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
>  entry_SYSCALL_64_fastpath+0x1f/0xbe
>
> Freed by task 1249:
>  save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>  set_track mm/kasan/kasan.c:459 [inline]
>  kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
>  __cache_free mm/slab.c:3504 [inline]
>  kmem_cache_free+0x77/0x280 mm/slab.c:3764
>  unreserve_psock+0x5a1/0x780 net/kcm/kcmsock.c:547
>  kcm_write_msgs+0xbae/0x1b80 net/kcm/kcmsock.c:590
>  kcm_tx_work+0x2e/0x190 net/kcm/kcmsock.c:731
>  process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113
>  worker_thread+0x223/0x1990 kernel/workqueue.c:2247
>  kthread+0x35e/0x430 kernel/kthread.c:231
>  ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432
>
> The buggy address belongs to the object at 8801c3a74040
>  which belongs to the cache kcm_psock_cache of size 552
> The buggy address is located 208 bytes inside of
>  552-byte region [8801c3a74040, 8801c3a74268)
> The buggy address belongs to the page:
> page:ea00070e9d00 count:1 mapcount:0 mapping:8801c3a74040 index:0x0
> compound_mapcount: 0
> flags: 0x2fffc008100(slab|head)
> raw: 02fffc008100 8801c3a74040  0001000b
> raw: ea00067920a0 8801d3f39948 8801d3f2a840 
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
>  8801c3a74000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
>  8801c3a74080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>>
>> 8801c3a74100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>
>  ^
>  8801c3a74180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>  8801c3a74200: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc
> ==
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkal...@googlegroups.com.
> Please credit me with: Reported-by: syzbot 
>
> syzbot will keep track of this bug report.
> Once a fix for this bug is committed, please reply to this email with:
> #syz fix: exact-commit-title
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscr...@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/001a114a7bc08e95e7055d783ea5%40google.com.
> Fo

Re: [PATCH net-next 2/3] ip6_gre: Refactor ip6gre xmit codes

2017-11-08 Thread William Tu

On Tue, Nov 7, 2017 at 6:10 PM, David Miller  wrote:
> From: William Tu 
> Date: Sun,  5 Nov 2017 10:39:04 -0800
>
>> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
>> index 3e10c51e7e0c..8c7612f32926 100644
>> --- a/net/ipv6/ip6_gre.c
>> +++ b/net/ipv6/ip6_gre.c
>> @@ -497,6 +497,79 @@ static int gre_handle_offloads(struct sk_buff *skb, 
>> bool csum)
>>   csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
>>  }
>>
>> +static inline void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
>> + struct net_device *dev,
>> + struct flowi6 *fl6, __u8 *dsfield,
>> + int *encap_limit)
>
> Please do not use 'inline' in foo.c files, let the compiler decide.
>
> Thank you.

Thanks. I will remove it and submit v2.
William

Re: [PATCH 30/31] dt-bindings: nds32 CPU Bindings

2017-11-08 Thread Rob Herring

Please Cc the DT list on bindings.

On Tue, Nov 7, 2017 at 11:55 PM, Greentime Hu  wrote:
> From: Greentime Hu 

Commit message needed.

> Signed-off-by: Vincent Chen 
> Signed-off-by: Rick Chen 
> Signed-off-by: Zong Li 
> Signed-off-by: Greentime Hu 
> ---
>  Documentation/devicetree/bindings/nds32/cpus.txt |   33 
> ++
>  1 file changed, 33 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/nds32/cpus.txt
>
> diff --git a/Documentation/devicetree/bindings/nds32/cpus.txt 
> b/Documentation/devicetree/bindings/nds32/cpus.txt
> new file mode 100644
> index 000..97394cb
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/nds32/cpus.txt
> @@ -0,0 +1,33 @@
> +* Andestech Processor Binding
> +
> +This binding specifies what properties must be available in the device tree
> +representation of a Andestech Processor Core, which is the root node in the
> +tree.
> +
> +Required properties:
> +
> +   - compatible:
> +   Usage: required
> +   Value type: 
> +   Definition: should be one of:
> +   "andestech,n13"
> +   "andestech,n15"
> +   "andestech,d15"
> +   "andestech,n10"
> +   "andestech,d10"

SMP supported for any of these?

> +
> +- device_type
> +   Usage: required
> +   Value type: 
> +   Definition: must be "cpu"
> +
> +* Examples
> +
> +/ {
> +   cpus {
> +   cpu@0 {

Needs a reg property or drop the unit address.

> +   device_type = "cpu";
> +   compatible = "andestech,n13", "andestech,n15";

n13 is a superset of n15?

> +   };
> +   };
> +};
> --
> 1.7.9.5
>

Re: syzbot:dup: general protection fault in __lock_acquire (2)

2017-11-08 Thread Dmitry Vyukov

On Sun, Nov 5, 2017 at 3:34 PM, Jon Maloy  wrote:
> The problem was already known, but the solution is non-trivial, and needs 
> some more review and testing before I can submit it.
>
> ///Jon Maloy

Hi Jon,

Thank you very much for actually bothering to reply with the dup
command. But commands must be in email body, i.e.:

#syz dup: general protection fault in __lock_acquire (2)

I've updated the email template to clarify that it's meant to be email
body to avoid confusion in future:
https://github.com/google/syzkaller/commit/9547ae3a85db67e4d3abe9ee7782a41b782a7906

Please reply to the "general protection fault in __lock_acquire (2)"
report with the fix command once there is a fixing commit for this.
This will allow syzbot to understand when the commit reaches all of
its trees and report similarly looking bugs in future.

Thanks



>> -Original Message-
>> From: syzbot
>> [mailto:bot+0cea668556ca5b811dc9725d82edbd87fea4defb@syzkaller.appsp
>> otmail.com]
>> Sent: Sunday, November 05, 2017 09:42
>> To: da...@davemloft.net; Jon Maloy ; linux-
>> ker...@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
>> b...@googlegroups.com; tipc-discuss...@lists.sourceforge.net; Ying Xue
>> 
>> Subject: general protection fault in perf_trace_lock_acquire
>>
>> Hello,
>>
>> syzkaller hit the following crash on
>> 5a3517e009e979f21977d362212b7729c5165d92
>> git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/master
>> compiler: gcc (GCC) 7.1.1 20170620
>> .config is attached
>> Raw console output is attached.
>>
>>
>>
>>
>> R10: 20ed7fe4 R11: 0212 R12: 004b7550
>> R13: 7f1a861ffb58 R14: 004b7560 R15: 
>> Subscriber rejected, no memory
>> kasan: CONFIG_KASAN_INLINE enabled
>> kasan: GPF could be caused by NULL-ptr deref or user memory access
>> general protection fault:  [#1] SMP KASAN Dumping ftrace buffer:
>> (ftrace buffer empty)
>> Modules linked in:
>> CPU: 1 PID: 518 Comm: syz-executor4 Not tainted 4.14.0-rc7-next-20171103+
>> #38
>> Hardware name: Google Google Compute Engine/Google Compute Engine,
>> BIOS Google 01/01/2011
>> task: 8801c558a180 task.stack: 8801d965
>> RIP: 0010:perf_trace_lock_acquire+0xc0/0x980
>> include/trace/events/lock.h:13
>> RSP: 0018:8801d9657668 EFLAGS: 00010002
>> RAX: 0007 RBX: 11003b2caed7 RCX: 
>> RDX: dc00 RSI: 0020 RDI: 85f24de0
>> RBP: 8801d9657840 R08:  R09: 0020
>> R10: dc00 R11: 8154c3e0 R12: 8801d9657818
>> R13:  R14: 85f24de0 R15: 0001
>> FS:  7f1a86200700() GS:8801db30()
>> knlGS:
>> CS:  0010 DS:  ES:  CR0: 80050033
>> CR2: 004d4a84 CR3: 0001ce90c000 CR4: 001406e0
>> DR0: 2000 DR1:  DR2: 
>> DR3:  DR6: 0ff0 DR7: 0600 Call
>> Trace:
>>   trace_lock_acquire include/trace/events/lock.h:13 [inline]
>>   lock_acquire+0x394/0x580 kernel/locking/lockdep.c:4003
>>   __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
>>   _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:174
>>   spin_lock_bh include/linux/spinlock.h:320 [inline]
>>   tipc_subscrb_subscrp_delete+0x8f/0x480 net/tipc/subscr.c:201
>>   tipc_subscrb_delete net/tipc/subscr.c:238 [inline]
>>   tipc_subscrb_release_cb+0x17/0x30 net/tipc/subscr.c:316
>>   tipc_close_conn+0x171/0x270 net/tipc/server.c:204
>>   tipc_topsrv_kern_subscr+0x724/0x810 net/tipc/server.c:514
>>   tipc_group_create+0x702/0x9c0 net/tipc/group.c:184
>>   tipc_sk_join net/tipc/socket.c:2747 [inline]
>>   tipc_setsockopt+0x249/0xc10 net/tipc/socket.c:2861
>>   SYSC_setsockopt net/socket.c:1851 [inline]
>>   SyS_setsockopt+0x189/0x360 net/socket.c:1830
>>   entry_SYSCALL_64_fastpath+0x1f/0xbe
>> RIP: 0033:0x452869
>> RSP: 002b:7f1a861ffbe8 EFLAGS: 0212 ORIG_RAX: 0036
>> RAX: ffda RBX: 007580d8 RCX: 00452869
>> RDX: 0087 RSI: 010f RDI: 0014
>> RBP: 0086 R08: 001c R09: 
>> R10: 20ed7fe4 R11: 0212 R12: 004b7550
>> R13: 7f1a861ffb58 R14: 004b7560 R15: 
>> Code: c7 40 1c 00 f2 f2 f2 c7 40 20 f2 f2 f2 f2 c7 40 24 00 f2 f2 f2 c7 40
>> 28 f3 f3 f3 f3 48 8d 46 18 48 89 85 70 fe ff ff 48 c1 e8 03 <80> 3c 10 00 0f 
>> 85 da
>> 04 00 00 49 8b 79 18 48 85 ff 0f 84 62 04
>> RIP: perf_trace_lock_acquire+0xc0/0x980 include/trace/events/lock.h:13
>> RSP:
>> 8801d9657668
>> ---[ end trace 2fd434e3de3d34c0 ]---
>>
>>
>> ---
>> This bug is generated by a dumb bot. It may contain errors.
>> See https://goo.gl/tpsmEJ for details.
>> Direct all questions to syzkal...@googlegroups.com.
>> Please credit me with: Reported-by: syzbot 
>>
>> syzbot will keep track of this bug re

[PATCH] net: realtek: r8169: remove redundant assignment to giga_ctrl

2017-11-08 Thread Colin King

From: Colin Ian King 

The variable giga_ctrl is assigned zero; however, this value is
never read and hence the assignment is redundant, so remove it.
Cleans up clang warning:

drivers/net/ethernet/realtek/r8169.c:1978:3: warning: Value stored
to 'giga_ctrl' is never read

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/realtek/r8169.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c 
b/drivers/net/ethernet/realtek/r8169.c
index fd218fd9ef3c..dcb8c39382e7 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1982,8 +1982,6 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
rtl_writephy(tp, MII_ADVERTISE, auto_nego);
rtl_writephy(tp, MII_CTRL1000, giga_ctrl);
} else {
-   giga_ctrl = 0;
-
if (speed == SPEED_10)
bmcr = 0;
else if (speed == SPEED_100)
-- 
2.14.1

Re: [PATCH 27/31] dt-bindings: interrupt-controller: Andestech Internal Vector Interrupt Controller

2017-11-08 Thread Rob Herring

+DT list

On Tue, Nov 7, 2017 at 11:55 PM, Greentime Hu  wrote:
> From: Greentime Hu 

Commit msg needed.

> Signed-off-by: Rick Chen 
> Signed-off-by: Greentime Hu 
> ---
>  .../interrupt-controller/andestech,ativic32.txt|   27 
> 
>  1 file changed, 27 insertions(+)
>  create mode 100644 
> Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
>
> diff --git 
> a/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
>  
> b/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
> new file mode 100644
> index 000..6bac908
> --- /dev/null
> +++ 
> b/Documentation/devicetree/bindings/interrupt-controller/andestech,ativic32.txt
> @@ -0,0 +1,27 @@
> +* Andestech Internal Vector Interrupt Controller
> +
> +The Internal Vector Interrupt Controller (IVIC) is a basic interrupt 
> controller
> +suitable for a simpler SoC platform not requiring a more sophisticated and
> +bigger External Vector Interrupt Controller.
> +
> +
> +Main node required properties:
> +
> +- compatible : should at least contain  "andestech,ativic32".
> +- interrupt-parent: Empty for the interrupt controller itself

Drop this.

> +- interrupt-controller : Identifies the node as an interrupt controller
> +- #interrupt-cells: The number of cells to define the interrupts. Should be 
> 2.
> +   The first cell is the IRQ number
> +   The second cell is used to specify mode:
> +   1 = low-to-high edge triggered
> +   2 = high-to-low edge triggered
> +   4 = active high level-sensitive
> +   8 = active low level-sensitive

Just state 2 cells and refer to interrupt-controller/interrupts.txt.

> +   Default for internal sources should be set to 4 (active high).
> +
> +Examples:
> +   intc: interrupt-controller {
> +   compatible = "andestech,ativic32";
> +   #interrupt-cells = <2>;
> +   interrupt-controller;
> +   };
> --
> 1.7.9.5
>

Re: [PATCH 29/31] MAINTAINERS: Add nds32

2017-11-08 Thread Rob Herring

On Tue, Nov 7, 2017 at 11:55 PM, Greentime Hu  wrote:
> From: Greentime Hu 
>
> Signed-off-by: Greentime Hu 
> ---
>  MAINTAINERS |9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 2f4e462..bce1181 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -857,6 +857,15 @@ X: drivers/iio/*/adjd*
>  F: drivers/staging/iio/*/ad*
>  F: drivers/staging/iio/trigger/iio-trig-bfin-timer.c
>
> +ANDES ARCHITECTURE
> +M: Greentime Hu 
> +M: Vincent Chen 
> +T: git https://github.com/andestech/linux.git
> +S: Supported
> +F: arch/nds32

DT binding files?

> +K: nds32
> +N: nds32
> +
>  ANDROID CONFIG FRAGMENTS
>  M: Rob Herring 
>  S: Supported
> --
> 1.7.9.5
>

[PATCH v2 net-next 01/12] tls: Use kzalloc for aead_request allocation

2017-11-08 Thread Ilya Lesokhin

Use kzalloc for aead_request allocation as
we don't set all the bits in the request.

Fixes: 3c4d7559159b ('tls: kernel TLS support')
Signed-off-by: Ilya Lesokhin 
---
 net/tls/tls_sw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7d80040..f00383a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -219,7 +219,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
struct aead_request *aead_req;
int rc;
 
-   aead_req = kmalloc(req_size, flags);
+   aead_req = kzalloc(req_size, flags);
if (!aead_req)
return -ENOMEM;
 
-- 
1.8.3.1

[PATCH v2 net-next 07/12] tcp: Add clean acked data hook

2017-11-08 Thread Ilya Lesokhin

Called when a TCP segment is acknowledged.
Could be used by application protocols that hold additional
metadata associated with the stream data.
This is required by TLS device offload to release
metadata associated with acknowledged TLS records.

Signed-off-by: Boris Pismenny 
Signed-off-by: Ilya Lesokhin 
Signed-off-by: Aviad Yehezkel 
---
 include/net/inet_connection_sock.h | 2 ++
 net/ipv4/tcp_input.c   | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/net/inet_connection_sock.h 
b/include/net/inet_connection_sock.h
index 0358745..49f2878 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -77,6 +77,7 @@ struct inet_connection_sock_af_ops {
  * @icsk_af_ops   Operations which are AF_INET{4,6} specific
  * @icsk_ulp_ops  Pluggable ULP control hook
  * @icsk_ulp_data ULP private data
+ * @icsk_clean_acked  Clean acked data hook
  * @icsk_ca_state:Congestion control state
  * @icsk_retransmits: Number of unrecovered [RTO] timeouts
  * @icsk_pending: Scheduled timer event
@@ -101,6 +102,7 @@ struct inet_connection_sock {
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops  *icsk_ulp_ops;
void  *icsk_ulp_data;
+   void  (*icsk_clean_acked)(struct sock *sk);
unsigned int  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
__u8  icsk_ca_state:6,
  icsk_ca_setsockopt:1,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0ada8bf..b3e9bcc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3645,6 +3645,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff 
*skb, int flag)
if (!prior_packets)
goto no_queue;
 
+   if (icsk->icsk_clean_acked)
+   icsk->icsk_clean_acked(sk);
+
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
-- 
1.8.3.1

[PATCH v2 net-next 10/12] net: Add TLS offload netdev ops

2017-11-08 Thread Ilya Lesokhin

Add new netdev ops to add and delete a TLS context.

Signed-off-by: Boris Pismenny 
Signed-off-by: Ilya Lesokhin 
Signed-off-by: Aviad Yehezkel 
---
 include/linux/netdevice.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fda527c..e8cb3cf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -842,6 +842,23 @@ struct xfrmdev_ops {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+enum tls_offload_ctx_dir {
+   TLS_OFFLOAD_CTX_DIR_RX,
+   TLS_OFFLOAD_CTX_DIR_TX,
+};
+
+struct tls_crypto_info;
+
+struct tlsdev_ops {
+   int (*tls_dev_add)(struct net_device *netdev, struct sock *sk,
+  enum tls_offload_ctx_dir direction,
+  struct tls_crypto_info *crypto_info);
+   void (*tls_dev_del)(struct net_device *netdev, struct sock *sk,
+   enum tls_offload_ctx_dir direction);
+};
+#endif
+
 struct dev_ifalias {
struct rcu_head rcuhead;
char ifalias[];
@@ -1727,6 +1744,10 @@ struct net_device {
const struct xfrmdev_ops *xfrmdev_ops;
 #endif
 
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+   const struct tlsdev_ops *tlsdev_ops;
+#endif
+
const struct header_ops *header_ops;
 
unsigned intflags;
-- 
1.8.3.1

[PATCH v2 net-next 11/12] net: Add TLS TX offload features

2017-11-08 Thread Ilya Lesokhin

This patch adds a netdev feature to configure TLS TX offloads.

Signed-off-by: Boris Pismenny 
Signed-off-by: Ilya Lesokhin 
Signed-off-by: Aviad Yehezkel 
---
 include/linux/netdev_features.h | 2 ++
 net/core/ethtool.c  | 1 +
 2 files changed, 3 insertions(+)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index dc8b489..ed0648a 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -76,6 +76,7 @@ enum {
NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload 
*/
NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */
NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
+   NETIF_F_HW_TLS_TX_BIT,  /* Hardware TLS TX offload */
 
/*
 * Add your fresh new feature above and remember to update
@@ -140,6 +141,7 @@ enum {
 #define NETIF_F_HW_ESP __NETIF_F(HW_ESP)
 #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
 #defineNETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
+#define NETIF_F_HW_TLS_TX  __NETIF_F(HW_TLS_TX)
 
 #define for_each_netdev_feature(mask_addr, bit)\
for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f8fcf45..a1138d2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -106,6 +106,7 @@ int ethtool_op_get_ts_info(struct net_device *dev, struct 
ethtool_ts_info *info)
[NETIF_F_HW_ESP_BIT] =   "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] =   "esp-tx-csum-hw-offload",
[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =   "rx-udp_tunnel-port-offload",
+   [NETIF_F_HW_TLS_TX_BIT] ="tls-hw-tx-offload",
 };
 
 static const char
-- 
1.8.3.1

Re: [PATCH 02/31] nds32: Kernel booting and initialization

2017-11-08 Thread Rob Herring

On Tue, Nov 7, 2017 at 11:54 PM, Greentime Hu  wrote:
> From: Greentime Hu 
>

Commit message needed.

> Signed-off-by: Vincent Chen 
> Signed-off-by: Greentime Hu 
> ---

> +   /* it could update max_pfn */
> +   if (max_pfn - ram_start_pfn <= MAXMEM_PFN)
> +   max_low_pfn = max_pfn;
> +   else {
> +   max_low_pfn = MAXMEM_PFN + ram_start_pfn;
> +#ifndef CONFIG_HIGHMEM

Use IS_ENABLED here and other ifdef's if possible.

> +   max_pfn = MAXMEM_PFN + ram_start_pfn;
> +#endif
> +   }


> +static int __init nds32_device_probe(void)
> +{
> +   return of_platform_populate(NULL, NULL, NULL, NULL);
> +}
> +
> +device_initcall(nds32_device_probe);

You can drop this. The core code will call of_platform_populate for you.

[PATCH v2 net-next 12/12] tls: Add generic NIC offload infrastructure.

2017-11-08 Thread Ilya Lesokhin

This patch adds a generic infrastructure to offload TLS crypto to
network devices. It enables the kernel TLS socket to skip encryption
and authentication operations on the transmit side of the data path,
leaving those computationally expensive operations to the NIC.

The NIC offload infrastructure builds TLS records and pushes them to
the TCP layer just like the SW KTLS implementation and using the same API.
TCP segmentation is mostly unaffected. Currently the only exception is
that we prevent mixed SKBs where only part of the payload requires
offload. In the future we are likely to add a similar restriction
following a change cipher spec record.

The notable differences between SW KTLS and NIC offloaded TLS
implementations are as follows:
1. The offloaded implementation builds "plaintext TLS records"; those
records contain plaintext instead of ciphertext and placeholder bytes
instead of authentication tags.
2. The offloaded implementation maintains a mapping from TCP sequence
number to TLS records. Thus given a TCP SKB sent from a NIC offloaded
TLS socket, we can use the tls NIC offload infrastructure to obtain
enough context to encrypt the payload of the SKB.
A TLS record is released when the last byte of the record is ack'ed,
this is done through the new icsk_clean_acked callback.

The infrastructure should be extendable to support various NIC offload
implementations.  However it is currently written with the
implementation below in mind:
The NIC assumes that packets from each offloaded stream are sent as
plaintext and in-order. It keeps track of the TLS records in the TCP
stream. When a packet marked for offload is transmitted, the NIC
encrypts the payload in-place and puts authentication tags in the
relevant place holders.

The responsibility for handling out-of-order packets (i.e. TCP
retransmission, qdisc drops) falls on the netdev driver.

The netdev driver keeps track of the expected TCP SN from the NIC's
perspective.  If the next packet to transmit matches the expected TCP
SN, the driver advances the expected TCP SN, and transmits the packet
with TLS offload indication.

If the next packet to transmit does not match the expected TCP SN, the
driver calls the TLS layer to obtain the TLS record that includes the
TCP sequence number of the packet for transmission. Using this TLS
record, the driver posts a work entry on the transmit queue to
reconstruct the NIC TLS state required for the offload of the
out-of-order packet. It updates the expected TCP SN accordingly and
transmits the now in-order packet.
The same queue is used for packet transmission and TLS context
reconstruction to avoid the need for flushing the transmit queue before
issuing the context reconstruction request.

Signed-off-by: Boris Pismenny 
Signed-off-by: Ilya Lesokhin 
Signed-off-by: Aviad Yehezkel 
---
 include/net/tls.h |  55 +++-
 net/tls/Kconfig   |   9 +
 net/tls/Makefile  |   3 +
 net/tls/tls_device.c  | 692 ++
 net/tls/tls_device_fallback.c | 382 +++
 net/tls/tls_main.c|  30 +-
 6 files changed, 1164 insertions(+), 7 deletions(-)
 create mode 100644 net/tls/tls_device.c
 create mode 100644 net/tls/tls_device_fallback.c

diff --git a/include/net/tls.h b/include/net/tls.h
index 70becd0..2ae9174 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -71,6 +71,29 @@ struct tls_sw_context {
struct scatterlist sg_aead_out[2];
 };
 
+struct tls_record_info {
+   struct list_head list;
+   u32 end_seq;
+   int len;
+   int num_frags;
+   skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+struct tls_offload_context {
+   struct crypto_aead *aead_send;
+
+   struct list_head records_list;
+   struct scatterlist sg_tx_data[MAX_SKB_FRAGS];
+   void (*sk_destruct)(struct sock *sk);
+   struct tls_record_info *open_record;
+   struct tls_record_info *retransmit_hint;
+   u64 hint_record_sn;
+   u64 unacked_record_sn;
+
+   u32 expected_seq;
+   spinlock_t lock;/* protects records list */
+};
+
 enum {
TLS_PENDING_CLOSED_RECORD
 };
@@ -81,6 +104,9 @@ struct tls_context {
struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
};
 
+   struct list_head gclist;
+   struct sock *sk;
+   struct net_device *netdev;
void *priv_ctx;
 
u8 tx_conf:2;
@@ -125,9 +151,23 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_tx_resources(struct sock *sk);
 
-void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
-void tls_icsk_clean_acked(struct sock *sk);
+void tls_clear_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_device_sendpage(struct sock *sk, struct page *page,
+

1 2 3 4 >

1 - 100 of 322 matches

Mail list logo