[PATCH net] tipc: fix potential null-ptr dereference bugs in netlink compat functions

2016-05-21 Thread Baozeng Ding
Before calling the nla_parse_nested function, its third argument should be
checked to make sure it is not null. This patch fixes several potential
null pointer dereference vulnerabilities in the tipc netlink functions.

Signed-off-by: Baozeng Ding 
---
 net/tipc/netlink_compat.c | 111 ++
 1 file changed, 93 insertions(+), 18 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4dfc5c1..1e18b78 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct 
tipc_nl_compat_cmd_doit *cmd,
 static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
  struct nlattr **attrs)
 {
+   int err;
struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1];
 
-   nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER],
-NULL);
+   if (!attrs[TIPC_NLA_BEARER])
+   return -EINVAL;
+
+   err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
+  attrs[TIPC_NLA_BEARER], NULL);
+   if (err)
+   return err;
 
return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME,
nla_data(bearer[TIPC_NLA_BEARER_NAME]),
@@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct tipc_nl_compat_msg 
*msg,
 static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
 struct nlattr **attrs)
 {
+   int err;
char *name;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
 
-   nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+   if (!attrs[TIPC_NLA_LINK])
+   return -EINVAL;
 
-   nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP],
-NULL);
+   err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+  attrs[TIPC_NLA_LINK], NULL);
+   if (err)
+   return err;
+
+   if (!link[TIPC_NLA_LINK_PROP])
+   return -EINVAL;
 
-   nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS],
-NULL);
+   err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
+  link[TIPC_NLA_LINK_PROP], NULL);
+   if (err)
+   return err;
+
+   if (!link[TIPC_NLA_LINK_STATS])
+   return -EINVAL;
+
+   err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
+  link[TIPC_NLA_LINK_STATS], NULL);
+   if (err)
+   return err;
 
name = (char *)TLV_DATA(msg->req);
if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
@@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct 
tipc_nl_compat_msg *msg,
 static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
struct nlattr **attrs)
 {
+   int err;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_link_info link_info;
 
-   nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+   if (!attrs[TIPC_NLA_LINK])
+   return -EINVAL;
+
+   err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+  attrs[TIPC_NLA_LINK], NULL);
+   if (err)
+   return err;
 
link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
@@ -751,6 +781,7 @@ static int tipc_nl_compat_name_table_dump_header(struct 
tipc_nl_compat_msg *msg)
 static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
  struct nlattr **attrs)
 {
+   int err;
char port_str[27];
struct tipc_name_table_query *ntq;
struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1];
@@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct 
tipc_nl_compat_msg *msg,
static const char * const scope_str[] = {"", " zone", " cluster",
 " node"};
 
-   nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
-attrs[TIPC_NLA_NAME_TABLE], NULL);
+   if (!attrs[TIPC_NLA_NAME_TABLE])
+   return -EINVAL;
 
-   nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL],
-NULL);
+   err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
+  attrs[TIPC_NLA_NAME_TABLE], NULL);
+   if (err)
+   return err;
+
+   if (!nt[TIPC_NLA_NAME_TABLE_PUBL])
+   return -EINVAL;
+
+   err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
+  nt[TIPC_NLA_NAME_TABLE_PUBL], NULL);
+   if (err)
+   return err;
 

[PATCH net] tipc: fix potential null pointer dereference in some compat, functions

2016-05-21 Thread Baozeng Ding
Before calling the nla_parse_nested function, make sure the pointer to
the attribute is not null. This patch fixes several potential null
pointer dereference vulnerabilities in the tipc netlink functions.

Signed-off-by: Baozeng Ding 
---
 net/tipc/netlink_compat.c | 111 ++
 1 file changed, 93 insertions(+), 18 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4dfc5c1..1e18b78 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct 
tipc_nl_compat_cmd_doit *cmd,
 static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
  struct nlattr **attrs)
 {
+   int err;
struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1];
 
-   nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER],
-NULL);
+   if (!attrs[TIPC_NLA_BEARER])
+   return -EINVAL;
+
+   err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
+  attrs[TIPC_NLA_BEARER], NULL);
+   if (err)
+   return err;
 
return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME,
nla_data(bearer[TIPC_NLA_BEARER_NAME]),
@@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct tipc_nl_compat_msg 
*msg,
 static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
 struct nlattr **attrs)
 {
+   int err;
char *name;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
 
-   nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+   if (!attrs[TIPC_NLA_LINK])
+   return -EINVAL;
 
-   nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP],
-NULL);
+   err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+  attrs[TIPC_NLA_LINK], NULL);
+   if (err)
+   return err;
+
+   if (!link[TIPC_NLA_LINK_PROP])
+   return -EINVAL;
 
-   nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS],
-NULL);
+   err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
+  link[TIPC_NLA_LINK_PROP], NULL);
+   if (err)
+   return err;
+
+   if (!link[TIPC_NLA_LINK_STATS])
+   return -EINVAL;
+
+   err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
+  link[TIPC_NLA_LINK_STATS], NULL);
+   if (err)
+   return err;
 
name = (char *)TLV_DATA(msg->req);
if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
@@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct 
tipc_nl_compat_msg *msg,
 static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
struct nlattr **attrs)
 {
+   int err;
struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_link_info link_info;
 
-   nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+   if (!attrs[TIPC_NLA_LINK])
+   return -EINVAL;
+
+   err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+  attrs[TIPC_NLA_LINK], NULL);
+   if (err)
+   return err;
 
link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
@@ -751,6 +781,7 @@ static int tipc_nl_compat_name_table_dump_header(struct 
tipc_nl_compat_msg *msg)
 static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
  struct nlattr **attrs)
 {
+   int err;
char port_str[27];
struct tipc_name_table_query *ntq;
struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1];
@@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct 
tipc_nl_compat_msg *msg,
static const char * const scope_str[] = {"", " zone", " cluster",
 " node"};
 
-   nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
-attrs[TIPC_NLA_NAME_TABLE], NULL);
+   if (!attrs[TIPC_NLA_NAME_TABLE])
+   return -EINVAL;
 
-   nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL],
-NULL);
+   err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
+  attrs[TIPC_NLA_NAME_TABLE], NULL);
+   if (err)
+   return err;
+
+   if (!nt[TIPC_NLA_NAME_TABLE_PUBL])
+   return -EINVAL;
+
+   err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
+  nt[TIPC_NLA_NAME_TABLE_PUBL], NULL);
+   if (err)
+   return err;
 
ntq = 

Re: IPv6 extension header privileges

2016-05-21 Thread Hannes Frederic Sowa
On 21.05.2016 19:46, Sowmini Varadhan wrote:
> Tom Herbert wrote:
> If you don't mind I'll change this to make specific options are
> privileged and not all hbh and destopt. There is talk in IETF about
> reinventing IP extensibility within UDP since the kernel APIs don't
> allow setting EH. I would like to avoid that :-)
> 
> Do you mean this
>   http://www.ietf.org/mail-archive/web/spud/current/msg00365.html
> 
> Maybe I misunderstood that rather long thread, but the author of that
> draft seems to be arguing for reinventing tcp congavoid and windowing 
> on top of udp to bypass kernel? ;-)

Hmm, haven't read carefully but isn't that just plain TCP in UDP? I saw
extension headers mentioned but haven't grasped why they deem necessary.

> Hannes Frederic Sowa wrote:
 A white list of certain registered IPv6 IANA-options for non-priv whould
> 
> Problem is that APIs are not IANA'ed. 
> Even RFC 3542 is just Informationaal. 
> 
> And even the classic socket API's that come down from BSD are not 
> ietf'ed or iana'ed.

I think I don't completely understand this. IANA is numbering registry
and if we have the proper option number allocated we can make sensible
decisions and put options on the white list or provide a more complete
sensible implementation of the specification in the kernel.

E.g. if an option for encapsulation is going to be specified, normal
users should not be able to set those, like with CALIPSO or some VNI
inside hop-by-hop options. That should probably be controlled by a
routing table or a flow matching subsystem, in the kernel.

Congestion and windowing information need to be settable and receivable
from user space.

Unassigned option, we don't know anything about them so far and should
probably being blocked for the time being, as we don't know which spec
would use the number.

I think it is also in favor of the IETF to get those numbers specified
and allocated in a proper way, otherwise security won't be manageable at
all any more.

> Hannes Frederic Sowa wrote:
>> Can you give some more details about the planned new options? I think we
>> can also open the API up for all options where the fourth bit is set,
>> which AFAIK denotes the experimental option space. And only have a
>> blacklist for the fourth bit == 0 case. Otherwise this is something IETF
>> people probably know more about what an impact this change could have.
> 
> I think it would be ok for some options to be privileged, others to not.
> We certainly have precedent for that from the classic socket APIs..
> and man pages etc can document any restrictions, as we do with other
> ioctls/sockopts etc.

Sure, we can make options unprivileged, we just need to know which one
and find out the details how they mix with each other. Also user space
should only be allowed to query the options it is allowed to set, too,
and not the whole option blocks.

Bye,
Hannes



Re: [PATCH 0043/1529] Fix typo

2016-05-21 Thread David Miller

I'm not applying a ton of patches that all have the same Subject line.

How can anyone looking at the GIT shortlog figure out what might
be different amongst any of these 1529 patches?

You must prefix your Subject line with the subsystem or area that your
patch is changing, followed by a colon character, then a space
character.

For this specific patch an appropriate subject line would be
"[PATCH 0043/1529] hisax: Fix typo."


Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer

2016-05-21 Thread Lino Sanfilippo
On 21.05.2016 21:47, Francois Romieu wrote:
> Shuyu Wei  :
> [...]
>> diff --git a/drivers/net/ethernet/arc/emac_main.c 
>> b/drivers/net/ethernet/arc/emac_main.c
>> index a3a9392..c2447b0 100644
>> --- a/drivers/net/ethernet/arc/emac_main.c
>> +++ b/drivers/net/ethernet/arc/emac_main.c
>> @@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct 
>> net_device *ndev)
>>  
>> skb_tx_timestamp(skb);
>>  
>> +   /* Make sure timestamp is set */
>> +   smp_wmb();
> 
> Should be dma_wmb() (see davem's message).
> 
> It's completely unrelated to SMP.
> 

Its also completely unrelated to dma so I doubt that this is what davem meant.
As far as I understood he was referring to the dma descriptor.

Lino


Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer

2016-05-21 Thread Lino Sanfilippo

On 21.05.2016 18:09, Shuyu Wei wrote:
> Looks like I got it wrong in the first place.
> 
> priv->tx_buff is not for the device, so there's no need to move it.
> The race has been fixed by commit c278c253f3d9, I forgot to check
> it out. That's my fault.
> 
> I do find another problem. We need to use a barrier to make sure
> skb_tx_timestamp() is called before setting the FOR_EMAC flag.
> 

Shuyu,

Could you please test the patch below? I implemented a new approach
in which tx_clean uses txbd_curr to determine if there are more 
descriptors to check or if the loop can be left. 
Memory barriers on both sides (xmit and clean) should ensure that 
SKB and info are only accessed if they are valid.
I also hope that skb_tx_timestamp is not an issue any more.


BTW: concerning get_maintainer Alexander Kochetkov should be CCed for
modifications in this area, so thats what I do hereby.

--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -162,7 +162,13 @@ static void arc_emac_tx_clean(struct net_device *ndev)
struct sk_buff *skb = tx_buff->skb;
unsigned int info = le32_to_cpu(txbd->info);
 
-   if ((info & FOR_EMAC) || !txbd->data || !skb)
+   if (info & FOR_EMAC)
+   break;
+
+   /* Make sure curr pointer is consistent with info */
+   rmb();
+
+   if (*txbd_dirty == priv->txbd_curr)
break;
 
if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
@@ -195,8 +201,8 @@ static void arc_emac_tx_clean(struct net_device *ndev)
*txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM;
}
 
-   /* Ensure that txbd_dirty is visible to tx() before checking
-* for queue stopped.
+   /* Ensure that txbd_dirty is visible to tx() and we see the most recent
+* value for txbd_curr.
 */
smp_mb();
 
@@ -680,35 +686,29 @@ static int arc_emac_tx(struct sk_buff *skb, struct 
net_device *ndev)
dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
 
priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
-
-   /* Make sure pointer to data buffer is set */
-   wmb();
+   priv->tx_buff[*txbd_curr].skb = skb;
 
skb_tx_timestamp(skb);
 
*info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
 
-   /* Make sure info word is set */
+   /* 1. Make sure that with respect to tx_clean everything is set up
+* properly before we advance txbd_curr.
+* 2. Make sure writes to DMA descriptors are completed before we inform
+* the hardware.
+*/
wmb();
 
-   priv->tx_buff[*txbd_curr].skb = skb;
-
/* Increment index to point to the next BD */
*txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
 
-   /* Ensure that tx_clean() sees the new txbd_curr before
-* checking the queue status. This prevents an unneeded wake
-* of the queue in tx_clean().
+   /* Ensure we see the most recent value of txbd_dirty and tx_clean() sees
+* the updated value of txbd_curr.
 */
smp_mb();
 
-   if (!arc_emac_tx_avail(priv)) {
+   if (!arc_emac_tx_avail(priv))
netif_stop_queue(ndev);
-   /* Refresh tx_dirty */
-   smp_mb();
-   if (arc_emac_tx_avail(priv))
-   netif_start_queue(ndev);
-   }
 
arc_reg_set(priv, R_STATUS, TXPL_MASK);
 
-- 
1.9.1



Re: Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?

2016-05-21 Thread Shmulik Ladkani
On Sat, 21 May 2016 17:55:59 +0200 Hannes Frederic Sowa 
 wrote:
> On 21.05.2016 14:50, Shmulik Ladkani wrote:
> > Hi,
> > 
> > inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not
> > to request resubmission for local delivery.
> > 
> > For an INET6_PROTO_FINAL handler, the following actions gets executed
> > prior delivery, in ip6_input_finish:
> > 
> > nf_reset(skb);
> > 
> > skb_postpull_rcsum(skb, skb_network_header(skb),
> >skb_network_header_len(skb));
> > 
> > For some reason, l2tp_ip6_protocol handler is NOT marked as
> > INET6_PROTO_FINAL. Probably an oversight.
> > 
> > Since 'l2tp_ip6_recv' never results in a resubmission, the above actions
> > are not applied to skbs passed to l2tp_ip6.
> > 
> > Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL?  
> 
> I don't see any specific reason why it shouldn't be a INET6_PROTO_FINAL.
> Anyway, receive path of L2TPv3 without UDP encapsulation doesn't deal
> with checksums anyway, as far as I know.
> 
> > What's the consequences not executing the above actions for l2tp_ip6
> > packets?  
> 
> Probably not a whole lot in this case.

OK, so the skb_postpull_rcsum is irrelevant for IPPROTO_L2TP over ipv6.

However, one more thing WRT to INET6_PROTO_FINAL not being set - we're
also missing the multicast filtering of 'ip6_input_finish':

if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
&hdr->saddr) &&
!ipv6_is_mld(skb, nexthdr, 
skb_network_header_len(skb)))
goto discard;

I assume no reason to allow multicast daddr which aren't in the mc_list
(or saddr excluded) to pass up into 'l2tp_ip6_recv'?


Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer

2016-05-21 Thread Francois Romieu
Shuyu Wei  :
[...]
> diff --git a/drivers/net/ethernet/arc/emac_main.c 
> b/drivers/net/ethernet/arc/emac_main.c
> index a3a9392..c2447b0 100644
> --- a/drivers/net/ethernet/arc/emac_main.c
> +++ b/drivers/net/ethernet/arc/emac_main.c
> @@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct 
> net_device *ndev)
>  
> skb_tx_timestamp(skb);
>  
> +   /* Make sure timestamp is set */
> +   smp_wmb();

Should be dma_wmb() (see davem's message).

It's completely unrelated to SMP.

-- 
Ueimor


Re: [PATCH net] uapi glibc compat: fix compilation when !__USE_MISC in glibc

2016-05-21 Thread Mikko Rapeli
On Thu, May 19, 2016 at 05:26:29PM +0200, Nicolas Dichtel wrote:
> These structures are defined only if __USE_MISC is set in glibc net/if.h
> headers, ie when _BSD_SOURCE or _SVID_SOURCE are defined.

Also, reading /usr/include/features.h from Debian glibc 2.22-4:

...
   The `-ansi' switch to the GNU C compiler, and standards conformance
   options such as `-std=c99', define __STRICT_ANSI__.  If none of
   these are defined, or if _DEFAULT_SOURCE is defined, the default is
   to have _POSIX_SOURCE set to one and _POSIX_C_SOURCE set to
   200809L, as well as enabling miscellaneous functions from BSD and
   SVID.  If more than one of these are defined, they accumulate.  For
   example __STRICT_ANSI__, _POSIX_SOURCE and _POSIX_C_SOURCE together
   give you ISO C, 1003.1, and 1003.2, but nothing else.
...
/* _BSD_SOURCE and _SVID_SOURCE are deprecated aliases for
   _DEFAULT_SOURCE.
...

In the compile test I go with the defaults. I guess I should test all
of these feature variants and in the future check deeper into the glibc
ifdefs.

> CC: Jan Engelhardt 
> CC: Josh Boyer 
> CC: Stephen Hemminger 
> CC: Waldemar Brodkorb 
> CC: Gabriel Laskar 
> CC: Mikko Rapeli 
> Fixes: 4a91cb61bb99 ("uapi glibc compat: fix compile errors when glibc 
> net/if.h included before linux/if.h")
> Signed-off-by: Nicolas Dichtel 

Signed-off-by: Mikko Rapeli 

> ---
>  include/uapi/linux/libc-compat.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/libc-compat.h 
> b/include/uapi/linux/libc-compat.h
> index d5e38c73377c..e4f048ee7043 100644
> --- a/include/uapi/linux/libc-compat.h
> +++ b/include/uapi/linux/libc-compat.h
> @@ -52,7 +52,7 @@
>  #if defined(__GLIBC__)
>  
>  /* Coordinate with glibc net/if.h header. */
> -#if defined(_NET_IF_H)
> +#if defined(_NET_IF_H) && defined(__USE_MISC)
>  
>  /* GLIBC headers included first so don't define anything
>   * that would already be defined. */
> -- 
> 2.8.1
> 


Re: IPv6 extension header privileges

2016-05-21 Thread Sowmini Varadhan
Tom Herbert wrote:
> >>> If you don't mind I'll change this to make specific options are
> >>> privileged and not all hbh and destopt. There is talk in IETF about
> >>> reinventing IP extensibility within UDP since the kernel APIs don't
> >>> allow setting EH. I would like to avoid that :-)

Do you mean this
  http://www.ietf.org/mail-archive/web/spud/current/msg00365.html

Maybe I misunderstood that rather long thread, but the author of that
draft seems to be arguing for reinventing tcp congavoid and windowing 
on top of udp to bypass kernel? ;-)

Hannes Frederic Sowa wrote:
> >> A white list of certain registered IPv6 IANA-options for non-priv whould

Problem is that APIs are not IANA'ed. 
Even RFC 3542 is just Informationaal. 

And even the classic socket API's that come down from BSD are not 
ietf'ed or iana'ed.

Hannes Frederic Sowa wrote:
> Can you give some more details about the planned new options? I think we
> can also open the API up for all options where the fourth bit is set,
> which AFAIK denotes the experimental option space. And only have a
> blacklist for the fourth bit == 0 case. Otherwise this is something IETF
> people probably know more about what an impact this change could have.

I think it would be ok for some options to be privileged, others to not.
We certainly have precedent for that from the classic socket APIs..
and man pages etc can document any restrictions, as we do with other
ioctls/sockopts etc.

--Sowmini



Re: IPv6 extension header privileges

2016-05-21 Thread Hannes Frederic Sowa
On 21.05.2016 18:00, Tom Herbert wrote:
> On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa
>  wrote:
>> On 21.05.2016 17:19, Tom Herbert wrote:
>>> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa
>>>  wrote:
 On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
> On (05/21/16 02:20), Hannes Frederic Sowa wrote:
>>
>> There are some options inherently protocol depending like the jumbo
>> payload option, which should be under control of the kernel, or the
>> router alert option for igmp, which causes packets to be steered towards
>> the slow/software path of routers, which can be used for DoS attacks.
>>
>> Setting CALIPSO options in IPv6 on packets as users would defeat the
>> whole CALIPSO model, etc.
>>
>> The RFC3542 requires at least some of the options in dst/hop-by-hop
>
> "requires" is a strong word. 3542 declares it as a  "may" (lower case).
> The only thing required strongly is IPV6_NEXTHOP itself.
>
> I suspect 3542 was written at a time when hbh and dst opt were loosely
> defined and the "may" is just a place-holder (i.e., it's not even a MAY)

 My wording directly from the RFC was too strong, true, but given that
 there is a CALIPSO patch already floating around for the kernel and
 those options are strictly controlled by selinux policy and build the
 foundation for the networking separation we can't make it simply
 non-priv.

>>> If you don't mind I'll change this to make specific options are
>>> privileged and not all hbh and destopt. There is talk in IETF about
>>> reinventing IP extensibility within UDP since the kernel APIs don't
>>> allow setting EH. I would like to avoid that :-)
>>
>> Hehe, certainly.
>>
>> A white list of certain registered IPv6 IANA-options for non-priv whould
>> certainly fly in my opinion. That is what I meant with "More
>> fine-grained parsing and setting of those options has never been
>> implemented." from my first mail.
>>
>> I am not that certain about a blacklist though, but haven't thought
>> about that enough. I didn't yet get around to review other options, but
>> basically people could use private options in some proprietary settings
>> and we could break their assumptions by such a change.
>>
>> Would a white list be sufficient?
>>
> Probably not. The "kernel is the problem" community always seem to be
> looking for even the slightest API or implementation deficiency to
> justify bypassing the kernel entirely. :-(

Another issue, which could come up also is the ordering options during
the merge of kernel policy and user space provided options, hmpf...

Maybe a way out of it is to use some kind of sysctl, capability, or
whatever, depending on the use cases?

Bye,
Hannes



[PATCH] net/tipc: fix potential null pointer dereference in several tipc netlink compat functions

2016-05-21 Thread Baozeng Ding

Before calling the nla_parse_nested funciton, its third argument should be
checked to make sure it is not null. This patch fixes several potential
null pointer dereference vulnerability in the tipc netlink functions.

Signed-off-by: Baozeng Ding 
---
 net/tipc/netlink_compat.c | 111 
++

 1 file changed, 93 insertions(+), 18 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4dfc5c1..959e989 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct 
tipc_nl_compat_cmd_doit *cmd,

 static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
   struct nlattr **attrs)
 {
+int err;
 struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1];

-nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER],
- NULL);
+if (!attrs[TIPC_NLA_BEARER])
+return -EINVAL;
+
+err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX,
+   attrs[TIPC_NLA_BEARER], NULL);
+if (err)
+return err;

 return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME,
 nla_data(bearer[TIPC_NLA_BEARER_NAME]),
@@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct 
tipc_nl_compat_msg *msg,

 static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
  struct nlattr **attrs)
 {
+int err;
 char *name;
 struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
 struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
 struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];

-nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+if (!attrs[TIPC_NLA_LINK])
+return -EINVAL;

-nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP],
- NULL);
+err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+   attrs[TIPC_NLA_LINK], NULL);
+if (err)
+return err;
+
+if (!link[TIPC_NLA_LINK_PROP])
+return -EINVAL;

-nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS],
- NULL);
+err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX,
+   link[TIPC_NLA_LINK_PROP], NULL);
+if (err)
+return err;
+
+if (!link[TIPC_NLA_LINK_STATS])
+return -EINVAL;
+
+err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX,
+   link[TIPC_NLA_LINK_STATS], NULL);
+if (err)
+return err;

 name = (char *)TLV_DATA(msg->req);
 if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
@@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct 
tipc_nl_compat_msg *msg,

 static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 struct nlattr **attrs)
 {
+int err;
 struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
 struct tipc_link_info link_info;

-nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+if (!attrs[TIPC_NLA_LINK])
+return -EINVAL;
+
+err = nla_parse_nested(link, TIPC_NLA_LINK_MAX,
+   attrs[TIPC_NLA_LINK], NULL);
+if (err)
+return err;

 link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
 link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
@@ -751,6 +781,7 @@ static int 
tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)

 static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg,
   struct nlattr **attrs)
 {
+int err;
 char port_str[27];
 struct tipc_name_table_query *ntq;
 struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1];
@@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct 
tipc_nl_compat_msg *msg,

 static const char * const scope_str[] = {"", " zone", " cluster",
  " node"};

-nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
- attrs[TIPC_NLA_NAME_TABLE], NULL);
+if (!attrs[TIPC_NLA_NAME_TABLE])
+return -EINVAL;

-nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL],
- NULL);
+err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX,
+   attrs[TIPC_NLA_NAME_TABLE], NULL);
+if (err)
+return err;
+
+if (!attrs[TIPC_NLA_NAME_TABLE])
+return -EINVAL;
+
+err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX,
+   nt[TIPC_NLA_NAME_TABLE_PUBL], NULL);
+if (err)
+return err;

 ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);

@@ -813,10 +854,17 @@ out:
 static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg,
   struct nlattr **attrs)
 {
+int err;
 u32 type, lower, upper;
 struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1];

-nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL);
+if (!attrs[TIPC_NLA_PUBL])
+return -EINVAL;
+
+err = nla_parse_nested(publ, TIPC_NLA_P

Re: IPv6 extension header privileges

2016-05-21 Thread Hannes Frederic Sowa
On 21.05.2016 18:00, Tom Herbert wrote:
> On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa
>  wrote:
>> On 21.05.2016 17:19, Tom Herbert wrote:
>>> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa
>>>  wrote:
 On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
> On (05/21/16 02:20), Hannes Frederic Sowa wrote:
>>
>> There are some options inherently protocol depending like the jumbo
>> payload option, which should be under control of the kernel, or the
>> router alert option for igmp, which causes packets to be steered towards
>> the slow/software path of routers, which can be used for DoS attacks.
>>
>> Setting CALIPSO options in IPv6 on packets as users would defeat the
>> whole CALIPSO model, etc.
>>
>> The RFC3542 requires at least some of the options in dst/hop-by-hop
>
> "requires" is a strong word. 3542 declares it as a  "may" (lower case).
> The only thing required strongly is IPV6_NEXTHOP itself.
>
> I suspect 3542 was written at a time when hbh and dst opt were loosely
> defined and the "may" is just a place-holder (i.e., it's not even a MAY)

 My wording directly from the RFC was too strong, true, but given that
 there is a CALIPSO patch already floating around for the kernel and
 those options are strictly controlled by selinux policy and build the
 foundation for the networking separation we can't make it simply
 non-priv.

>>> If you don't mind I'll change this to make specific options are
>>> privileged and not all hbh and destopt. There is talk in IETF about
>>> reinventing IP extensibility within UDP since the kernel APIs don't
>>> allow setting EH. I would like to avoid that :-)
>>
>> Hehe, certainly.
>>
>> A white list of certain registered IPv6 IANA-options for non-priv whould
>> certainly fly in my opinion. That is what I meant with "More
>> fine-grained parsing and setting of those options has never been
>> implemented." from my first mail.
>>
>> I am not that certain about a blacklist though, but haven't thought
>> about that enough. I didn't yet get around to review other options, but
>> basically people could use private options in some proprietary settings
>> and we could break their assumptions by such a change.
>>
>> Would a white list be sufficient?
>>
> Probably not. The "kernel is the problem" community always seem to be
> looking for even the slightest API or implementation deficiency to
> justify bypassing the kernel entirely. :-(

Hmm, I see...

Can you give some more details about the planned new options? I think we
can also open the API up for all options where the fourth bit is set,
which AFAIK denotes the experimental option space. And only have a
blacklist for the fourth bit == 0 case. Otherwise this is something IETF
people probably know more about what an impact this change could have.

Bye,
Hannes



Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer

2016-05-21 Thread Shuyu Wei
Looks like I got it wrong in the first place.

priv->tx_buff is not for the device, so there's no need to move it.
The race has been fixed by commit c278c253f3d9, I forgot to check
it out. That's my fault.

I do find another problem. We need to use a barrier to make sure
skb_tx_timestamp() is called before setting the FOR_EMAC flag.

According to the comment(include/linux/skbuff.h):
>/**
> * skb_tx_timestamp() - Driver hook for transmit timestamping
> *
> * Ethernet MAC Drivers should call this function in their hard_xmit()
> * function immediately before giving the sk_buff to the MAC hardware.
> *
> * Specifically, one should make absolutely sure that this function is
> * called before TX completion of this packet can trigger.  Otherwise
> * the packet could potentially already be freed.
> *
> * @skb: A socket buffer.
> */

---

diff --git a/drivers/net/ethernet/arc/emac_main.c 
b/drivers/net/ethernet/arc/emac_main.c
index a3a9392..c2447b0 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct 
net_device *ndev)
 
skb_tx_timestamp(skb);
 
+   /* Make sure timestamp is set */
+   smp_wmb();
+
*info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
 
/* Make sure info word is set */



Re: IPv6 extension header privileges

2016-05-21 Thread Tom Herbert
On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa
 wrote:
> On 21.05.2016 17:19, Tom Herbert wrote:
>> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa
>>  wrote:
>>> On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
 On (05/21/16 02:20), Hannes Frederic Sowa wrote:
>
> There are some options inherently protocol depending like the jumbo
> payload option, which should be under control of the kernel, or the
> router alert option for igmp, which causes packets to be steered towards
> the slow/software path of routers, which can be used for DoS attacks.
>
> Setting CALIPSO options in IPv6 on packets as users would defeat the
> whole CALIPSO model, etc.
>
> The RFC3542 requires at least some of the options in dst/hop-by-hop

 "requires" is a strong word. 3542 declares it as a  "may" (lower case).
 The only thing required strongly is IPV6_NEXTHOP itself.

 I suspect 3542 was written at a time when hbh and dst opt were loosely
 defined and the "may" is just a place-holder (i.e., it's not even a MAY)
>>>
>>> My wording directly from the RFC was too strong, true, but given that
>>> there is a CALIPSO patch already floating around for the kernel and
>>> those options are strictly controlled by selinux policy and build the
>>> foundation for the networking separation we can't make it simply
>>> non-priv.
>>>
>> If you don't mind I'll change this to make specific options are
>> privileged and not all hbh and destopt. There is talk in IETF about
>> reinventing IP extensibility within UDP since the kernel APIs don't
>> allow setting EH. I would like to avoid that :-)
>
> Hehe, certainly.
>
> A white list of certain registered IPv6 IANA-options for non-priv whould
> certainly fly in my opinion. That is what I meant with "More
> fine-grained parsing and setting of those options has never been
> implemented." from my first mail.
>
> I am not that certain about a blacklist though, but haven't thought
> about that enough. I didn't yet get around to review other options, but
> basically people could use private options in some proprietary settings
> and we could break their assumptions by such a change.
>
> Would a white list be sufficient?
>
Probably not. The "kernel is the problem" community always seem to be
looking for even the slightest API or implementation deficiency to
justify bypassing the kernel entirely. :-(

Tom

> Bye,
> Hannes
>


Re: Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?

2016-05-21 Thread Hannes Frederic Sowa
On 21.05.2016 14:50, Shmulik Ladkani wrote:
> Hi,
> 
> inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not
> to request resubmission for local delivery.
> 
> For an INET6_PROTO_FINAL handler, the following actions gets executed
> prior delivery, in ip6_input_finish:
> 
>   nf_reset(skb);
> 
>   skb_postpull_rcsum(skb, skb_network_header(skb),
>  skb_network_header_len(skb));
> 
> For some reason, l2tp_ip6_protocol handler is NOT marked as
> INET6_PROTO_FINAL. Probably an oversight.
> 
> Since 'l2tp_ip6_recv' never results in a resubmission, the above actions
> are not applied to skbs passed to l2tp_ip6.
> 
> Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL?

I don't see any specific reason why it shouldn't be a INET6_PROTO_FINAL.
Anyway, receive path of L2TPv3 without UDP encapsulation doesn't deal
with checksums anyway, as far as I know.

> What's the consequences not executing the above actions for l2tp_ip6
> packets?

Probably not a whole lot in this case.

Bye,
Hannes



Re: 4.5.0 on sun7i-a20-olinuxino-lime2: libphy: PHY stmmac-0:ffffffff not found (regression from rc7)

2016-05-21 Thread Bert Lindner

Hi,

On 2016-05-20 12:36, Marc Zyngier wrote:

On 20/05/16 11:30, Andre Heider wrote:

Hi,

On Fri, May 20, 2016 at 10:14 AM, Giuseppe CAVALLARO
 wrote:

On 5/20/2016 9:56 AM, Marc Zyngier wrote:


On 20/05/16 06:44, Andre Heider wrote:


Giuseppe, Alexandre, et al.,

On Thu, Mar 17, 2016 at 8:52 AM, Marc Zyngier 
wrote:


On Thu, 17 Mar 2016 00:56:40 +0100
Bert Lindner  wrote:


On 2016-03-16 18:42, Marc Zyngier wrote:


On 16/03/16 15:10, Bert Lindner wrote:


On 2016-03-16 14:10, Andreas Färber wrote:


Am 16.03.2016 um 13:09 schrieb Robin Murphy:


On 16/03/16 11:39, Marc Zyngier wrote:


On 16/03/16 11:19, Bert Lindner wrote:


...


For the board sun7i-a20-olinuxino-lime2, there seems to be a
problem
with the eth0 PHY in mainline kernel 4.5.0 that developed since
4.5.0-rc7. Ethernet does not work, although eth0 is reported:


...


[9.767125] NET: Registered protocol family 10
[   10.357405] libphy: PHY stmmac-0: not found
[   10.362382] eth0: Could not attach to PHY
[   10.366557] stmmac_open: Cannot attach to PHY (error: -19)


...


v4 fixes for 4.5 are here:

https://patchwork.ozlabs.org/patch/598195/ (revert)
https://patchwork.ozlabs.org/patch/598196/


...


Good to know, thanks. Could you also give the potential fix a go (as
mentioned by Andreas)? Just to make sure that whatever gets merged
next
will actually fix the issue.



Yes sure, it took a while because I had to travel. Confirmed, the
v4-for-4.5 fix works well for me, on sun7i-a20-olinuxino-lime2:

root@lime2-079f:~# cat /proc/version
Linux version 4.5.0-598195-598196-v4 (root@lime2-079f) (gcc version
4.9.1 (Ubuntu/Linaro 4.9.1-16ubuntu6) ) #1 SMP Wed Mar 16 16:44:22 UTC
2016

dmesg:
[8.245273] NET: Registered protocol family 10
[9.297406]  RX IPC Checksum Offload disabled
[9.297460]  No MAC Management Counters available
[9.297951] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready
[   16.285658] sun7i-dwmac 1c5.ethernet eth0: Link is Up -
1Gbps/Full - flow control rx/tx
[   16.285798] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready

The board is connected to my laptop rather than to a switch, so that
might be where the flow control message comes from (not sure). Anyway
ethernet works.



Cool, many thanks for taking the time to test and report.

Hopefully Giuseppe will get this merged quickly enough in mainline, and
it should then trickle into a 4.5-stable release (cc-ing stable on
these patches would probably be a good idea, BTW).



stmmac is broken on at least Lime2, BananaPi and Cubieboard2 since
v4.5 [0], including all five stable releases :(



All the A20 platforms are dead, actually.


The v4.5 patches quoted above are already +4 weeks old, could we
please get them into stable?



For that, the maintainer would have needed to CC stable, which he
didn't. I'd expect someone who cares to send these patches to stable.
It'd be better if the maintainer would do it himself though.



sure, I can send the patches to stable (sorry if I missed to add
stable ML on CC).

Andre, I have not clear if the train of patches actually fix the
issue or if you need my support to fix something else. In that case
I need some input for debugging (e.g. kernel log).


Bert already confirmed that those two patches fixes stmmac on his
Lime2, so I assume that it fixes the issue for all A20 platforms.


let me know, is it enough to re-send the patches only?


Just a resend with cc:stable :)


Not quite. Please read Documentation/stable_kernel_rules.txt, and the
section that concerns networking patches (and then consult
Documentation/networking/netdev-FAQ.txt which has all the details).


FWIW, recent 4.6-rc series and 4.6.0 have worked fine for me and the 
lime2. Had not tried 4.5.x again.


Best,

-bert




Re: IPv6 extension header privileges

2016-05-21 Thread Hannes Frederic Sowa
On 21.05.2016 17:19, Tom Herbert wrote:
> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa
>  wrote:
>> On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
>>> On (05/21/16 02:20), Hannes Frederic Sowa wrote:

 There are some options inherently protocol depending like the jumbo
 payload option, which should be under control of the kernel, or the
 router alert option for igmp, which causes packets to be steered towards
 the slow/software path of routers, which can be used for DoS attacks.

 Setting CALIPSO options in IPv6 on packets as users would defeat the
 whole CALIPSO model, etc.

 The RFC3542 requires at least some of the options in dst/hop-by-hop
>>>
>>> "requires" is a strong word. 3542 declares it as a  "may" (lower case).
>>> The only thing required strongly is IPV6_NEXTHOP itself.
>>>
>>> I suspect 3542 was written at a time when hbh and dst opt were loosely
>>> defined and the "may" is just a place-holder (i.e., it's not even a MAY)
>>
>> My wording directly from the RFC was too strong, true, but given that
>> there is a CALIPSO patch already floating around for the kernel and
>> those options are strictly controlled by selinux policy and build the
>> foundation for the networking separation we can't make it simply
>> non-priv.
>>
> If you don't mind I'll change this to make specific options are
> privileged and not all hbh and destopt. There is talk in IETF about
> reinventing IP extensibility within UDP since the kernel APIs don't
> allow setting EH. I would like to avoid that :-)

Hehe, certainly.

A white list of certain registered IPv6 IANA-options for non-priv whould
certainly fly in my opinion. That is what I meant with "More
fine-grained parsing and setting of those options has never been
implemented." from my first mail.

I am not that certain about a blacklist though, but haven't thought
about that enough. I didn't yet get around to review other options, but
basically people could use private options in some proprietary settings
and we could break their assumptions by such a change.

Would a white list be sufficient?

Bye,
Hannes



Re: IPv6 extension header privileges

2016-05-21 Thread Tom Herbert
On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa
 wrote:
> On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
>> On (05/21/16 02:20), Hannes Frederic Sowa wrote:
>> >
>> > There are some options inherently protocol depending like the jumbo
>> > payload option, which should be under control of the kernel, or the
>> > router alert option for igmp, which causes packets to be steered towards
>> > the slow/software path of routers, which can be used for DoS attacks.
>> >
>> > Setting CALIPSO options in IPv6 on packets as users would defeat the
>> > whole CALIPSO model, etc.
>> >
>> > The RFC3542 requires at least some of the options in dst/hop-by-hop
>>
>> "requires" is a strong word. 3542 declares it as a  "may" (lower case).
>> The only thing required strongly is IPV6_NEXTHOP itself.
>>
>> I suspect 3542 was written at a time when hbh and dst opt were loosely
>> defined and the "may" is just a place-holder (i.e., it's not even a MAY)
>
> My wording directly from the RFC was too strong, true, but given that
> there is a CALIPSO patch already floating around for the kernel and
> those options are strictly controlled by selinux policy and build the
> foundation for the networking separation we can't make it simply
> non-priv.
>
If you don't mind I'll change this to make specific options are
privileged and not all hbh and destopt. There is talk in IETF about
reinventing IP extensibility within UDP since the kernel APIs don't
allow setting EH. I would like to avoid that :-)

> Bye,
> Hannes


Re: [PATCH 0043/1529] Fix typo

2016-05-21 Thread Thadeu Lima de Souza Cascardo
On Sat, May 21, 2016 at 01:41:21PM +0200, Andrea Gelmini wrote:
> Signed-off-by: Andrea Gelmini 
> ---
>  Documentation/isdn/HiSax.cert | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert
> index f2a6fcb..1483cd9d 100644
> --- a/Documentation/isdn/HiSax.cert
> +++ b/Documentation/isdn/HiSax.cert
> @@ -32,7 +32,7 @@ special hardware used will be regarded as approved if at 
> least one
>  solution has been tested including those electrical tests. So if cards 
>  or tas have been completely approved for any other os, the approval
>  for those electrical tests is valid for linux, too.
> -Please send any questions regarding this drivers or approval abouts to 
> +Please send any questions regarding this drivers or approval about to 

Should be "these drivers".

>  wer...@isdn-development.de 
>  Additional information and the type approval documents will be found
>  shortly on the Colognechip website www.colognechip.com 
> -- 
> 2.8.2.534.g1f66975
> 


Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer

2016-05-21 Thread Shuyu Wei
On Thu, May 19, 2016 at 11:15:56PM +0200, Lino Sanfilippo wrote:
>  drivers/net/ethernet/arc/emac_main.c | 33 +
>  1 file changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/arc/emac_main.c 
> b/drivers/net/ethernet/arc/emac_main.c
> index a3a9392..b986499 100644
> --- a/drivers/net/ethernet/arc/emac_main.c
> +++ b/drivers/net/ethernet/arc/emac_main.c
> @@ -162,8 +162,13 @@ static void arc_emac_tx_clean(struct net_device *ndev)
>   struct sk_buff *skb = tx_buff->skb;
>   unsigned int info = le32_to_cpu(txbd->info);
>  
> - if ((info & FOR_EMAC) || !txbd->data || !skb)
> + if (info & FOR_EMAC) {
> + /* Make sure we see consistent values for info, skb
> +  * and data.
> +  */
> + rmb();
>   break;
> + }
>  
>   if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) {
>   stats->tx_errors++;
> @@ -680,35 +685,31 @@ static int arc_emac_tx(struct sk_buff *skb, struct 
> net_device *ndev)
>   dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len);
>  
>   priv->txbd[*txbd_curr].data = cpu_to_le32(addr);
> -
> - /* Make sure pointer to data buffer is set */
> - wmb();
> + priv->tx_buff[*txbd_curr].skb = skb;
>  
>   skb_tx_timestamp(skb);
>  
> - *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
> -
> - /* Make sure info word is set */
> + /* Make sure info is set after data and skb with respect to
> +  * tx_clean().
> +  */
>   wmb();
>  
> - priv->tx_buff[*txbd_curr].skb = skb;
> + *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len);
>  
>   /* Increment index to point to the next BD */
>   *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM;
>  
> - /* Ensure that tx_clean() sees the new txbd_curr before
> + /* 1.Ensure that tx_clean() sees the new txbd_curr before
>* checking the queue status. This prevents an unneeded wake
>* of the queue in tx_clean().
> +  * 2.Ensure that all values are written to RAM and to DMA
> +  * before hardware is informed.
> +  * 3.Ensure we see the most recent value for tx_dirty.
>*/
> - smp_mb();
> + mb();
>  
> - if (!arc_emac_tx_avail(priv)) {
> + if (!arc_emac_tx_avail(priv))
>   netif_stop_queue(ndev);
> - /* Refresh tx_dirty */
> - smp_mb();
> - if (arc_emac_tx_avail(priv))
> - netif_start_queue(ndev);
> - }
>  
>   arc_reg_set(priv, R_STATUS, TXPL_MASK);
> 
> 
Hi Lino,
I applied your patch and tested on my devices, it directly went to
panic. So there must be something wrong.


[   13.084412] rockchip_emac 10204000.ethernet eth0: Link is Up - 100Mbps/Full 
- flow control rx/tx
[   13.093360] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[   13.105915] Unable to handle kernel NULL pointer dereference at virtual 
address 00a8
[   13.114173] pgd = c0004000
[   13.117069] [00a8] *pgd=
[   13.120778] Internal error: Oops: 5 [#1] SMP ARM
[   13.125404] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0+ #88
[   13.131404] Hardware name: Rockchip (Device Tree)
[   13.136108] task: c0804e00 ti: c080 task.ti: c080
[   13.141515] PC is at __dev_kfree_skb_irq+0xc/0x8c
[   13.146223] LR is at arc_emac_poll+0x94/0x584
[   13.150581] pc : []lr : []psr: 60080113
[   13.150581] sp : c0801d88  ip : c0801da0  fp : c0801d9c
[   13.162046] r10: ee092000  r9 : ee093000  r8 : 0008
[   13.167268] r7 :   r6 : 0004  r5 : f08d0400  r4 : 0010
[   13.173789] r3 : 0001  r2 :   r1 : 0001  r0 : 
[   13.180312] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   13.187441] Control: 10c5387d  Table: 8d24c04a  DAC: 0051
[   13.193184] Process swapper/0 (pid: 0, stack limit = 0xc0800210)
[   13.199187] Stack: (0xc0801d88 to 0xc0802000)
[   13.203548] 1d80:   0010 f08d0400 c0801df4 c0801da0 
c03c5e64 c046f588
[   13.211723] 1da0:  ee0cc154  ee0924a8 c0801dcc 0028 
c0806614 f08d0408
[   13.219897] 1dc0: ee0922a8 007f  ee0924a8 c03c5dd0 0028 
012c fffee77c
[   13.228071] 1de0: c0802100 c0801e18 c0801e54 c0801df8 c0471bfc c03c5ddc 
c0801e3c c06e5ae8
[   13.236245] 1e00: c08029f4 c08029f4 c083d821 2e86f000 c07486c0 eefb76c0 
c0801e18 c0801e18
[   13.244419] 1e20: c0801e20 c0801e20 ee841900  c080208c c080 
0100 0003
[   13.252594] 1e40: c0802080 4003 c0801eb4 c0801e58 c012810c c04719fc 
0001 ee808000
[   13.260768] 1e60: c08024a8 0020 c0802100 fffee77b 000a c06017ec 
c083f940 c07432f8
[   13.268941] 1e80: c0802080 c0801e58 c0801eb4 c07463cc   
0001 ee808000
[   13.277115] 1ea0: c08024a8 eefe01c0 c0801ec4 c0801eb8 c01284f8 c0127ff4 
c0801eec c0801ec8
[   

Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?

2016-05-21 Thread Shmulik Ladkani
Hi,

inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not
to request resubmission for local delivery.

For an INET6_PROTO_FINAL handler, the following actions gets executed
prior delivery, in ip6_input_finish:

nf_reset(skb);

skb_postpull_rcsum(skb, skb_network_header(skb),
   skb_network_header_len(skb));

For some reason, l2tp_ip6_protocol handler is NOT marked as
INET6_PROTO_FINAL. Probably an oversight.

Since 'l2tp_ip6_recv' never results in a resubmission, the above actions
are not applied to skbs passed to l2tp_ip6.

Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL?

What's the consequences not executing the above actions for l2tp_ip6
packets?

Thanks,
Shmulik


[PATCH 0051/1529] Fix typo

2016-05-21 Thread Andrea Gelmini
Signed-off-by: Andrea Gelmini 
---
 Documentation/networking/vortex.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/vortex.txt 
b/Documentation/networking/vortex.txt
index 97282da..40f1ab2 100644
--- a/Documentation/networking/vortex.txt
+++ b/Documentation/networking/vortex.txt
@@ -243,7 +243,7 @@ Media selection
 A number of the older NICs such as the 3c590 and 3c900 series have
 10base2 and AUI interfaces.
 
-Prior to January, 2001 this driver would autoeselect the 10base2 or AUI
+Prior to January, 2001 this driver would autoselect the 10base2 or AUI
 port if it didn't detect activity on the 10baseT port.  It would then
 get stuck on the 10base2 port and a driver reload was necessary to
 switch back to 10baseT.  This behaviour could not be prevented with a
-- 
2.8.2.534.g1f66975



[PATCH 0043/1529] Fix typo

2016-05-21 Thread Andrea Gelmini
Signed-off-by: Andrea Gelmini 
---
 Documentation/isdn/HiSax.cert | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert
index f2a6fcb..1483cd9d 100644
--- a/Documentation/isdn/HiSax.cert
+++ b/Documentation/isdn/HiSax.cert
@@ -32,7 +32,7 @@ special hardware used will be regarded as approved if at 
least one
 solution has been tested including those electrical tests. So if cards 
 or tas have been completely approved for any other os, the approval
 for those electrical tests is valid for linux, too.
-Please send any questions regarding this drivers or approval abouts to 
+Please send any questions regarding this drivers or approval about to 
 wer...@isdn-development.de 
 Additional information and the type approval documents will be found
 shortly on the Colognechip website www.colognechip.com 
-- 
2.8.2.534.g1f66975



[PATCH v2 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.

2016-05-21 Thread Haishuang Yan
In gre6 xmit path, we are sending a GRE packet, so set fl6 proto
to IPPROTO_GRE properly.

Signed-off-by: Haishuang Yan 
---
Changes in v2:
  - Initialize the flow protocol in ip6gre_tnl_link_config
---
 net/ipv6/ip6_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 8ea5a4d..e706621 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int 
set_mtu)
fl6->daddr = p->raddr;
fl6->flowi6_oif = p->link;
fl6->flowlabel = 0;
+   fl6->flowi6_proto = IPPROTO_GRE;
 
if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
-- 
1.8.3.1





[PATCH v2 1/2] ip6_gre: Fix MTU setting for ip6gretap

2016-05-21 Thread Haishuang Yan
When creat an ip6gretap interface with an unreachable route,
the MTU is about 14 bytes larger than what was needed.

If the remote address is reachable:
ping6 2001:0:130::1 -c 2
PING 2001:0:130::1(2001:0:130::1) 56 data bytes
64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms
64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms

--- 2001:0:130::1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1001ms
rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms

ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
11: ip6gretap1@NONE:  mtu 1434 ...
link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff

The MTU value 1434 is right. But if we delete the direct route:
ip -6 route del 2001:0:130::/64
ping6 2001:0:130::1 -c 2
connect: Network is unreachable
ip link add ip6gretap1 type ip6gretap\
 local 2001:0:130::2 remote 2001:0:130::1
ip link show ip6gretap1
12: ip6gretap1@NONE:  mtu 1448 ...
link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff

Now, the MTU value 1448 is larger than what was needed.

The reason is that if there is a reachable route, when
run following code in ip6gre_tnl_link_config:

if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
  (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

struct rt6_info *rt = rt6_lookup(t->net,
 &p->raddr, &p->laddr,
 p->link, strict);

if (!rt)
return;

if (rt->dst.dev) {
dev->hard_header_len = rt->dst.dev->hard_header_len +
   t_hlen;

if (set_mtu) {
dev->mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & 
IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
if (dev->type == ARPHRD_ETHER)
dev->mtu -= ETH_HLEN;

if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
}
}
ip6_rt_put(rt);
}

Because rt is not NULL here, so dev->mtu will subtract the ethernet
header length later. But when rt is NULL, it just simply return, so
dev->mtu doesn't update correctly in this situation.

This patch first verify the dev->type is ARPHRD_ETHER for ip6gretap
interface, and then decrease the mtu as early as possible.

Signed-off-by: Haishuang Yan 
---
Changes in v2:
  - Make the commit message more clearer.
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4541fa5..8ea5a4d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device 
*dev)
 
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
+   if (dev->type == ARPHRD_ETHER)
+   dev->mtu -= ETH_HLEN;
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
 
-- 
1.8.3.1





Re: IPv6 extension header privileges

2016-05-21 Thread Sowmini Varadhan
On (05/21/16 11:34), Hannes Frederic Sowa wrote:
> 
> My wording directly from the RFC was too strong, true, but given that
> there is a CALIPSO patch already floating around for the kernel and
> those options are strictly controlled by selinux policy and build the
> foundation for the networking separation we can't make it simply
> non-priv.

sure, I agree with that point.

--Sowmini



Re: IPv6 extension header privileges

2016-05-21 Thread Hannes Frederic Sowa
On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote:
> On (05/21/16 02:20), Hannes Frederic Sowa wrote:
> > 
> > There are some options inherently protocol depending like the jumbo
> > payload option, which should be under control of the kernel, or the
> > router alert option for igmp, which causes packets to be steered towards
> > the slow/software path of routers, which can be used for DoS attacks.
> > 
> > Setting CALIPSO options in IPv6 on packets as users would defeat the
> > whole CALIPSO model, etc.
> > 
> > The RFC3542 requires at least some of the options in dst/hop-by-hop
> 
> "requires" is a strong word. 3542 declares it as a  "may" (lower case).
> The only thing required strongly is IPV6_NEXTHOP itself.
> 
> I suspect 3542 was written at a time when hbh and dst opt were loosely
> defined and the "may" is just a place-holder (i.e., it's not even a MAY)

My wording directly from the RFC was too strong, true, but given that
there is a CALIPSO patch already floating around for the kernel and
those options are strictly controlled by selinux policy and build the
foundation for the networking separation we can't make it simply
non-priv.

Bye,
Hannes