[PATCH net] tipc: fix potential null-ptr dereference bugs in netlink compat functions
Before calling the nla_parse_nested function, its third argument should be checked to make sure it is not null. This patch fixes several potential null pointer dereference vulnerabilities in the tipc netlink functions. Signed-off-by: Baozeng Ding --- net/tipc/netlink_compat.c | 111 ++ 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4dfc5c1..1e18b78 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; - nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], -NULL); + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, nla_data(bearer[TIPC_NLA_BEARER_NAME]), @@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; char *name; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; - nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], -NULL); + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; - nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], -NULL); + err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL); + if (err) + return err; name = (char *)TLV_DATA(msg->req); if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) @@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_link_info link_info; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); + if (err) + return err; link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); @@ -751,6 +781,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; char port_str[27]; struct tipc_name_table_query *ntq; struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; @@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, static const char * const scope_str[] = {"", " zone", " cluster", " node"}; - nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, -attrs[TIPC_NLA_NAME_TABLE], NULL); + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], -NULL); + err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + if (err) + return err;
[PATCH net] tipc: fix potential null pointer dereference in some compat, functions
Before calling the nla_parse_nested function, make sure the pointer to the attribute is not null. This patch fixes several potential null pointer dereference vulnerabilities in the tipc netlink functions. Signed-off-by: Baozeng Ding --- net/tipc/netlink_compat.c | 111 ++ 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4dfc5c1..1e18b78 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; - nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], -NULL); + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, nla_data(bearer[TIPC_NLA_BEARER_NAME]), @@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; char *name; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; - nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], -NULL); + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; - nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], -NULL); + err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL); + if (err) + return err; name = (char *)TLV_DATA(msg->req); if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) @@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_link_info link_info; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); + if (err) + return err; link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); @@ -751,6 +781,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { + int err; char port_str[27]; struct tipc_name_table_query *ntq; struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; @@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, static const char * const scope_str[] = {"", " zone", " cluster", " node"}; - nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, -attrs[TIPC_NLA_NAME_TABLE], NULL); + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], -NULL); + err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + if (err) + return err; ntq =
Re: IPv6 extension header privileges
On 21.05.2016 19:46, Sowmini Varadhan wrote: > Tom Herbert wrote: > If you don't mind I'll change this to make specific options are > privileged and not all hbh and destopt. There is talk in IETF about > reinventing IP extensibility within UDP since the kernel APIs don't > allow setting EH. I would like to avoid that :-) > > Do you mean this > http://www.ietf.org/mail-archive/web/spud/current/msg00365.html > > Maybe I misunderstood that rather long thread, but the author of that > draft seems to be arguing for reinventing tcp congavoid and windowing > on top of udp to bypass kernel? ;-) Hmm, haven't read carefully but isn't that just plain TCP in UDP? I saw extension headers mentioned but haven't grasped why they deem necessary. > Hannes Frederic Sowa wrote: A white list of certain registered IPv6 IANA-options for non-priv whould > > Problem is that APIs are not IANA'ed. > Even RFC 3542 is just Informationaal. > > And even the classic socket API's that come down from BSD are not > ietf'ed or iana'ed. I think I don't completely understand this. IANA is numbering registry and if we have the proper option number allocated we can make sensible decisions and put options on the white list or provide a more complete sensible implementation of the specification in the kernel. E.g. if an option for encapsulation is going to be specified, normal users should not be able to set those, like with CALIPSO or some VNI inside hop-by-hop options. That should probably be controlled by a routing table or a flow matching subsystem, in the kernel. Congestion and windowing information need to be settable and receivable from user space. Unassigned option, we don't know anything about them so far and should probably being blocked for the time being, as we don't know which spec would use the number. I think it is also in favor of the IETF to get those numbers specified and allocated in a proper way, otherwise security won't be manageable at all any more. > Hannes Frederic Sowa wrote: >> Can you give some more details about the planned new options? I think we >> can also open the API up for all options where the fourth bit is set, >> which AFAIK denotes the experimental option space. And only have a >> blacklist for the fourth bit == 0 case. Otherwise this is something IETF >> people probably know more about what an impact this change could have. > > I think it would be ok for some options to be privileged, others to not. > We certainly have precedent for that from the classic socket APIs.. > and man pages etc can document any restrictions, as we do with other > ioctls/sockopts etc. Sure, we can make options unprivileged, we just need to know which one and find out the details how they mix with each other. Also user space should only be allowed to query the options it is allowed to set, too, and not the whole option blocks. Bye, Hannes
Re: [PATCH 0043/1529] Fix typo
I'm not applying a ton of patches that all have the same Subject line. How can anyone looking at the GIT shortlog figure out what might be different amongst any of these 1529 patches? You must prefix your Subject line with the subsystem or area that your patch is changing, followed by a colon character, then a space character. For this specific patch an appropriate subject line would be "[PATCH 0043/1529] hisax: Fix typo."
Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer
On 21.05.2016 21:47, Francois Romieu wrote: > Shuyu Wei : > [...] >> diff --git a/drivers/net/ethernet/arc/emac_main.c >> b/drivers/net/ethernet/arc/emac_main.c >> index a3a9392..c2447b0 100644 >> --- a/drivers/net/ethernet/arc/emac_main.c >> +++ b/drivers/net/ethernet/arc/emac_main.c >> @@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct >> net_device *ndev) >> >> skb_tx_timestamp(skb); >> >> + /* Make sure timestamp is set */ >> + smp_wmb(); > > Should be dma_wmb() (see davem's message). > > It's completely unrelated to SMP. > Its also completely unrelated to dma so I doubt that this is what davem meant. As far as I understood he was referring to the dma descriptor. Lino
Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer
On 21.05.2016 18:09, Shuyu Wei wrote: > Looks like I got it wrong in the first place. > > priv->tx_buff is not for the device, so there's no need to move it. > The race has been fixed by commit c278c253f3d9, I forgot to check > it out. That's my fault. > > I do find another problem. We need to use a barrier to make sure > skb_tx_timestamp() is called before setting the FOR_EMAC flag. > Shuyu, Could you please test the patch below? I implemented a new approach in which tx_clean uses txbd_curr to determine if there are more descriptors to check or if the loop can be left. Memory barriers on both sides (xmit and clean) should ensure that SKB and info are only accessed if they are valid. I also hope that skb_tx_timestamp is not an issue any more. BTW: concerning get_maintainer Alexander Kochetkov should be CCed for modifications in this area, so thats what I do hereby. --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -162,7 +162,13 @@ static void arc_emac_tx_clean(struct net_device *ndev) struct sk_buff *skb = tx_buff->skb; unsigned int info = le32_to_cpu(txbd->info); - if ((info & FOR_EMAC) || !txbd->data || !skb) + if (info & FOR_EMAC) + break; + + /* Make sure curr pointer is consistent with info */ + rmb(); + + if (*txbd_dirty == priv->txbd_curr) break; if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) { @@ -195,8 +201,8 @@ static void arc_emac_tx_clean(struct net_device *ndev) *txbd_dirty = (*txbd_dirty + 1) % TX_BD_NUM; } - /* Ensure that txbd_dirty is visible to tx() before checking -* for queue stopped. + /* Ensure that txbd_dirty is visible to tx() and we see the most recent +* value for txbd_curr. */ smp_mb(); @@ -680,35 +686,29 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len); priv->txbd[*txbd_curr].data = cpu_to_le32(addr); - - /* Make sure pointer to data buffer is set */ - wmb(); + priv->tx_buff[*txbd_curr].skb = skb; skb_tx_timestamp(skb); *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); - /* Make sure info word is set */ + /* 1. Make sure that with respect to tx_clean everything is set up +* properly before we advance txbd_curr. +* 2. Make sure writes to DMA descriptors are completed before we inform +* the hardware. +*/ wmb(); - priv->tx_buff[*txbd_curr].skb = skb; - /* Increment index to point to the next BD */ *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM; - /* Ensure that tx_clean() sees the new txbd_curr before -* checking the queue status. This prevents an unneeded wake -* of the queue in tx_clean(). + /* Ensure we see the most recent value of txbd_dirty and tx_clean() sees +* the updated value of txbd_curr. */ smp_mb(); - if (!arc_emac_tx_avail(priv)) { + if (!arc_emac_tx_avail(priv)) netif_stop_queue(ndev); - /* Refresh tx_dirty */ - smp_mb(); - if (arc_emac_tx_avail(priv)) - netif_start_queue(ndev); - } arc_reg_set(priv, R_STATUS, TXPL_MASK); -- 1.9.1
Re: Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?
On Sat, 21 May 2016 17:55:59 +0200 Hannes Frederic Sowa wrote: > On 21.05.2016 14:50, Shmulik Ladkani wrote: > > Hi, > > > > inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not > > to request resubmission for local delivery. > > > > For an INET6_PROTO_FINAL handler, the following actions gets executed > > prior delivery, in ip6_input_finish: > > > > nf_reset(skb); > > > > skb_postpull_rcsum(skb, skb_network_header(skb), > >skb_network_header_len(skb)); > > > > For some reason, l2tp_ip6_protocol handler is NOT marked as > > INET6_PROTO_FINAL. Probably an oversight. > > > > Since 'l2tp_ip6_recv' never results in a resubmission, the above actions > > are not applied to skbs passed to l2tp_ip6. > > > > Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL? > > I don't see any specific reason why it shouldn't be a INET6_PROTO_FINAL. > Anyway, receive path of L2TPv3 without UDP encapsulation doesn't deal > with checksums anyway, as far as I know. > > > What's the consequences not executing the above actions for l2tp_ip6 > > packets? > > Probably not a whole lot in this case. OK, so the skb_postpull_rcsum is irrelevant for IPPROTO_L2TP over ipv6. However, one more thing WRT to INET6_PROTO_FINAL not being set - we're also missing the multicast filtering of 'ip6_input_finish': if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, &hdr->saddr) && !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; I assume no reason to allow multicast daddr which aren't in the mc_list (or saddr excluded) to pass up into 'l2tp_ip6_recv'?
Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer
Shuyu Wei : [...] > diff --git a/drivers/net/ethernet/arc/emac_main.c > b/drivers/net/ethernet/arc/emac_main.c > index a3a9392..c2447b0 100644 > --- a/drivers/net/ethernet/arc/emac_main.c > +++ b/drivers/net/ethernet/arc/emac_main.c > @@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct > net_device *ndev) > > skb_tx_timestamp(skb); > > + /* Make sure timestamp is set */ > + smp_wmb(); Should be dma_wmb() (see davem's message). It's completely unrelated to SMP. -- Ueimor
Re: [PATCH net] uapi glibc compat: fix compilation when !__USE_MISC in glibc
On Thu, May 19, 2016 at 05:26:29PM +0200, Nicolas Dichtel wrote: > These structures are defined only if __USE_MISC is set in glibc net/if.h > headers, ie when _BSD_SOURCE or _SVID_SOURCE are defined. Also, reading /usr/include/features.h from Debian glibc 2.22-4: ... The `-ansi' switch to the GNU C compiler, and standards conformance options such as `-std=c99', define __STRICT_ANSI__. If none of these are defined, or if _DEFAULT_SOURCE is defined, the default is to have _POSIX_SOURCE set to one and _POSIX_C_SOURCE set to 200809L, as well as enabling miscellaneous functions from BSD and SVID. If more than one of these are defined, they accumulate. For example __STRICT_ANSI__, _POSIX_SOURCE and _POSIX_C_SOURCE together give you ISO C, 1003.1, and 1003.2, but nothing else. ... /* _BSD_SOURCE and _SVID_SOURCE are deprecated aliases for _DEFAULT_SOURCE. ... In the compile test I go with the defaults. I guess I should test all of these feature variants and in the future check deeper into the glibc ifdefs. > CC: Jan Engelhardt > CC: Josh Boyer > CC: Stephen Hemminger > CC: Waldemar Brodkorb > CC: Gabriel Laskar > CC: Mikko Rapeli > Fixes: 4a91cb61bb99 ("uapi glibc compat: fix compile errors when glibc > net/if.h included before linux/if.h") > Signed-off-by: Nicolas Dichtel Signed-off-by: Mikko Rapeli > --- > include/uapi/linux/libc-compat.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/include/uapi/linux/libc-compat.h > b/include/uapi/linux/libc-compat.h > index d5e38c73377c..e4f048ee7043 100644 > --- a/include/uapi/linux/libc-compat.h > +++ b/include/uapi/linux/libc-compat.h > @@ -52,7 +52,7 @@ > #if defined(__GLIBC__) > > /* Coordinate with glibc net/if.h header. */ > -#if defined(_NET_IF_H) > +#if defined(_NET_IF_H) && defined(__USE_MISC) > > /* GLIBC headers included first so don't define anything > * that would already be defined. */ > -- > 2.8.1 >
Re: IPv6 extension header privileges
Tom Herbert wrote: > >>> If you don't mind I'll change this to make specific options are > >>> privileged and not all hbh and destopt. There is talk in IETF about > >>> reinventing IP extensibility within UDP since the kernel APIs don't > >>> allow setting EH. I would like to avoid that :-) Do you mean this http://www.ietf.org/mail-archive/web/spud/current/msg00365.html Maybe I misunderstood that rather long thread, but the author of that draft seems to be arguing for reinventing tcp congavoid and windowing on top of udp to bypass kernel? ;-) Hannes Frederic Sowa wrote: > >> A white list of certain registered IPv6 IANA-options for non-priv whould Problem is that APIs are not IANA'ed. Even RFC 3542 is just Informationaal. And even the classic socket API's that come down from BSD are not ietf'ed or iana'ed. Hannes Frederic Sowa wrote: > Can you give some more details about the planned new options? I think we > can also open the API up for all options where the fourth bit is set, > which AFAIK denotes the experimental option space. And only have a > blacklist for the fourth bit == 0 case. Otherwise this is something IETF > people probably know more about what an impact this change could have. I think it would be ok for some options to be privileged, others to not. We certainly have precedent for that from the classic socket APIs.. and man pages etc can document any restrictions, as we do with other ioctls/sockopts etc. --Sowmini
Re: IPv6 extension header privileges
On 21.05.2016 18:00, Tom Herbert wrote: > On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa > wrote: >> On 21.05.2016 17:19, Tom Herbert wrote: >>> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa >>> wrote: On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: > On (05/21/16 02:20), Hannes Frederic Sowa wrote: >> >> There are some options inherently protocol depending like the jumbo >> payload option, which should be under control of the kernel, or the >> router alert option for igmp, which causes packets to be steered towards >> the slow/software path of routers, which can be used for DoS attacks. >> >> Setting CALIPSO options in IPv6 on packets as users would defeat the >> whole CALIPSO model, etc. >> >> The RFC3542 requires at least some of the options in dst/hop-by-hop > > "requires" is a strong word. 3542 declares it as a "may" (lower case). > The only thing required strongly is IPV6_NEXTHOP itself. > > I suspect 3542 was written at a time when hbh and dst opt were loosely > defined and the "may" is just a place-holder (i.e., it's not even a MAY) My wording directly from the RFC was too strong, true, but given that there is a CALIPSO patch already floating around for the kernel and those options are strictly controlled by selinux policy and build the foundation for the networking separation we can't make it simply non-priv. >>> If you don't mind I'll change this to make specific options are >>> privileged and not all hbh and destopt. There is talk in IETF about >>> reinventing IP extensibility within UDP since the kernel APIs don't >>> allow setting EH. I would like to avoid that :-) >> >> Hehe, certainly. >> >> A white list of certain registered IPv6 IANA-options for non-priv whould >> certainly fly in my opinion. That is what I meant with "More >> fine-grained parsing and setting of those options has never been >> implemented." from my first mail. >> >> I am not that certain about a blacklist though, but haven't thought >> about that enough. I didn't yet get around to review other options, but >> basically people could use private options in some proprietary settings >> and we could break their assumptions by such a change. >> >> Would a white list be sufficient? >> > Probably not. The "kernel is the problem" community always seem to be > looking for even the slightest API or implementation deficiency to > justify bypassing the kernel entirely. :-( Another issue, which could come up also is the ordering options during the merge of kernel policy and user space provided options, hmpf... Maybe a way out of it is to use some kind of sysctl, capability, or whatever, depending on the use cases? Bye, Hannes
[PATCH] net/tipc: fix potential null pointer dereference in several tipc netlink compat functions
Before calling the nla_parse_nested funciton, its third argument should be checked to make sure it is not null. This patch fixes several potential null pointer dereference vulnerability in the tipc netlink functions. Signed-off-by: Baozeng Ding --- net/tipc/netlink_compat.c | 111 ++ 1 file changed, 93 insertions(+), 18 deletions(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 4dfc5c1..959e989 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -345,10 +345,16 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { +int err; struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; -nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], - NULL); +if (!attrs[TIPC_NLA_BEARER]) +return -EINVAL; + +err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL); +if (err) +return err; return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, nla_data(bearer[TIPC_NLA_BEARER_NAME]), @@ -456,18 +462,35 @@ static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { +int err; char *name; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; -nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); +if (!attrs[TIPC_NLA_LINK]) +return -EINVAL; -nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], - NULL); +err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); +if (err) +return err; + +if (!link[TIPC_NLA_LINK_PROP]) +return -EINVAL; -nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], - NULL); +err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL); +if (err) +return err; + +if (!link[TIPC_NLA_LINK_STATS]) +return -EINVAL; + +err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL); +if (err) +return err; name = (char *)TLV_DATA(msg->req); if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) @@ -567,10 +590,17 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { +int err; struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_link_info link_info; -nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); +if (!attrs[TIPC_NLA_LINK]) +return -EINVAL; + +err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], NULL); +if (err) +return err; link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); @@ -751,6 +781,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { +int err; char port_str[27]; struct tipc_name_table_query *ntq; struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; @@ -759,11 +790,21 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, static const char * const scope_str[] = {"", " zone", " cluster", " node"}; -nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL); +if (!attrs[TIPC_NLA_NAME_TABLE]) +return -EINVAL; -nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], - NULL); +err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); +if (err) +return err; + +if (!attrs[TIPC_NLA_NAME_TABLE]) +return -EINVAL; + +err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); +if (err) +return err; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); @@ -813,10 +854,17 @@ out: static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { +int err; u32 type, lower, upper; struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; -nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); +if (!attrs[TIPC_NLA_PUBL]) +return -EINVAL; + +err = nla_parse_nested(publ, TIPC_NLA_P
Re: IPv6 extension header privileges
On 21.05.2016 18:00, Tom Herbert wrote: > On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa > wrote: >> On 21.05.2016 17:19, Tom Herbert wrote: >>> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa >>> wrote: On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: > On (05/21/16 02:20), Hannes Frederic Sowa wrote: >> >> There are some options inherently protocol depending like the jumbo >> payload option, which should be under control of the kernel, or the >> router alert option for igmp, which causes packets to be steered towards >> the slow/software path of routers, which can be used for DoS attacks. >> >> Setting CALIPSO options in IPv6 on packets as users would defeat the >> whole CALIPSO model, etc. >> >> The RFC3542 requires at least some of the options in dst/hop-by-hop > > "requires" is a strong word. 3542 declares it as a "may" (lower case). > The only thing required strongly is IPV6_NEXTHOP itself. > > I suspect 3542 was written at a time when hbh and dst opt were loosely > defined and the "may" is just a place-holder (i.e., it's not even a MAY) My wording directly from the RFC was too strong, true, but given that there is a CALIPSO patch already floating around for the kernel and those options are strictly controlled by selinux policy and build the foundation for the networking separation we can't make it simply non-priv. >>> If you don't mind I'll change this to make specific options are >>> privileged and not all hbh and destopt. There is talk in IETF about >>> reinventing IP extensibility within UDP since the kernel APIs don't >>> allow setting EH. I would like to avoid that :-) >> >> Hehe, certainly. >> >> A white list of certain registered IPv6 IANA-options for non-priv whould >> certainly fly in my opinion. That is what I meant with "More >> fine-grained parsing and setting of those options has never been >> implemented." from my first mail. >> >> I am not that certain about a blacklist though, but haven't thought >> about that enough. I didn't yet get around to review other options, but >> basically people could use private options in some proprietary settings >> and we could break their assumptions by such a change. >> >> Would a white list be sufficient? >> > Probably not. The "kernel is the problem" community always seem to be > looking for even the slightest API or implementation deficiency to > justify bypassing the kernel entirely. :-( Hmm, I see... Can you give some more details about the planned new options? I think we can also open the API up for all options where the fourth bit is set, which AFAIK denotes the experimental option space. And only have a blacklist for the fourth bit == 0 case. Otherwise this is something IETF people probably know more about what an impact this change could have. Bye, Hannes
Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer
Looks like I got it wrong in the first place. priv->tx_buff is not for the device, so there's no need to move it. The race has been fixed by commit c278c253f3d9, I forgot to check it out. That's my fault. I do find another problem. We need to use a barrier to make sure skb_tx_timestamp() is called before setting the FOR_EMAC flag. According to the comment(include/linux/skbuff.h): >/** > * skb_tx_timestamp() - Driver hook for transmit timestamping > * > * Ethernet MAC Drivers should call this function in their hard_xmit() > * function immediately before giving the sk_buff to the MAC hardware. > * > * Specifically, one should make absolutely sure that this function is > * called before TX completion of this packet can trigger. Otherwise > * the packet could potentially already be freed. > * > * @skb: A socket buffer. > */ --- diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index a3a9392..c2447b0 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -686,6 +686,9 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev) skb_tx_timestamp(skb); + /* Make sure timestamp is set */ + smp_wmb(); + *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); /* Make sure info word is set */
Re: IPv6 extension header privileges
On Sat, May 21, 2016 at 8:33 AM, Hannes Frederic Sowa wrote: > On 21.05.2016 17:19, Tom Herbert wrote: >> On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa >> wrote: >>> On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: On (05/21/16 02:20), Hannes Frederic Sowa wrote: > > There are some options inherently protocol depending like the jumbo > payload option, which should be under control of the kernel, or the > router alert option for igmp, which causes packets to be steered towards > the slow/software path of routers, which can be used for DoS attacks. > > Setting CALIPSO options in IPv6 on packets as users would defeat the > whole CALIPSO model, etc. > > The RFC3542 requires at least some of the options in dst/hop-by-hop "requires" is a strong word. 3542 declares it as a "may" (lower case). The only thing required strongly is IPV6_NEXTHOP itself. I suspect 3542 was written at a time when hbh and dst opt were loosely defined and the "may" is just a place-holder (i.e., it's not even a MAY) >>> >>> My wording directly from the RFC was too strong, true, but given that >>> there is a CALIPSO patch already floating around for the kernel and >>> those options are strictly controlled by selinux policy and build the >>> foundation for the networking separation we can't make it simply >>> non-priv. >>> >> If you don't mind I'll change this to make specific options are >> privileged and not all hbh and destopt. There is talk in IETF about >> reinventing IP extensibility within UDP since the kernel APIs don't >> allow setting EH. I would like to avoid that :-) > > Hehe, certainly. > > A white list of certain registered IPv6 IANA-options for non-priv whould > certainly fly in my opinion. That is what I meant with "More > fine-grained parsing and setting of those options has never been > implemented." from my first mail. > > I am not that certain about a blacklist though, but haven't thought > about that enough. I didn't yet get around to review other options, but > basically people could use private options in some proprietary settings > and we could break their assumptions by such a change. > > Would a white list be sufficient? > Probably not. The "kernel is the problem" community always seem to be looking for even the slightest API or implementation deficiency to justify bypassing the kernel entirely. :-( Tom > Bye, > Hannes >
Re: Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?
On 21.05.2016 14:50, Shmulik Ladkani wrote: > Hi, > > inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not > to request resubmission for local delivery. > > For an INET6_PROTO_FINAL handler, the following actions gets executed > prior delivery, in ip6_input_finish: > > nf_reset(skb); > > skb_postpull_rcsum(skb, skb_network_header(skb), > skb_network_header_len(skb)); > > For some reason, l2tp_ip6_protocol handler is NOT marked as > INET6_PROTO_FINAL. Probably an oversight. > > Since 'l2tp_ip6_recv' never results in a resubmission, the above actions > are not applied to skbs passed to l2tp_ip6. > > Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL? I don't see any specific reason why it shouldn't be a INET6_PROTO_FINAL. Anyway, receive path of L2TPv3 without UDP encapsulation doesn't deal with checksums anyway, as far as I know. > What's the consequences not executing the above actions for l2tp_ip6 > packets? Probably not a whole lot in this case. Bye, Hannes
Re: 4.5.0 on sun7i-a20-olinuxino-lime2: libphy: PHY stmmac-0:ffffffff not found (regression from rc7)
Hi, On 2016-05-20 12:36, Marc Zyngier wrote: On 20/05/16 11:30, Andre Heider wrote: Hi, On Fri, May 20, 2016 at 10:14 AM, Giuseppe CAVALLARO wrote: On 5/20/2016 9:56 AM, Marc Zyngier wrote: On 20/05/16 06:44, Andre Heider wrote: Giuseppe, Alexandre, et al., On Thu, Mar 17, 2016 at 8:52 AM, Marc Zyngier wrote: On Thu, 17 Mar 2016 00:56:40 +0100 Bert Lindner wrote: On 2016-03-16 18:42, Marc Zyngier wrote: On 16/03/16 15:10, Bert Lindner wrote: On 2016-03-16 14:10, Andreas Färber wrote: Am 16.03.2016 um 13:09 schrieb Robin Murphy: On 16/03/16 11:39, Marc Zyngier wrote: On 16/03/16 11:19, Bert Lindner wrote: ... For the board sun7i-a20-olinuxino-lime2, there seems to be a problem with the eth0 PHY in mainline kernel 4.5.0 that developed since 4.5.0-rc7. Ethernet does not work, although eth0 is reported: ... [9.767125] NET: Registered protocol family 10 [ 10.357405] libphy: PHY stmmac-0: not found [ 10.362382] eth0: Could not attach to PHY [ 10.366557] stmmac_open: Cannot attach to PHY (error: -19) ... v4 fixes for 4.5 are here: https://patchwork.ozlabs.org/patch/598195/ (revert) https://patchwork.ozlabs.org/patch/598196/ ... Good to know, thanks. Could you also give the potential fix a go (as mentioned by Andreas)? Just to make sure that whatever gets merged next will actually fix the issue. Yes sure, it took a while because I had to travel. Confirmed, the v4-for-4.5 fix works well for me, on sun7i-a20-olinuxino-lime2: root@lime2-079f:~# cat /proc/version Linux version 4.5.0-598195-598196-v4 (root@lime2-079f) (gcc version 4.9.1 (Ubuntu/Linaro 4.9.1-16ubuntu6) ) #1 SMP Wed Mar 16 16:44:22 UTC 2016 dmesg: [8.245273] NET: Registered protocol family 10 [9.297406] RX IPC Checksum Offload disabled [9.297460] No MAC Management Counters available [9.297951] IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready [ 16.285658] sun7i-dwmac 1c5.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx [ 16.285798] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready The board is connected to my laptop rather than to a switch, so that might be where the flow control message comes from (not sure). Anyway ethernet works. Cool, many thanks for taking the time to test and report. Hopefully Giuseppe will get this merged quickly enough in mainline, and it should then trickle into a 4.5-stable release (cc-ing stable on these patches would probably be a good idea, BTW). stmmac is broken on at least Lime2, BananaPi and Cubieboard2 since v4.5 [0], including all five stable releases :( All the A20 platforms are dead, actually. The v4.5 patches quoted above are already +4 weeks old, could we please get them into stable? For that, the maintainer would have needed to CC stable, which he didn't. I'd expect someone who cares to send these patches to stable. It'd be better if the maintainer would do it himself though. sure, I can send the patches to stable (sorry if I missed to add stable ML on CC). Andre, I have not clear if the train of patches actually fix the issue or if you need my support to fix something else. In that case I need some input for debugging (e.g. kernel log). Bert already confirmed that those two patches fixes stmmac on his Lime2, so I assume that it fixes the issue for all A20 platforms. let me know, is it enough to re-send the patches only? Just a resend with cc:stable :) Not quite. Please read Documentation/stable_kernel_rules.txt, and the section that concerns networking patches (and then consult Documentation/networking/netdev-FAQ.txt which has all the details). FWIW, recent 4.6-rc series and 4.6.0 have worked fine for me and the lime2. Had not tried 4.5.x again. Best, -bert
Re: IPv6 extension header privileges
On 21.05.2016 17:19, Tom Herbert wrote: > On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa > wrote: >> On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: >>> On (05/21/16 02:20), Hannes Frederic Sowa wrote: There are some options inherently protocol depending like the jumbo payload option, which should be under control of the kernel, or the router alert option for igmp, which causes packets to be steered towards the slow/software path of routers, which can be used for DoS attacks. Setting CALIPSO options in IPv6 on packets as users would defeat the whole CALIPSO model, etc. The RFC3542 requires at least some of the options in dst/hop-by-hop >>> >>> "requires" is a strong word. 3542 declares it as a "may" (lower case). >>> The only thing required strongly is IPV6_NEXTHOP itself. >>> >>> I suspect 3542 was written at a time when hbh and dst opt were loosely >>> defined and the "may" is just a place-holder (i.e., it's not even a MAY) >> >> My wording directly from the RFC was too strong, true, but given that >> there is a CALIPSO patch already floating around for the kernel and >> those options are strictly controlled by selinux policy and build the >> foundation for the networking separation we can't make it simply >> non-priv. >> > If you don't mind I'll change this to make specific options are > privileged and not all hbh and destopt. There is talk in IETF about > reinventing IP extensibility within UDP since the kernel APIs don't > allow setting EH. I would like to avoid that :-) Hehe, certainly. A white list of certain registered IPv6 IANA-options for non-priv whould certainly fly in my opinion. That is what I meant with "More fine-grained parsing and setting of those options has never been implemented." from my first mail. I am not that certain about a blacklist though, but haven't thought about that enough. I didn't yet get around to review other options, but basically people could use private options in some proprietary settings and we could break their assumptions by such a change. Would a white list be sufficient? Bye, Hannes
Re: IPv6 extension header privileges
On Sat, May 21, 2016 at 2:34 AM, Hannes Frederic Sowa wrote: > On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: >> On (05/21/16 02:20), Hannes Frederic Sowa wrote: >> > >> > There are some options inherently protocol depending like the jumbo >> > payload option, which should be under control of the kernel, or the >> > router alert option for igmp, which causes packets to be steered towards >> > the slow/software path of routers, which can be used for DoS attacks. >> > >> > Setting CALIPSO options in IPv6 on packets as users would defeat the >> > whole CALIPSO model, etc. >> > >> > The RFC3542 requires at least some of the options in dst/hop-by-hop >> >> "requires" is a strong word. 3542 declares it as a "may" (lower case). >> The only thing required strongly is IPV6_NEXTHOP itself. >> >> I suspect 3542 was written at a time when hbh and dst opt were loosely >> defined and the "may" is just a place-holder (i.e., it's not even a MAY) > > My wording directly from the RFC was too strong, true, but given that > there is a CALIPSO patch already floating around for the kernel and > those options are strictly controlled by selinux policy and build the > foundation for the networking separation we can't make it simply > non-priv. > If you don't mind I'll change this to make specific options are privileged and not all hbh and destopt. There is talk in IETF about reinventing IP extensibility within UDP since the kernel APIs don't allow setting EH. I would like to avoid that :-) > Bye, > Hannes
Re: [PATCH 0043/1529] Fix typo
On Sat, May 21, 2016 at 01:41:21PM +0200, Andrea Gelmini wrote: > Signed-off-by: Andrea Gelmini > --- > Documentation/isdn/HiSax.cert | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert > index f2a6fcb..1483cd9d 100644 > --- a/Documentation/isdn/HiSax.cert > +++ b/Documentation/isdn/HiSax.cert > @@ -32,7 +32,7 @@ special hardware used will be regarded as approved if at > least one > solution has been tested including those electrical tests. So if cards > or tas have been completely approved for any other os, the approval > for those electrical tests is valid for linux, too. > -Please send any questions regarding this drivers or approval abouts to > +Please send any questions regarding this drivers or approval about to Should be "these drivers". > wer...@isdn-development.de > Additional information and the type approval documents will be found > shortly on the Colognechip website www.colognechip.com > -- > 2.8.2.534.g1f66975 >
Re: [PATCH v2] ethernet:arc: Fix racing of TX ring buffer
On Thu, May 19, 2016 at 11:15:56PM +0200, Lino Sanfilippo wrote: > drivers/net/ethernet/arc/emac_main.c | 33 + > 1 file changed, 17 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/ethernet/arc/emac_main.c > b/drivers/net/ethernet/arc/emac_main.c > index a3a9392..b986499 100644 > --- a/drivers/net/ethernet/arc/emac_main.c > +++ b/drivers/net/ethernet/arc/emac_main.c > @@ -162,8 +162,13 @@ static void arc_emac_tx_clean(struct net_device *ndev) > struct sk_buff *skb = tx_buff->skb; > unsigned int info = le32_to_cpu(txbd->info); > > - if ((info & FOR_EMAC) || !txbd->data || !skb) > + if (info & FOR_EMAC) { > + /* Make sure we see consistent values for info, skb > + * and data. > + */ > + rmb(); > break; > + } > > if (unlikely(info & (DROP | DEFR | LTCL | UFLO))) { > stats->tx_errors++; > @@ -680,35 +685,31 @@ static int arc_emac_tx(struct sk_buff *skb, struct > net_device *ndev) > dma_unmap_len_set(&priv->tx_buff[*txbd_curr], len, len); > > priv->txbd[*txbd_curr].data = cpu_to_le32(addr); > - > - /* Make sure pointer to data buffer is set */ > - wmb(); > + priv->tx_buff[*txbd_curr].skb = skb; > > skb_tx_timestamp(skb); > > - *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); > - > - /* Make sure info word is set */ > + /* Make sure info is set after data and skb with respect to > + * tx_clean(). > + */ > wmb(); > > - priv->tx_buff[*txbd_curr].skb = skb; > + *info = cpu_to_le32(FOR_EMAC | FIRST_OR_LAST_MASK | len); > > /* Increment index to point to the next BD */ > *txbd_curr = (*txbd_curr + 1) % TX_BD_NUM; > > - /* Ensure that tx_clean() sees the new txbd_curr before > + /* 1.Ensure that tx_clean() sees the new txbd_curr before >* checking the queue status. This prevents an unneeded wake >* of the queue in tx_clean(). > + * 2.Ensure that all values are written to RAM and to DMA > + * before hardware is informed. > + * 3.Ensure we see the most recent value for tx_dirty. >*/ > - smp_mb(); > + mb(); > > - if (!arc_emac_tx_avail(priv)) { > + if (!arc_emac_tx_avail(priv)) > netif_stop_queue(ndev); > - /* Refresh tx_dirty */ > - smp_mb(); > - if (arc_emac_tx_avail(priv)) > - netif_start_queue(ndev); > - } > > arc_reg_set(priv, R_STATUS, TXPL_MASK); > > Hi Lino, I applied your patch and tested on my devices, it directly went to panic. So there must be something wrong. [ 13.084412] rockchip_emac 10204000.ethernet eth0: Link is Up - 100Mbps/Full - flow control rx/tx [ 13.093360] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [ 13.105915] Unable to handle kernel NULL pointer dereference at virtual address 00a8 [ 13.114173] pgd = c0004000 [ 13.117069] [00a8] *pgd= [ 13.120778] Internal error: Oops: 5 [#1] SMP ARM [ 13.125404] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0+ #88 [ 13.131404] Hardware name: Rockchip (Device Tree) [ 13.136108] task: c0804e00 ti: c080 task.ti: c080 [ 13.141515] PC is at __dev_kfree_skb_irq+0xc/0x8c [ 13.146223] LR is at arc_emac_poll+0x94/0x584 [ 13.150581] pc : []lr : []psr: 60080113 [ 13.150581] sp : c0801d88 ip : c0801da0 fp : c0801d9c [ 13.162046] r10: ee092000 r9 : ee093000 r8 : 0008 [ 13.167268] r7 : r6 : 0004 r5 : f08d0400 r4 : 0010 [ 13.173789] r3 : 0001 r2 : r1 : 0001 r0 : [ 13.180312] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 13.187441] Control: 10c5387d Table: 8d24c04a DAC: 0051 [ 13.193184] Process swapper/0 (pid: 0, stack limit = 0xc0800210) [ 13.199187] Stack: (0xc0801d88 to 0xc0802000) [ 13.203548] 1d80: 0010 f08d0400 c0801df4 c0801da0 c03c5e64 c046f588 [ 13.211723] 1da0: ee0cc154 ee0924a8 c0801dcc 0028 c0806614 f08d0408 [ 13.219897] 1dc0: ee0922a8 007f ee0924a8 c03c5dd0 0028 012c fffee77c [ 13.228071] 1de0: c0802100 c0801e18 c0801e54 c0801df8 c0471bfc c03c5ddc c0801e3c c06e5ae8 [ 13.236245] 1e00: c08029f4 c08029f4 c083d821 2e86f000 c07486c0 eefb76c0 c0801e18 c0801e18 [ 13.244419] 1e20: c0801e20 c0801e20 ee841900 c080208c c080 0100 0003 [ 13.252594] 1e40: c0802080 4003 c0801eb4 c0801e58 c012810c c04719fc 0001 ee808000 [ 13.260768] 1e60: c08024a8 0020 c0802100 fffee77b 000a c06017ec c083f940 c07432f8 [ 13.268941] 1e80: c0802080 c0801e58 c0801eb4 c07463cc 0001 ee808000 [ 13.277115] 1ea0: c08024a8 eefe01c0 c0801ec4 c0801eb8 c01284f8 c0127ff4 c0801eec c0801ec8 [
Missing INET6_PROTO_FINAL in l2tp_ip6_protocol?
Hi, inet6_protocol's INET6_PROTO_FINAL flag denotes handler is expected not to request resubmission for local delivery. For an INET6_PROTO_FINAL handler, the following actions gets executed prior delivery, in ip6_input_finish: nf_reset(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); For some reason, l2tp_ip6_protocol handler is NOT marked as INET6_PROTO_FINAL. Probably an oversight. Since 'l2tp_ip6_recv' never results in a resubmission, the above actions are not applied to skbs passed to l2tp_ip6. Any reason why l2tp_ip6_protocol should NOT be marked INET6_PROTO_FINAL? What's the consequences not executing the above actions for l2tp_ip6 packets? Thanks, Shmulik
[PATCH 0051/1529] Fix typo
Signed-off-by: Andrea Gelmini --- Documentation/networking/vortex.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 97282da..40f1ab2 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -243,7 +243,7 @@ Media selection A number of the older NICs such as the 3c590 and 3c900 series have 10base2 and AUI interfaces. -Prior to January, 2001 this driver would autoeselect the 10base2 or AUI +Prior to January, 2001 this driver would autoselect the 10base2 or AUI port if it didn't detect activity on the 10baseT port. It would then get stuck on the 10base2 port and a driver reload was necessary to switch back to 10baseT. This behaviour could not be prevented with a -- 2.8.2.534.g1f66975
[PATCH 0043/1529] Fix typo
Signed-off-by: Andrea Gelmini --- Documentation/isdn/HiSax.cert | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert index f2a6fcb..1483cd9d 100644 --- a/Documentation/isdn/HiSax.cert +++ b/Documentation/isdn/HiSax.cert @@ -32,7 +32,7 @@ special hardware used will be regarded as approved if at least one solution has been tested including those electrical tests. So if cards or tas have been completely approved for any other os, the approval for those electrical tests is valid for linux, too. -Please send any questions regarding this drivers or approval abouts to +Please send any questions regarding this drivers or approval about to wer...@isdn-development.de Additional information and the type approval documents will be found shortly on the Colognechip website www.colognechip.com -- 2.8.2.534.g1f66975
[PATCH v2 2/2] ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.
In gre6 xmit path, we are sending a GRE packet, so set fl6 proto to IPPROTO_GRE properly. Signed-off-by: Haishuang Yan --- Changes in v2: - Initialize the flow protocol in ip6gre_tnl_link_config --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8ea5a4d..e706621 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) fl6->daddr = p->raddr; fl6->flowi6_oif = p->link; fl6->flowlabel = 0; + fl6->flowi6_proto = IPPROTO_GRE; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; -- 1.8.3.1
[PATCH v2 1/2] ip6_gre: Fix MTU setting for ip6gretap
When creat an ip6gretap interface with an unreachable route, the MTU is about 14 bytes larger than what was needed. If the remote address is reachable: ping6 2001:0:130::1 -c 2 PING 2001:0:130::1(2001:0:130::1) 56 data bytes 64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms 64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms --- 2001:0:130::1 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1001ms rtt min/avg/max/mdev = 1.465/41.316/81.167/39.851 ms ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 11: ip6gretap1@NONE: mtu 1434 ... link/ether c2:f3:f8:c1:2c:bf brd ff:ff:ff:ff:ff:ff The MTU value 1434 is right. But if we delete the direct route: ip -6 route del 2001:0:130::/64 ping6 2001:0:130::1 -c 2 connect: Network is unreachable ip link add ip6gretap1 type ip6gretap\ local 2001:0:130::2 remote 2001:0:130::1 ip link show ip6gretap1 12: ip6gretap1@NONE: mtu 1448 ... link/ether 7e:e1:d2:c4:06:5e brd ff:ff:ff:ff:ff:ff Now, the MTU value 1448 is larger than what was needed. The reason is that if there is a reachable route, when run following code in ip6gre_tnl_link_config: if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); if (!rt) return; if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + t_hlen; if (set_mtu) { dev->mtu = rt->dst.dev->mtu - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } } ip6_rt_put(rt); } Because rt is not NULL here, so dev->mtu will subtract the ethernet header length later. But when rt is NULL, it just simply return, so dev->mtu doesn't update correctly in this situation. This patch first verify the dev->type is ARPHRD_ETHER for ip6gretap interface, and then decrease the mtu as early as possible. Signed-off-by: Haishuang Yan --- Changes in v2: - Make the commit message more clearer. --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4541fa5..8ea5a4d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1029,6 +1029,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + if (dev->type == ARPHRD_ETHER) + dev->mtu -= ETH_HLEN; if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; -- 1.8.3.1
Re: IPv6 extension header privileges
On (05/21/16 11:34), Hannes Frederic Sowa wrote: > > My wording directly from the RFC was too strong, true, but given that > there is a CALIPSO patch already floating around for the kernel and > those options are strictly controlled by selinux policy and build the > foundation for the networking separation we can't make it simply > non-priv. sure, I agree with that point. --Sowmini
Re: IPv6 extension header privileges
On Sat, May 21, 2016, at 03:56, Sowmini Varadhan wrote: > On (05/21/16 02:20), Hannes Frederic Sowa wrote: > > > > There are some options inherently protocol depending like the jumbo > > payload option, which should be under control of the kernel, or the > > router alert option for igmp, which causes packets to be steered towards > > the slow/software path of routers, which can be used for DoS attacks. > > > > Setting CALIPSO options in IPv6 on packets as users would defeat the > > whole CALIPSO model, etc. > > > > The RFC3542 requires at least some of the options in dst/hop-by-hop > > "requires" is a strong word. 3542 declares it as a "may" (lower case). > The only thing required strongly is IPV6_NEXTHOP itself. > > I suspect 3542 was written at a time when hbh and dst opt were loosely > defined and the "may" is just a place-holder (i.e., it's not even a MAY) My wording directly from the RFC was too strong, true, but given that there is a CALIPSO patch already floating around for the kernel and those options are strictly controlled by selinux policy and build the foundation for the networking separation we can't make it simply non-priv. Bye, Hannes