[PATCH net V3] openvswitch: fix skb_panic due to the incorrect actions attrlen

2017-08-15 Thread Liping Zhang
From: Liping Zhang 

For sw_flow_actions, the actions_len only represents the kernel part's
size, and when we dump the actions to the userspace, we will do the
convertions, so it's true size may become bigger than the actions_len.

But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len
to alloc the skbuff, so the user_skb's size may become insufficient and
oops will happen like this:
  skbuff: skb_over_panic: text:8148fabf len:1749 put:157 head:
  881300f39000 data:881300f39000 tail:0x6d5 end:0x6c0 dev:
  [ cut here ]
  kernel BUG at net/core/skbuff.c:129!
  [...]
  Call Trace:
   
   [] skb_put+0x43/0x44
   [] skb_zerocopy+0x6c/0x1f4
   [] queue_userspace_packet+0x3a3/0x448 [openvswitch]
   [] ovs_dp_upcall+0x30/0x5c [openvswitch]
   [] output_userspace+0x132/0x158 [openvswitch]
   [] ? ip6_rcv_finish+0x74/0x77 [ipv6]
   [] do_execute_actions+0xcc1/0xdc8 [openvswitch]
   [] ovs_execute_actions+0x74/0x106 [openvswitch]
   [] ovs_dp_process_packet+0xe1/0xfd [openvswitch]
   [] ? key_extract+0x63c/0x8d5 [openvswitch]
   [] ovs_vport_receive+0xa1/0xc3 [openvswitch]
  [...]

Also we can find that the actions_len is much little than the orig_len:
  crash> struct sw_flow_actions 0x8812f539d000
  struct sw_flow_actions {
rcu = {
  next = 0x8812f5398800,
  func = 0xe3b00035db32
},
orig_len = 1384,
actions_len = 592,
actions = 0x8812f539d01c
  }

So as a quick fix, use the orig_len instead of the actions_len to alloc
the user_skb.

Last, this oops happened on our system running a relative old kernel, but
the same risk still exists on the mainline, since we use the wrong
actions_len from the beginning.

Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet 
upcall to userspace")
Cc: Neil McKee 
Signed-off-by: Liping Zhang 
---
 V3: remove the unnecessary dp_upcall_info->actions_attrlen, suggested
 by Pravin Shelar.
 V2: move actions_attrlen into ovs_skb_cb, which will make codes more
 clean, suggested by Pravin Shelar.

 net/openvswitch/actions.c  | 1 +
 net/openvswitch/datapath.c | 7 ---
 net/openvswitch/datapath.h | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..a54a556fcdb5 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1337,6 +1337,7 @@ int ovs_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
goto out;
}
 
+   OVS_CB(skb)->acts_origlen = acts->orig_len;
err = do_execute_actions(dp, skb, key,
 acts->actions, acts->actions_len);
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..6b44fe405282 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -381,7 +381,7 @@ static int queue_gso_packets(struct datapath *dp, struct 
sk_buff *skb,
 }
 
 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen)
+ unsigned int hdrlen, int actions_attrlen)
 {
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@ -398,7 +398,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info 
*upcall_info,
 
/* OVS_PACKET_ATTR_ACTIONS */
if (upcall_info->actions_len)
-   size += nla_total_size(upcall_info->actions_len);
+   size += nla_total_size(actions_attrlen);
 
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
@@ -465,7 +465,8 @@ static int queue_userspace_packet(struct datapath *dp, 
struct sk_buff *skb,
else
hlen = skb->len;
 
-   len = upcall_msg_size(upcall_info, hlen - cutlen);
+   len = upcall_msg_size(upcall_info, hlen - cutlen,
+ OVS_CB(skb)->acts_origlen);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 5d8dcd88815f..480600649d0b 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -99,11 +99,13 @@ struct datapath {
  * when a packet is received by OVS.
  * @mru: The maximum received fragement size; 0 if the packet is not
  * fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
  * @cutlen: The number of bytes from the packet end to be removed.
  */
 struct ovs_skb_cb {
struct vport*input_vport;
u16 mru;
+   u16 acts_origlen;
u32 cutlen;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
-- 
2.13.4




Re: [PATCH net V2] openvswitch: fix skb_panic due to the incorrect actions attrlen

2017-08-15 Thread Liping Zhang
2017-08-16 7:35 GMT+08:00 Pravin Shelar :
[...]
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index e4610676299b..f849ef52853f 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -921,6 +921,7 @@ static int output_userspace(struct datapath *dp, struct 
>> sk_buff *skb,
>> /* Include actions. */
>> upcall.actions = actions;
>> upcall.actions_len = actions_len;
>> +   upcall.actions_attrlen = OVS_CB(skb)->acts_origlen;
> OVS_CB acts_origlen should be accessible in upcall_msg_size (), is
> there reason to add this member to struct dp_upcall_info?

Hmm... this means we should add an extra parameter to the upcall_msg_size()
function, i.e.:
 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen)
+ unsigned int hdrlen, int actions_attrlen)

So which one do you prefer? If the latter, I can send V3.


[PATCH net V2] openvswitch: fix skb_panic due to the incorrect actions attrlen

2017-08-15 Thread Liping Zhang
From: Liping Zhang 

For sw_flow_actions, the actions_len only represents the kernel part's
size, and when we dump the actions to the userspace, we will do the
convertions, so it's true size may become bigger than the actions_len.

But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len
to alloc the skbuff, so the user_skb's size may become insufficient and
oops will happen like this:
  skbuff: skb_over_panic: text:8148fabf len:1749 put:157 head:
  881300f39000 data:881300f39000 tail:0x6d5 end:0x6c0 dev:
  [ cut here ]
  kernel BUG at net/core/skbuff.c:129!
  [...]
  Call Trace:
   
   [] skb_put+0x43/0x44
   [] skb_zerocopy+0x6c/0x1f4
   [] queue_userspace_packet+0x3a3/0x448 [openvswitch]
   [] ovs_dp_upcall+0x30/0x5c [openvswitch]
   [] output_userspace+0x132/0x158 [openvswitch]
   [] ? ip6_rcv_finish+0x74/0x77 [ipv6]
   [] do_execute_actions+0xcc1/0xdc8 [openvswitch]
   [] ovs_execute_actions+0x74/0x106 [openvswitch]
   [] ovs_dp_process_packet+0xe1/0xfd [openvswitch]
   [] ? key_extract+0x63c/0x8d5 [openvswitch]
   [] ovs_vport_receive+0xa1/0xc3 [openvswitch]
  [...]

Also we can find that the actions_len is much little than the orig_len:
  crash> struct sw_flow_actions 0x8812f539d000
  struct sw_flow_actions {
rcu = {
  next = 0x8812f5398800,
  func = 0xe3b00035db32
},
orig_len = 1384,
actions_len = 592,
actions = 0x8812f539d01c
  }

So as a quick fix, use the orig_len instead of the actions_len to alloc
the user_skb.

Last, this oops happened on our system running a relative old kernel, but
the same risk still exists on the mainline, since we use the wrong
actions_len from the beginning.

Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet 
upcall to userspace")
Cc: Neil McKee 
Signed-off-by: Liping Zhang 
---
 V2: move actions_attrlen into ovs_skb_cb, which will make codes more
 clean, suggested by Pravin Shelar.

 net/openvswitch/actions.c  | 2 ++
 net/openvswitch/datapath.c | 2 +-
 net/openvswitch/datapath.h | 3 +++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..f849ef52853f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -921,6 +921,7 @@ static int output_userspace(struct datapath *dp, struct 
sk_buff *skb,
/* Include actions. */
upcall.actions = actions;
upcall.actions_len = actions_len;
+   upcall.actions_attrlen = OVS_CB(skb)->acts_origlen;
break;
}
 
@@ -1337,6 +1338,7 @@ int ovs_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
goto out;
}
 
+   OVS_CB(skb)->acts_origlen = acts->orig_len;
err = do_execute_actions(dp, skb, key,
 acts->actions, acts->actions_len);
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..66162e64e8b5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -398,7 +398,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info 
*upcall_info,
 
/* OVS_PACKET_ATTR_ACTIONS */
if (upcall_info->actions_len)
-   size += nla_total_size(upcall_info->actions_len);
+   size += nla_total_size(upcall_info->actions_attrlen);
 
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 5d8dcd88815f..8fd902c946ff 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -99,11 +99,13 @@ struct datapath {
  * when a packet is received by OVS.
  * @mru: The maximum received fragement size; 0 if the packet is not
  * fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
  * @cutlen: The number of bytes from the packet end to be removed.
  */
 struct ovs_skb_cb {
struct vport*input_vport;
u16 mru;
+   u16 acts_origlen;
u32 cutlen;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -124,6 +126,7 @@ struct dp_upcall_info {
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
+   int actions_attrlen;
u32 portid;
u8 cmd;
u16 mru;
-- 
2.13.4




Re: [PATCH net] openvswitch: fix skb_panic due to the incorrect actions attrlen

2017-08-14 Thread Liping Zhang
2017-08-15 13:01 GMT+08:00 Pravin Shelar :
[...]
>>  net/openvswitch/actions.c  | 39 +--
>>  net/openvswitch/datapath.c |  2 +-
>>  net/openvswitch/datapath.h |  1 +
>>  3 files changed, 27 insertions(+), 15 deletions(-)
>>
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index e4610676299b..799a22dfb89e 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -48,6 +48,7 @@ struct deferred_action {
>> struct sk_buff *skb;
>> const struct nlattr *actions;
>> int actions_len;
>> +   int actions_attrlen;
>>
> Have you considered passing this value using struct ovs_skb_cb? That
> would save passing this parameter in all these functions.

Thanks for your reviewing.

Right, this will make codes more clean, I will send V2 later.


[PATCH net] openvswitch: fix skb_panic due to the incorrect actions attrlen

2017-08-13 Thread Liping Zhang
From: Liping Zhang 

For sw_flow_actions, the actions_len only represents the kernel part's
size, and when we dump the actions to the userspace, we will do the
convertions, so it's true size may become bigger than the actions_len.

But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len
to alloc the skbuff, so the user_skb's size may become insufficient and
oops will happen like this:
  skbuff: skb_over_panic: text:8148fabf len:1749 put:157 head:
  881300f39000 data:881300f39000 tail:0x6d5 end:0x6c0 dev:
  [ cut here ]
  kernel BUG at net/core/skbuff.c:129!
  [...]
  Call Trace:
   
   [] skb_put+0x43/0x44
   [] skb_zerocopy+0x6c/0x1f4
   [] queue_userspace_packet+0x3a3/0x448 [openvswitch]
   [] ovs_dp_upcall+0x30/0x5c [openvswitch]
   [] output_userspace+0x132/0x158 [openvswitch]
   [] ? ip6_rcv_finish+0x74/0x77 [ipv6]
   [] do_execute_actions+0xcc1/0xdc8 [openvswitch]
   [] ovs_execute_actions+0x74/0x106 [openvswitch]
   [] ovs_dp_process_packet+0xe1/0xfd [openvswitch]
   [] ? key_extract+0x63c/0x8d5 [openvswitch]
   [] ovs_vport_receive+0xa1/0xc3 [openvswitch]
  [...]

Also we can find that the actions_len is much little than the orig_len:
  crash> struct sw_flow_actions 0x8812f539d000
  struct sw_flow_actions {
rcu = {
  next = 0x8812f5398800,
  func = 0xe3b00035db32
},
orig_len = 1384,
actions_len = 592,
actions = 0x8812f539d01c
  }

So as a quick fix, use the orig_len instead of the actions_len to alloc
the user_skb.

Last, this oops happened on our system running a relative old kernel, but
the same risk still exists on the mainline, since we use the wrong
actions_len from the beginning.

Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet 
upcall to userspace")
Cc: Neil McKee 
Signed-off-by: Liping Zhang 
---
 net/openvswitch/actions.c  | 39 +--
 net/openvswitch/datapath.c |  2 +-
 net/openvswitch/datapath.h |  1 +
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..799a22dfb89e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -48,6 +48,7 @@ struct deferred_action {
struct sk_buff *skb;
const struct nlattr *actions;
int actions_len;
+   int actions_attrlen;
 
/* Store pkt_key clone when creating deferred action. */
struct sw_flow_key pkt_key;
@@ -135,7 +136,8 @@ static struct deferred_action *action_fifo_put(struct 
action_fifo *fifo)
 static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct sw_flow_key *key,
const struct nlattr *actions,
-   const int actions_len)
+   const int actions_len,
+   const int actions_attrlen)
 {
struct action_fifo *fifo;
struct deferred_action *da;
@@ -146,6 +148,7 @@ static struct deferred_action *add_deferred_actions(struct 
sk_buff *skb,
da->skb = skb;
da->actions = actions;
da->actions_len = actions_len;
+   da->actions_attrlen = actions_attrlen;
da->pkt_key = *key;
}
 
@@ -166,6 +169,7 @@ static int clone_execute(struct datapath *dp, struct 
sk_buff *skb,
 struct sw_flow_key *key,
 u32 recirc_id,
 const struct nlattr *actions, int len,
+int actions_attrlen,
 bool last, bool clone_flow_key);
 
 static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -880,7 +884,7 @@ static void do_output(struct datapath *dp, struct sk_buff 
*skb, int out_port,
 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len,
-   uint32_t cutlen)
+   int actions_attrlen, uint32_t cutlen)
 {
struct dp_upcall_info upcall;
const struct nlattr *a;
@@ -921,6 +925,7 @@ static int output_userspace(struct datapath *dp, struct 
sk_buff *skb,
/* Include actions. */
upcall.actions = actions;
upcall.actions_len = actions_len;
+   upcall.actions_attrlen = actions_attrlen;
break;
}
 
@@ -936,7 +941,7 @@ static int output_userspace(struct datapath *dp, struct 
sk_buff *skb,
  */
 static int sample(struct datapath *dp, struct sk_buff *skb,
  struct sw_flow_key *key, const struct nlattr *attr,
- bool last)
+ int actions_a

[PATCH net] openvswitch: fix potential out of bound access in parse_ct

2017-07-23 Thread Liping Zhang
From: Liping Zhang 

Before the 'type' is validated, we shouldn't use it to fetch the
ovs_ct_attr_lens's minlen and maxlen, else, out of bound access
may happen.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Signed-off-by: Liping Zhang 
---
 net/openvswitch/conntrack.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e3c4c6c..03859e3 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1310,8 +1310,8 @@ static int parse_ct(const struct nlattr *attr, struct 
ovs_conntrack_info *info,
 
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
-   int maxlen = ovs_ct_attr_lens[type].maxlen;
-   int minlen = ovs_ct_attr_lens[type].minlen;
+   int maxlen;
+   int minlen;
 
if (type > OVS_CT_ATTR_MAX) {
OVS_NLERR(log,
@@ -1319,6 +1319,9 @@ static int parse_ct(const struct nlattr *attr, struct 
ovs_conntrack_info *info,
  type, OVS_CT_ATTR_MAX);
return -EINVAL;
}
+
+   maxlen = ovs_ct_attr_lens[type].maxlen;
+   minlen = ovs_ct_attr_lens[type].minlen;
if (nla_len(a) < minlen || nla_len(a) > maxlen) {
OVS_NLERR(log,
  "Conntrack attr type has unexpected length 
(type=%d, length=%d, expected=%d)",
-- 
2.5.5




[PATCH net-next] net: rps: don't skip offline cpus when set rps_cpus

2017-05-15 Thread Liping Zhang
From: Liping Zhang 

On our 4-core system, sometimes I can enable all CPUs to process packets.
But sometimes I can't, if all the CPUs become offline except core 0, I
will get the following result, which is really annoying for my script:
 # echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
 # cat /sys/class/net/eth0/queues/rx-0/rps_cpus
 1

Since we won't steer the packets to these offline cpus, it's reasonable
to enable all configed cpus to the rps_map, even if they are offline for
the time being.

Signed-off-by: Liping Zhang 
---
 net/core/net-sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65ea0ff..8adb36d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -731,7 +731,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
 
i = 0;
-   for_each_cpu_and(cpu, mask, cpu_online_mask)
+   for_each_cpu_and(cpu, mask, cpu_possible_mask)
map->cpus[i++] = cpu;
 
if (i)
-- 
2.9.3




Re: [PATCH] net: netfilter: Replace explicit NULL comparisons

2017-04-09 Thread Liping Zhang
2017-04-09 16:26 GMT+08:00 Jan Engelhardt :
>
> On Sunday 2017-04-09 05:42, Arushi Singhal wrote:
>>On Sun, Apr 9, 2017 at 1:44 AM, Pablo Neira Ayuso  wrote:
>>  On Sat, Apr 08, 2017 at 08:21:56PM +0200, Jan Engelhardt wrote:
>>  > On Saturday 2017-04-08 19:21, Arushi Singhal wrote:
>>  >
>>  > >Replace explicit NULL comparison with ! operator to simplify code.
>>  >
>>  > I still wouldn't do this, for the same reason as before. Comparing to
>>  > NULL explicitly more or less gave an extra guarantee that the other
>>  > operand was also a pointer.
>>
>>  Arushi, where does it say in the coding style that this is prefered?
>>
>>This is reported by checkpatch.pl script.
>
> checkpatch has been controversial at times, like when people took the 80
> character limit way too literally. Changing pointer comparisons looks like
> another thing that is better left ignored.

Yes, I agree too. Converting the "if (p != NULL)" to "if (p)" like this seems
unnecessary.


Re: [PATCH 05/10] netfilter: nf_tables: fix mismatch in big-endian system

2017-03-16 Thread Liping Zhang
Hi David,
2017-03-16 18:58 GMT+08:00 David Laight :
[...]
>> For the similar reason, when loading an u16 value from the u32 data
>> register, we should use "*(u16 *) sreg;" instead of "(u16)*sreg;",
>> the 2nd method will get the wrong value in the big-endian system.
> ...
>
> That seems to be papering over some of the obvious cracks.
>
> The fact that the entire 32bits are zeroed makes me suspect that
> in some places values are written as 8 bits and read later as 32.
> The only way that can work on BE is if the values are always written
> as 32 bit ones (assignment style 2).

In nftables, user can use concatenations to put two or more selectors together.
For example, we can use "nft add rule x y tcp dport . ip saddr 22 . 1.1.1.1"
to match the tcp dest port and ip source addr at the same time.

In such case, tcp dport will be stored to REG0 and ip saddr will be stored to
REG1, so the layout will be like this(from the lowest address):
  PORT(2 bytes) - (2 bytes) - IPADDR(4 bytes)

Later we will use the statement "memcmp(®, &data, 8)" to do compare,
so the  part must be filled with zero to avoid garbage value.

>
> OTOH using memcmp(,,2) relies on the data being in the lower addressed
> bytes.
>
> If the data does need to be in the lower addressed bytes I'd suggest
> using a union rather than all the casts.
>
> David
>


Re: [PATCH nf 1/1] netfilter: h323,sip: Fix possible dead loop in nat_rtp_rtcp and nf_nat_sdp_media

2017-03-02 Thread Liping Zhang
Hi,
2017-03-02 18:18 GMT+08:00 Gao Feng :
[...]
> The expect class is NF_CT_EXPECT_CLASS_DEFAULT, and proto is
> IPPROTO_UDP at the function "expect_rtp_rtcp",
> And it makes sure the port is even number.
>
> But look at the process_gcf, the port is got from the packet data at
> function get_h225_addr.
> So it may be odd number.
> It also would add one expect node whose class is
> NF_CT_EXPECT_CLASS_DEFAULT, and proto is IPPROTO_UDP.

The nat_rtp_rtcp() is only invoked by expect_rtp_rtcp, and nf_nat_sdp_media()
is only invoked by set_expected_rtp_rtcp. So the RTP port is ensured to be
even, as well as the rtp_exp->tuple.dst.u.udp.port.

Note: the rtp_exp is alloced by nf_ct_expect_alloc, and initialized by
nf_ct_expect_init, then passed to nat_rtp_rtcp or nf_nat_sdp_media.

So I cannot figure out why process_gcf will affect this? Or I missed something?


Re: [PATCH nf 1/1] netfilter: h323,sip: Fix possible dead loop in nat_rtp_rtcp and nf_nat_sdp_media

2017-03-02 Thread Liping Zhang
Hi,

2017-03-02 15:57 GMT+08:00  :
> From: Gao Feng 
>
> When h323 and sip try to insert expect nodes, they would increase
> the port by 2 for loop, and the loop condition is that "port != 0".
> So when the start port is odd number, port never increases to zero.

This seems will never happen, since the RTP port has been ensured to
be even.

For example, at expect_rtp_rtcp():
   ...
   /* RTP port is even */
   rtp_port = port & ~htons(1);
   rtcp_port = port | htons(1);

And at set_expected_rtp_rtcp():
   ...
   base_port = ntohs(tuple.dst.u.udp.port) & ~1;
   rtp_port = htons(base_port);


[PATCH net] net: route: add missing nla_policy entry for RTA_MARK attribute

2017-02-27 Thread Liping Zhang
From: Liping Zhang 

This will add stricter validating for RTA_MARK attribute.

Signed-off-by: Liping Zhang 
---
 net/ipv4/fib_frontend.c | 1 +
 net/ipv6/route.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b39a791..42bfd08 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_ENCAP_TYPE]= { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_UID]   = { .type = NLA_U32 },
+   [RTA_MARK]  = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f54f426..d94f1df 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2891,6 +2891,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] 
= {
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_EXPIRES]   = { .type = NLA_U32 },
[RTA_UID]   = { .type = NLA_U32 },
+   [RTA_MARK]  = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
-- 
2.5.5




Re: [patch] netfilter: nf_tables: underflow in nft_parse_u32_check()

2016-10-12 Thread Liping Zhang
2016-10-12 14:08 GMT+08:00 Dan Carpenter :
> We don't want to allow negatives here.
>
> Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 
> netlink attributes')
> Signed-off-by: Dan Carpenter 
>
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index b70d3ea..dd55187 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx 
> *ctx,
>   */

I think it's better if you can convert it to follows:

>  unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 
> *dest)

int nft_parse_u32_check(const struct nlattr *attr, u32 max, u32 *dest)

>  {
> -   int val;
> +   uint val;

u32 val;

>
> val = ntohl(nla_get_be32(attr));
> if (val > max)


Re: kernel v4.8: iptables logs are truncated with the 4.8 kernel?

2016-10-10 Thread Liping Zhang
2016-10-11 11:57 GMT+08:00 Chris Caputo :
> I have tested the above patch with 4.8.1, with and without nflog-size
> defined in an iptables configuration, and it works well.
>
> The ulogd-2.0.5 segfaults no longer happen when nflog-size is not present
> in a target.
>
> I recommend this fix.

Thanks, I will send an official patch later.


Re: kernel v4.8: iptables logs are truncated with the 4.8 kernel?

2016-10-10 Thread Liping Zhang
2016-10-11 2:33 GMT+08:00 Chris Caputo :
>>
>> What numbers did you specify after --nflog-size option?
>> --nflog-size 0 or ...? If you want log the whole packet to
>> the ulogd, please do not specify this nflog-size option.
>
> Not specifying nflog-size does not appear to log the whole packet...
>
> If "--nflog-size" is unspecified, and the iptables config is left
> unchanged when the kernel is upgraded to 4.8, ulogd-2.0.5 crashes.
>
> If "--nflog-size 0" is used, ulogd-2.0.5 crashes.
>
> If "--nflog-size" is used with size 1 or greater, ulogd-2.0.5 is fine.
>
>> > I'm surprised to see a kernel change cause unexpected userspace segfaults,
>> > so further investigation into a kernel fix would seem a good idea.
>>
>> According to the original user's manual, nflog-range option was
>> designed to be the number of bytes copied to userspace, but
>> unfortunately there's a bug from the beginning and it never works,
>> i.e. in kernel, it just ignored this option.
>>
>> Try to change the current nflog-range option's semantics may
>> cause unexpected results(maybe like this ulogd crash) ...
>>
>> In order to keep compatibility, Vishwanath introduce a new
>> nflog-size option and keep nflog-range unchanged. If you just
>> upgrade the kernel, and do not change iptables rules, this
>> problem will not happen.
>
> I am reporting that the problem does happen simply with an upgrade to
> kernel 4.8 and no other changes.  When "--nflog-size" is unspecified or
> set to 0, the bug in ulogd-2.0.5 gets triggered.
>
> I agree there is a bug in ulogd-2.0.5 that this kernel change exposed, but
> I am trying to explain that all ulogd users risk this segfault if they
> upgrade to kernel 4.8 and don't either update to a fixed ulogd (possibly
> using your patch below) or an unreleased iptables with iptables config
> changes to implement nflog-size on each NFLOG target.

Yes, thanks for clarifying this. There's a bug in kernel, can you try
this patch:

diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 018eed7..8c069b4 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct
xt_action_param *par)
li.u.ulog.copy_len   = info->len;
li.u.ulog.group  = info->group;
li.u.ulog.qthreshold = info->threshold;
+   li.u.ulog.flags  = 0;

if (info->flags & XT_NFLOG_F_COPY_LEN)
li.u.ulog.flags |= NF_LOG_F_COPY_LEN;

Thanks


Re: kernel v4.8: iptables logs are truncated with the 4.8 kernel?

2016-10-10 Thread Liping Zhang
Hi Chris,

2016-10-10 15:02 GMT+08:00 Chris Caputo :
> On Tue, 4 Oct 2016, Justin Piszcz wrote:
>> kernel 4.8 with ulogd-2.0.5- IPs are no longer logged:
>>
>> Oct  4 17:51:30 atom INPUT_BLOCK IN=eth1 OUT=
>> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
>> TTL=0 ID=0 PROTO=0 MARK=0
>> Oct  4 17:51:31 atom INPUT_BLOCK IN=eth1 OUT=
>> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
>> TTL=0 ID=0 PROTO=0 MARK=0
>> Oct  4 17:51:32 atom INPUT_BLOCK IN=eth1 OUT=
>> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
>> TTL=0 ID=0 PROTO=0 MARK=0
>>
>> (reboot back to kernel 4.7, works fine)
>>
>> kernel 4.7 with ulogd-2.0.5:
>> Oct  4 17:56:44 atom INPUT_BLOCK IN=eth1 OUT=
>> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 SRC=74.125.22.125
>> DST=1.2.3.4 LEN=397 TOS=00 PREC=0x00 TTL=48 ID=58093 PROTO=TCP
>> SPT=5222 DPT=19804 SEQ=2032644254 ACK=2273184383 WINDOW=55272 ACK PSH
>> URGP=0 MARK=0
>> Oct  4 17:56:45 atom INPUT_BLOCK IN=eth1 OUT=
>> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 SRC=74.125.22.125
>> DST=1.2.3.4 LEN=397 TOS=00 PREC=0x00 TTL=48 ID=58725 PROTO=TCP
>> SPT=5222 DPT=19804 SEQ=2032644254 ACK=2273184383 WINDOW=55272 ACK PSH
>> URGP=0 MARK=0
>>
>> Looks like there were some changes in the 4.8 kernel regarding ulogd,
>> has anyone else run into this problem?
>
> For me, kernel 4.8.1 results in segfaults in ulogd-2.0.5 at:
>
>   Program received signal SIGSEGV, Segmentation fault.
>   0x765fd18a in _interp_iphdr (pi=0x617f50, len=0) at 
> ulogd_raw2packet_BASE.c:720
>
>   715 static int _interp_iphdr(struct ulogd_pluginstance *pi, uint32_t 
> len)
>   716 {
>   717 struct ulogd_key *ret = pi->output.keys;
>   718 struct iphdr *iph =
>   719 ikey_get_ptr(&pi->input.keys[INKEY_RAW_PCKT]);
>   720 void *nexthdr = (uint32_t *)iph + iph->ihl;
>
> I believe 7643507fe8b5bd8ab7522f6a81058cc1209d2585 changed previous
> behavior by not always copying IP header data to user space.
>
> On my machine IPv4 log packets result in a ulogd segfault while IPv6
> packets do not.  I'm not sure of the cause of the difference.
>
> The corresponding userspace commit for the 209d2585 kernel change is:
>
>   
> https://git.netfilter.org/iptables/commit/?id=7070b1f3c88a0c3d4e315c00cca61f05b0fbc882
>
> This adds --nflog-size to iptables.  When --nflog-size is used with my
> iptables NFLOG lines, the ulogd-2.0.5 segfaults cease.

What numbers did you specify after --nflog-size option?
--nflog-size 0 or ...? If you want log the whole packet to
the ulogd, please do not specify this nflog-size option.

>
> I'm surprised to see a kernel change cause unexpected userspace segfaults,
> so further investigation into a kernel fix would seem a good idea.

According to the original user's manual, nflog-range option was
designed to be the number of bytes copied to userspace, but
unfortunately there's a bug from the beginning and it never works,
i.e. in kernel, it just ignored this option.

Try to change the current nflog-range option's semantics may
cause unexpected results(maybe like this ulogd crash) ...

In order to keep compatibility, Vishwanath introduce a new
nflog-size option and keep nflog-range unchanged. If you just
upgrade the kernel, and do not change iptables rules, this
problem will not happen.

So I think this is ulogd's bug, in _interp_iphdr, it try to
dereference the iphdr pointer before validation check, meanwhile
this problem does not exist in ipv6 path.  Can you try this patch:

diff --git a/filter/raw2packet/ulogd_raw2packet_BASE.c
b/filter/raw2packet/ulogd_raw2packet_BASE.c
index 8a6180c..fd2665a 100644
--- a/filter/raw2packet/ulogd_raw2packet_BASE.c
+++ b/filter/raw2packet/ulogd_raw2packet_BASE.c
@@ -717,7 +717,7 @@ static int _interp_iphdr(struct ulogd_pluginstance
*pi, uint32_t len)
struct ulogd_key *ret = pi->output.keys;
struct iphdr *iph =
ikey_get_ptr(&pi->input.keys[INKEY_RAW_PCKT]);
-   void *nexthdr = (uint32_t *)iph + iph->ihl;
+   void *nexthdr;

if (len < sizeof(struct iphdr) || len <= (uint32_t)(iph->ihl * 4))
return ULOGD_IRET_OK;
@@ -734,6 +734,7 @@ static int _interp_iphdr(struct ulogd_pluginstance
*pi, uint32_t len)
okey_set_u16(&ret[KEY_IP_ID], ntohs(iph->id));
okey_set_u16(&ret[KEY_IP_FRAGOFF], ntohs(iph->frag_off));

+   nexthdr = (uint32_t *)iph + iph->ihl;
switch (iph->protocol) {
case IPPROTO_TCP:
_interp_tcp(pi, nexthdr, len);

Thanks

> Having to add the likes of "--nflog-size 200" (200 simply being what I am
> using) to every NFLOG line in firewall configs is a significant burden for
> many.
>
> Putting out a new release of iptables may help ease this transition if the
> kernel is not patched to fix this.  I had to use the git code since 1.6.0
> doesn't have it.
>
> Chris


Re: kernel v4.8: iptables logs are truncated with the 4.8 kernel?

2016-10-04 Thread Liping Zhang
Hi Justin,

2016-10-05 6:02 GMT+08:00 Justin Piszcz :
> Hello,
>
> kernel 4.8 with ulogd-2.0.5- IPs are no longer logged:
>
> Oct  4 17:51:30 atom INPUT_BLOCK IN=eth1 OUT=
> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
> TTL=0 ID=0 PROTO=0 MARK=0
> Oct  4 17:51:31 atom INPUT_BLOCK IN=eth1 OUT=
> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
> TTL=0 ID=0 PROTO=0 MARK=0
> Oct  4 17:51:32 atom INPUT_BLOCK IN=eth1 OUT=
> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 LEN=0 TOS=00 PREC=0x00
> TTL=0 ID=0 PROTO=0 MARK=0
>
> (reboot back to kernel 4.7, works fine)
>
> kernel 4.7 with ulogd-2.0.5:
> Oct  4 17:56:44 atom INPUT_BLOCK IN=eth1 OUT=
> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 SRC=74.125.22.125
> DST=1.2.3.4 LEN=397 TOS=00 PREC=0x00 TTL=48 ID=58093 PROTO=TCP
> SPT=5222 DPT=19804 SEQ=2032644254 ACK=2273184383 WINDOW=55272 ACK PSH
> URGP=0 MARK=0
> Oct  4 17:56:45 atom INPUT_BLOCK IN=eth1 OUT=
> MAC=00:1b:21:9c:3b:fa:3e:94:d5:d2:49:1e:08:00 SRC=74.125.22.125
> DST=1.2.3.4 LEN=397 TOS=00 PREC=0x00 TTL=48 ID=58725 PROTO=TCP
> SPT=5222 DPT=19804 SEQ=2032644254 ACK=2273184383 WINDOW=55272 ACK PSH
> URGP=0 MARK=0
>
> Looks like there were some changes in the 4.8 kernel regarding ulogd,
> has anyone else run into this problem?
>
> } ulog;
> +   if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
> +   (li->u.ulog.copy_len < data_len))
> +   data_len = li->u.ulog.copy_len;
> li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
> +   li->u.ulog.flags |= NF_LOG_F_COPY_LEN;
> li->u.ulog.copy_len =
> if (nla_put_be16(skb, NFTA_LOG_GROUP, 
> htons(li->u.ulog.group)))
> -   if (li->u.ulog.copy_len) {
> +   if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
>  htonl(li->u.ulog.copy_len)))
> li.u.ulog.group  = info->group;
> li.u.ulog.qthreshold = info->threshold;
> +   li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
>
> Thanks,
>
> Justin.

Which one are you using? iptables or nftables?

Could you please paste the related iptables/nftables rules here?


Re: [PATCH nf-next v2 1/2] netfilter: Fix potential null pointer dereference

2016-09-27 Thread Liping Zhang
2016-09-28 11:08 GMT+08:00 Liping Zhang :
> Hi Feng,
>
> 2016-09-28 9:23 GMT+08:00 Feng Gao :
>> Hi Aaraon,
>>
>> On Tue, Sep 27, 2016 at 9:38 PM, Aaron Conole  wrote:
>>> It's possible for nf_hook_entry_head to return NULL if two
>>> nf_unregister_net_hook calls happen simultaneously with a single hook
>>
>> The critical region of nf_unregister_net_hook is protected by &nf_hook_mutex.
>> When it would be called simultaneously?
>
> This is unrelated to race condition.
>
> Suppose that only the last nf_hook_entry exist, and two callers want to do
> un-register work.
>
> The first one will remove it successfully, after the end of the work, the
> second one will enter the critical section, but it will see the NULL pointer.
> Because the last nf_hook_entry was already removed by the first one.
>
>>
>> Regards
>> Feng
>>
>>> entry in the list.  This fix ensures that no null pointer dereference
>>> could occur when such a race happens.
>>>
>>> Signed-off-by: Aaron Conole 

I read the commit log again, I think the description here is a
little confusing indeed.


Re: [PATCH nf-next v2 1/2] netfilter: Fix potential null pointer dereference

2016-09-27 Thread Liping Zhang
Hi Feng,

2016-09-28 9:23 GMT+08:00 Feng Gao :
> Hi Aaraon,
>
> On Tue, Sep 27, 2016 at 9:38 PM, Aaron Conole  wrote:
>> It's possible for nf_hook_entry_head to return NULL if two
>> nf_unregister_net_hook calls happen simultaneously with a single hook
>
> The critical region of nf_unregister_net_hook is protected by &nf_hook_mutex.
> When it would be called simultaneously?

This is unrelated to race condition.

Suppose that only the last nf_hook_entry exist, and two callers want to do
un-register work.

The first one will remove it successfully, after the end of the work, the
second one will enter the critical section, but it will see the NULL pointer.
Because the last nf_hook_entry was already removed by the first one.

>
> Regards
> Feng
>
>> entry in the list.  This fix ensures that no null pointer dereference
>> could occur when such a race happens.
>>
>> Signed-off-by: Aaron Conole 


Re: [PATCH] Fix link error in 32bit arch because of 64bit division

2016-09-27 Thread Liping Zhang
Hi  Vishwanath Pai,

2016-09-27 15:42 GMT+08:00 Vishwanath Pai :
> Fix link error in 32bit arch because of 64bit division

This should be "netfilter: xt_hashlimit: fix ... "

>
> --- a/net/netfilter/xt_hashlimit.c
> +++ b/net/netfilter/xt_hashlimit.c
> @@ -465,19 +465,20 @@ static u64 user2credits(u64 user, int revision)
>  {
> if (revision == 1) {
> /* If multiplying would overflow... */
> -   if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
> +   if (user > div64_u64(0x, (HZ*CREDITS_PER_JIFFY_v1)))

Here divisor and dividend are all 32-bit integer, so covert "/" to div64_u64
seems unnecessary.

> /* Divide first. */
> -   return (user / XT_HASHLIMIT_SCALE) *\
> +   return div64_u64(user, XT_HASHLIMIT_SCALE) *\
> HZ * CREDITS_PER_JIFFY_v1;
>
> -   return (user * HZ * CREDITS_PER_JIFFY_v1) \
> -   / XT_HASHLIMIT_SCALE;
> +   return div64_u64((user * HZ * CREDITS_PER_JIFFY_v1),
> + XT_HASHLIMIT_SCALE);
> } else {
> -   if (user > 0x / (HZ*CREDITS_PER_JIFFY))
> -   return (user / XT_HASHLIMIT_SCALE_v2) *\
> +   if (user > div64_u64(0x, 
> (HZ*CREDITS_PER_JIFFY)))

0x and "HZ*CREDITS_PER_JIFFY" are both
constant, and GCC will do constant folding optimization, so I think
convert "/" to div64_u64 here is also unnecessary.

> +   return div64_u64(user, XT_HASHLIMIT_SCALE_v2) *\
> HZ * CREDITS_PER_JIFFY;
>
> -   return (user * HZ * CREDITS_PER_JIFFY) / 
> XT_HASHLIMIT_SCALE_v2;
> +   return div64_u64((user * HZ * CREDITS_PER_JIFFY),
> +XT_HASHLIMIT_SCALE_v2);
> }
>  }
>


Re: [PATCH nf-next] netfilter: xt_osf: Use explicit member assignment to avoid implicit no padding rule

2016-09-26 Thread Liping Zhang
Hi Feng,

2016-09-27 14:00 GMT+08:00 Gao Feng :
> Hi Liping,
>
>>
>> This xt_osf_user_finger{} is carefully designed, no padding now, and
>> will not be changed in the future, otherwise backward compatibility will
>> be broken.
>
> Yes, there is no padding now. So it is ok to use memcmp now.
> I am afraid the struct would be modified for other requirements.

This is structure was passed by netlink attribute, so modify it will
break backward compatibility.

>
> If it is never changed forever, it is ok certainly.
>
>>
>> I don't think this convert is necessary, actually it is a little ugly, and 
>> will
>> increase the maintenance burden.
>
> I just want the codes don't depend any implicit rule.
>
> It is a tradeoff. If never change, needn't do any convert.
> If may change, the memcmp is a little dangerous.
>
> Regards
> Feng


Re: [PATCH nf-next] netfilter: xt_osf: Use explicit member assignment to avoid implicit no padding rule

2016-09-26 Thread Liping Zhang
Hi Feng,

2016-09-27 12:39 GMT+08:00  :
> From: Gao Feng 
>
> Current xt_osf codes use memcmp to check if two user fingers are same,
> so it depends on that the struct xt_osf_user_finger is no padding.
> It is one implicit rule, and is not good to maintain.
>
> Now use zero memory and assign the members explicitly.
>
> Signed-off-by: Gao Feng 
> ---
>  net/netfilter/xt_osf.c | 32 ++--
>  1 file changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
> index 2455b69..9793670 100644
> --- a/net/netfilter/xt_osf.c
> +++ b/net/netfilter/xt_osf.c
> @@ -61,6 +61,34 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX 
> + 1] = {
> [OSF_ATTR_FINGER]   = { .len = sizeof(struct xt_osf_user_finger) 
> },
>  };
>
> +static void copy_user_finger(struct xt_osf_user_finger *dst,
> +const struct xt_osf_user_finger *src)
> +{
> +#define OSF_COPY_MEMBER(mem)   dst->mem = src->mem
> +
> +   int i;
> +
> +   OSF_COPY_MEMBER(wss.wc);
> +   OSF_COPY_MEMBER(wss.val);
> +
> +   OSF_COPY_MEMBER(ttl);
> +   OSF_COPY_MEMBER(df);
> +   OSF_COPY_MEMBER(ss);
> +   OSF_COPY_MEMBER(mss);
> +   OSF_COPY_MEMBER(opt_num);
> +
> +   memcpy(dst->genre, src->genre, sizeof(dst->genre));
> +   memcpy(dst->version, src->version, sizeof(dst->version));
> +   memcpy(dst->subtype, src->subtype, sizeof(dst->subtype));
> +
> +   for (i = 0; i < MAX_IPOPTLEN; ++i) {
> +   OSF_COPY_MEMBER(opt[i].kind);
> +   OSF_COPY_MEMBER(opt[i].length);
> +   OSF_COPY_MEMBER(opt[i].wc.wc);
> +   OSF_COPY_MEMBER(opt[i].wc.val);
> +   }
> +}
> +

This xt_osf_user_finger{} is carefully designed, no padding now, and
will not be changed in the future, otherwise backward compatibility will
be broken.

I don't think this convert is necessary, actually it is a little ugly, and will
increase the maintenance burden.


Re: [PATCH v3 2/2] netfilter: Create revision 2 of xt_hashlimit to support higher pps rates

2016-09-26 Thread Liping Zhang
Hi Vishwanath,

2016-09-23 0:43 GMT+08:00 Vishwanath Pai :
>
>  /* Precision saver. */
> -static u32 user2credits(u32 user)
> +static u64 user2credits(u64 user, int revision)
>  {
> -   /* If multiplying would overflow... */
> -   if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
> -   /* Divide first. */
> -   return (user / XT_HASHLIMIT_SCALE_v1) *\
> -   HZ * CREDITS_PER_JIFFY_v1;
> +   if (revision == 1) {
> +   /* If multiplying would overflow... */
> +   if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
> +   /* Divide first. */
> +   return (user / XT_HASHLIMIT_SCALE_v1) *\
> +   HZ * CREDITS_PER_JIFFY_v1;
> +
> +   return (user * HZ * CREDITS_PER_JIFFY_v1) \
> +   / XT_HASHLIMIT_SCALE_v1;
> +   } else {
> +   if (user > 0x / (HZ*CREDITS_PER_JIFFY))
> +   return (user / XT_HASHLIMIT_SCALE) *\
> +   HZ * CREDITS_PER_JIFFY;
>
> -   return (user * HZ * CREDITS_PER_JIFFY_v1) / XT_HASHLIMIT_SCALE_v1;
> +   return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
> +   }
>  }
>

In my memory, 64-bit division operation should be replaced by
div_u64 or div64_u64, otherwise on some 32-bit architecture
systems, link error will happen. Something like this:
... undefined reference to `__udivdi3'.


[PATCH iproute2] ipmonitor: fix ip monitor can't work when NET_NS is not enabled

2016-09-20 Thread Liping Zhang
From: Liping Zhang 

In ip monitor, netns_map_init will check getnsid is supported or not.
But when /proc/self/ns/net does not exist, we just print out error
messages and exit. So user cannot use ip monitor anymore when
CONFIG_NET_NS is disabled:
  # ip monitor
  open("/proc/self/ns/net"): No such file or directory

If open "/proc/self/ns/net" failed, set have_rtnl_getnsid to false.

Fixes: d652ccbf8195 ("netns: allow to dump and monitor nsid")
Signed-off-by: Liping Zhang 
---
 ip/ipnetns.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index af87065..ccc652c 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -72,8 +72,8 @@ static int ipnetns_have_nsid(void)
if (have_rtnl_getnsid < 0) {
fd = open("/proc/self/ns/net", O_RDONLY);
if (fd < 0) {
-   perror("open(\"/proc/self/ns/net\")");
-   exit(1);
+   have_rtnl_getnsid = 0;
+   return 0;
}
 
addattr32(&req.n, 1024, NETNSA_FD, fd);
-- 
1.9.1



Re: [PATCH 1/2 nf] netfilter: seqadj: Fix some possible panics of seqadj when mem is exhausted

2016-09-01 Thread Liping Zhang
Hi Feng,
2016-09-02 9:48 GMT+08:00  :
> From: Gao Feng 
> @@ -171,6 +176,11 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
> struct nf_ct_seqadj *this_way, *other_way;
> int res;
>
> +   if (unlikely(!seqadj)) {

IPS_SEQ_ADJUST_BIT will be tested before we call nf_ct_seq_adjust(),
so I think seqadj
will never be NULL here.

> +   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");
> +   return 0;
> +   }
> +
> this_way  = &seqadj->seq[dir];
> other_way = &seqadj->seq[!dir];
>
> @@ -218,8 +228,10 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct,
> struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
> struct nf_ct_seqadj *this_way;
>
> -   if (!seqadj)
> +   if (unlikely(!seqadj)) {
> +   WARN_ONCE(1, "Missing nfct_seqadj_ext_add() setup call\n");

But in nf_ct_seq_offset, seqadj is likely to be NULL, see the function
call path:
tcp_packet->tcp_in_window->nf_ct_seq_offset, so WARN_ONCE seems unnecessary.

> return 0;
> +   }


Re: [PATCH 1/1] netfilter: Add helper array register/unregister functions

2016-07-19 Thread Liping Zhang
2016-07-18 11:39 GMT+08:00  :
> From: Gao Feng 
>
> Add nf_ct_helper_init, nf_conntrack_helpers_register/unregister
> functions to enhance the conntrack helper codes.

I think this patch is breaking something ...

This irc:
> -   if (ports[i] == IRC_PORT)
> -   sprintf(irc[i].name, "irc");
> -   else
> -   sprintf(irc[i].name, "irc-%u", i);
> -
> -   ret = nf_conntrack_helper_register(&irc[i]);
> +   nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc",
> + IRC_PORT, ports[i], &irc_exp_policy, 0, 0,
> + help, NULL, THIS_MODULE);
> +   }

This sip:
> -   if (ports[i] == SIP_PORT)
> -   sprintf(sip[i][j].name, "sip");
> -   else
> -   sprintf(sip[i][j].name, "sip-%u", i);

And this tftp:
> -   if (ports[i] == TFTP_PORT)
> -   sprintf(tftp[i][j].name, "tftp");
> -   else
> -   sprintf(tftp[i][j].name, "tftp-%u", i);

For example, if the user install the nf_conntrack_tftp module an
specify the ports to "69,10069",
then the helper name is "tftp" and "tftp-1".

But apply this patch, the helper name will be changed to "tftp" and
"tftp-10069", this may break the
existing iptables rules which used the helper match or CT target.

And this was already discussed  at https://patchwork.ozlabs.org/patch/622238/


Re: [PATCH] netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq

2016-07-04 Thread Liping Zhang
2016-07-04 14:14 GMT+08:00 Christophe Leroy :
>> I think there is no need to convert simple_strtoul to kstrtouint, add
>> a further check seems better?
>> Like this:
>>  -   if (!cseq) {
>> +   if (!cseq && *(*dptr + matchoff) != '0') {
>>
>
> And what about an invalid CSeq that would look like CSeq: 0abzk852 ?
> Should we check it is 0 + space instead ?

In this case, i.e. some stupid sip clients set CSeq to "0abzk852",
your patch will also fail to detect this "error".

Because for "Cseq", int (*match_len)(...) point to digits_len(see
struct sip_header ct_sip_hdrs definition).
So in this case match_len will just be setted to ONE (not
sizeof("0abzk852")-1), then cseq will be parsed
as 0 by  kstrtouint, not as an error.


Re: [PATCH] netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq

2016-07-03 Thread Liping Zhang
2016-07-01 17:48 GMT+08:00 Christophe Leroy :
> Do not drop packet when CSeq is 0 as 0 is also a valid value for CSeq.
>
> --- a/net/netfilter/nf_conntrack_sip.c
> +++ b/net/netfilter/nf_conntrack_sip.c
> @@ -1368,6 +1368,7 @@ static int process_sip_response(struct sk_buff *skb, 
> unsigned int protoff,
> struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
> unsigned int matchoff, matchlen, matchend;
> unsigned int code, cseq, i;
> +   char buf[21];
>
> if (*datalen < strlen("SIP/2.0 200"))
> return NF_ACCEPT;
> @@ -1382,8 +1383,13 @@ static int process_sip_response(struct sk_buff *skb, 
> unsigned int protoff,
> nf_ct_helper_log(skb, ct, "cannot parse cseq");
> return NF_DROP;
> }
> -   cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
> -   if (!cseq) {

I think there is no need to convert simple_strtoul to kstrtouint, add
a further check seems better?
Like this:
 -   if (!cseq) {
+   if (!cseq && *(*dptr + matchoff) != '0') {

> +   if (matchlen > sizeof(buf) - 1) {
> +   nf_ct_helper_log(skb, ct, "cannot parse cseq (too big)");
> +   return NF_DROP;
> +   }
> +   memcpy(buf, *dptr + matchoff, matchlen);
> +   buf[matchlen] = 0;
> +   if (kstrtouint(buf, 10, &cseq)) {
> nf_ct_helper_log(skb, ct, "cannot get cseq");
> return NF_DROP;
> }


[PATCH V2 1/2] net: socket: return EADDRNOTAVAIL when source address becomes nonlocal

2016-04-05 Thread Liping Zhang
From: Liping Zhang 

A socket can use bind(directly) or connect(indirectly) to bind to a local
ip address, and later if the network becomes down, that cause the source
address becomes nonlocal, then send() call will fail and return EINVAL.
But this error code is confusing, acctually we did not pass any invalid
arguments. Furthermore, send() maybe return ok at first, it now returns
fail just because of a temporary network problem, i.e. when the network
recovery, send() call will become ok. Return EADDRNOTAVAIL instead of
EINVAL in such situation is better.

Take the ping utility for example, we can use -I option to specify the
source address (e.g ping -I 10.19.145.26 117.135.169.41), when network
becomes down, error message will be printed out as follows:
64 bytes from  (117.135.169.41): icmp_seq=9 ttl=54 time=46.7 ms
ping: sendmsg: Invalid argument
ping: sendmsg: Invalid argument

Apply this patch, error message will become:
64 bytes from  (117.135.169.41): icmp_seq=5 ttl=54 time=47.5 ms
ping: sendmsg: Cannot assign requested address
ping: sendmsg: Cannot assign requested address

Signed-off-by: Liping Zhang 
---
 net/ipv4/route.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c6229..857f7b3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2149,11 +2149,13 @@ struct rtable *__ip_route_output_key_hash(struct net 
*net, struct flowi4 *fl4,
 
rcu_read_lock();
if (fl4->saddr) {
-   rth = ERR_PTR(-EINVAL);
+   rth = ERR_PTR(-EADDRNOTAVAIL);
if (ipv4_is_multicast(fl4->saddr) ||
ipv4_is_lbcast(fl4->saddr) ||
-   ipv4_is_zeronet(fl4->saddr))
+   ipv4_is_zeronet(fl4->saddr)) {
+   rth = ERR_PTR(-EINVAL);
goto out;
+   }
 
/* I removed check for oif == dev_out->oif here.
   It was wrong for two reasons:
-- 
1.7.9.5




[PATCH V2 0/2] net: socket: return a proper error code when source address becomes nonlocal

2016-04-05 Thread Liping Zhang
From: Liping Zhang 

This patch version 2 spilt the original patch into 2 patches, because it fix
two separate problems actually.

Liping Zhang (2):
  net: socket: return EADDRNOTAVAIL when source address becomes
nonlocal
  net: socket: return EADDRNOTAVAIL when IPV6_PKTINFO's ipi6_addr is
not available

 net/ipv4/route.c|6 --
 net/ipv6/datagram.c |2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

-- 
1.7.9.5




[PATCH V2 2/2] net: socket: return EADDRNOTAVAIL when IPV6_PKTINFO's ipi6_addr is not available

2016-04-05 Thread Liping Zhang
From: Liping Zhang 

We can use IPV6_PKTINFO to specify the ipv6 source address when call
sendmsg() to send packet, but if the address is not available, call will
fail and EINVAL is returned. This error code is not very appropriate,
it failed maybe just because of a temporary network problem, i.e. when
the network recovery, sendmsg() call will become ok. Also RFC3542,
section 6.6 describe an example in error scenario that returns
EADDRNOTAVAIL: "ipi6_ifindex specifies an interface but the address
ipi6_addr is not available for use on that interface.". So return
EADDRNOTAVAIL instead of EINVAL here.

Signed-off-by: Liping Zhang 
---
 net/ipv6/datagram.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a73d701..20a968b 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -753,7 +753,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
   strict ? dev : NULL, 0) &&
!ipv6_chk_acast_addr_src(net, dev,
 
&src_info->ipi6_addr))
-   err = -EINVAL;
+   err = -EADDRNOTAVAIL;
else
fl6->saddr = src_info->ipi6_addr;
}
-- 
1.7.9.5




[PATCH] net: socket: return a proper error code when source address becomes nonlocal

2016-04-04 Thread Liping Zhang
From: Liping Zhang 

1. Socket can use bind(directly) or connect(indirectly) to bind to a local
   ip address, and later if the network becomes down, that cause the source
   address becomes nonlocal, then send() call will fail and return EINVAL.
   But this error code is confusing, acctually we did not pass any invalid
   arguments. Furthermore, send() maybe return ok at first, it now returns
   fail just because of a temporary network problem, i.e. when the network
   recovery, send() call will become ok. Return EADDRNOTAVAIL instead of
   EINVAL in such situation is better.
2. We can use IPV6_PKTINFO to specify the ipv6 source address when call
   sendmsg() to send packet, but if the address is not available, call will
   fail and EINVAL is returned. This error code is not very appropriate,
   it failed maybe just because of a temporary network problem. Also
   RFC3542, section 6.6 describe an example returns EADDRNOTAVAIL:
   "ipi6_ifindex specifies an interface but the address ipi6_addr is not
   available for use on that interface.". So return EADDRNOTAVAIL instead
   of EINVAL here.

Signed-off-by: Liping Zhang 
---
 net/ipv4/route.c|6 --
 net/ipv6/datagram.c |2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c6229..857f7b3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2149,11 +2149,13 @@ struct rtable *__ip_route_output_key_hash(struct net 
*net, struct flowi4 *fl4,
 
rcu_read_lock();
if (fl4->saddr) {
-   rth = ERR_PTR(-EINVAL);
+   rth = ERR_PTR(-EADDRNOTAVAIL);
if (ipv4_is_multicast(fl4->saddr) ||
ipv4_is_lbcast(fl4->saddr) ||
-   ipv4_is_zeronet(fl4->saddr))
+   ipv4_is_zeronet(fl4->saddr)) {
+   rth = ERR_PTR(-EINVAL);
goto out;
+   }
 
/* I removed check for oif == dev_out->oif here.
   It was wrong for two reasons:
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4281621..04d62e8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -746,7 +746,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
   strict ? dev : NULL, 0) &&
!ipv6_chk_acast_addr_src(net, dev,
 
&src_info->ipi6_addr))
-   err = -EINVAL;
+   err = -EADDRNOTAVAIL;
else
fl6->saddr = src_info->ipi6_addr;
}
-- 
1.7.9.5