[PATCH libnftnl V2] expr: queue: add NFTA_QUEUE_SREG_QNUM attr support

2016-09-14 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

After adding _SREG_QNUM attr, queuenum is not must option anymore,
so we must test NFTNL_EXPR_QUEUE_NUM first before dumpping queue num
in snprintf_default. Also add a tailing space in snprintf_default,
this is consistent with other expressions.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: adjust two sreg to one sreg, keep consistent with the kernel

 include/buffer.h|  1 +
 include/libnftnl/expr.h |  1 +
 include/linux/netfilter/nf_tables.h |  2 ++
 src/expr/queue.c| 57 -
 4 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/include/buffer.h b/include/buffer.h
index a753c78..ab1d468 100644
--- a/include/buffer.h
+++ b/include/buffer.h
@@ -77,6 +77,7 @@ int nftnl_buf_reg(struct nftnl_buf *b, int type, union 
nftnl_data_reg *reg,
 #define SREG_PROTO_MIN "sreg_proto_min"
 #define SREG_KEY   "sreg_key"
 #define SREG_DATA  "sreg_data"
+#define SREG_QNUM  "sreg_qnum"
 #define SREG   "sreg"
 #define TABLE  "table"
 #define TOTAL  "total"
diff --git a/include/libnftnl/expr.h b/include/libnftnl/expr.h
index 8b35203..476f82d 100644
--- a/include/libnftnl/expr.h
+++ b/include/libnftnl/expr.h
@@ -176,6 +176,7 @@ enum {
NFTNL_EXPR_QUEUE_NUM= NFTNL_EXPR_BASE,
NFTNL_EXPR_QUEUE_TOTAL,
NFTNL_EXPR_QUEUE_FLAGS,
+   NFTNL_EXPR_QUEUE_SREG_QNUM,
 };
 
 enum {
diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index e608054..b4366f3 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -888,12 +888,14 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: 
nft_registers)
  */
 enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
+   NFTA_QUEUE_SREG_QNUM,
__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
diff --git a/src/expr/queue.c b/src/expr/queue.c
index 033c542..316a9ed 100644
--- a/src/expr/queue.c
+++ b/src/expr/queue.c
@@ -21,6 +21,7 @@
 #include 
 
 struct nftnl_expr_queue {
+   enum nft_registers  sreg_qnum;
uint16_tqueuenum;
uint16_tqueues_total;
uint16_tflags;
@@ -41,6 +42,9 @@ static int nftnl_expr_queue_set(struct nftnl_expr *e, 
uint16_t type,
case NFTNL_EXPR_QUEUE_FLAGS:
queue->flags = *((uint16_t *)data);
break;
+   case NFTNL_EXPR_QUEUE_SREG_QNUM:
+   queue->sreg_qnum = *((uint32_t *)data);
+   break;
default:
return -1;
}
@@ -63,6 +67,9 @@ nftnl_expr_queue_get(const struct nftnl_expr *e, uint16_t 
type,
case NFTNL_EXPR_QUEUE_FLAGS:
*data_len = sizeof(queue->flags);
return >flags;
+   case NFTNL_EXPR_QUEUE_SREG_QNUM:
+   *data_len = sizeof(queue->sreg_qnum);
+   return >sreg_qnum;
}
return NULL;
 }
@@ -82,6 +89,10 @@ static int nftnl_expr_queue_cb(const struct nlattr *attr, 
void *data)
if (mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
abi_breakage();
break;
+   case NFTA_QUEUE_SREG_QNUM:
+   if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
+   abi_breakage();
+   break;
}
 
tb[type] = attr;
@@ -99,6 +110,8 @@ nftnl_expr_queue_build(struct nlmsghdr *nlh, const struct 
nftnl_expr *e)
mnl_attr_put_u16(nlh, NFTA_QUEUE_TOTAL, 
htons(queue->queues_total));
if (e->flags & (1 << NFTNL_EXPR_QUEUE_FLAGS))
mnl_attr_put_u16(nlh, NFTA_QUEUE_FLAGS, htons(queue->flags));
+   if (e->flags & (1 << NFTNL_EXPR_QUEUE_SREG_QNUM))
+   mnl_attr_put_u32(nlh, NFTA_QUEUE_SREG_QNUM, 
htonl(queue->sreg_qnum));
 }
 
 static int
@@ -122,6 +135,10 @@ nftnl_expr_queue_parse(struct nftnl_expr *e, struct nlattr 
*attr)
queue->flags = ntohs(mnl_attr_get_u16(tb[NFTA_QUEUE_FLAGS]));
e->flags |= (1 << NFTNL_EXPR_QUEUE_FLAGS);
}
+   if (tb[NFTA_QUEUE_SREG_QNUM]) {
+   queue->sreg_qnum = 
ntohl(mnl_attr_get_u32(tb[NFTA_QUEUE_SREG_QNUM]));
+   e->flags |= (1 << NFTNL_EXPR_QUEUE_SREG_QNUM);
+   }
 
return 0;
 }
@@ -131,6 +148,7 @@ nftnl_expr_queue_json_parse(struc

[PATCH nf-next V2] netfilter: nft_queue: add _SREG_QNUM attr to select the queue number

2016-09-14 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Currently, the user can specify the queue numbers by _QUEUE_NUM and
_QUEUE_TOTAL attributes, this is enough in most situations.

But acctually, it is not very flexible, for example:
  tcp dport 80 mapped to queue0
  tcp dport 81 mapped to queue1
  tcp dport 82 mapped to queue2
In order to do this thing, we must add 3 nft rules, and more
mapping meant more rules ...

So take one register to select the queue number, then we can add one
simple rule to mapping queues, maybe like this:
  queue num tcp dport map { 80:0, 81:1, 82:2 ... }

Florian Westphal also proposed wider usage scenarios:
  queue num jhash ip saddr . ip daddr mod ...
  queue num meta cpu ...
  queue num meta mark ...

The last point is how to load a queue number from sreg, although we can
use *(u16*)>data[reg] to load the queue number, just like nat expr
to load its l4port do.

But we will cooperate with hash expr, meta cpu, meta mark expr and so on.
They all store the result to u32 type, so cast it to u16 pointer and
dereference it will generate wrong result in the big endian system.

So just keep it simple, we treat queue number as u32 type, although u16
type is already enough.

Suggested-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: use one sreg suggested by Florian and use select_ops to make
 code more clean suggested by Pablo

 include/uapi/linux/netfilter/nf_tables.h |   2 +
 net/netfilter/nft_queue.c| 102 +++
 2 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index bc0eb6a..17f8171 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -894,12 +894,14 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: 
nft_registers)
  */
 enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
+   NFTA_QUEUE_SREG_QNUM,
__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index d16d599..393d359 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,9 +22,10 @@
 static u32 jhash_initval __read_mostly;
 
 struct nft_queue {
-   u16 queuenum;
-   u16 queues_total;
-   u16 flags;
+   enum nft_registers  sreg_qnum:8;
+   u16 queuenum;
+   u16 queues_total;
+   u16 flags;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -54,26 +55,39 @@ static void nft_queue_eval(const struct nft_expr *expr,
regs->verdict.code = ret;
 }
 
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   struct nft_queue *priv = nft_expr_priv(expr);
+   u32 queue, ret;
+
+   queue = regs->data[priv->sreg_qnum];
+
+   ret = NF_QUEUE_NR(queue);
+   if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+   ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+   regs->verdict.code = ret;
+}
+
 static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM]= { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL]  = { .type = NLA_U16 },
[NFTA_QUEUE_FLAGS]  = { .type = NLA_U16 },
+   [NFTA_QUEUE_SREG_QNUM]  = { .type = NLA_U32 },
 };
 
 static int nft_queue_init(const struct nft_ctx *ctx,
-  const struct nft_expr *expr,
-  const struct nlattr * const tb[])
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
 {
struct nft_queue *priv = nft_expr_priv(expr);
u32 maxid;
 
-   if (tb[NFTA_QUEUE_NUM] == NULL)
-   return -EINVAL;
-
-   init_hashrandom(_initval);
priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
 
-   if (tb[NFTA_QUEUE_TOTAL] != NULL)
+   if (tb[NFTA_QUEUE_TOTAL])
priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
else
priv->queues_total = 1;
@@ -85,11 +99,34 @@ static int nft_queue_init(const struct nft_ctx *ctx,
if (maxid > U16_MAX)
return -ERANGE;
 
-   if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+   if (tb[NFTA_QUEUE_FLAGS]) {
+   priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+   if (priv->fl

Re: [PATCH nf-next] netfilter: nft_queue: add _SREG_FROM and _SRGE_TO to select the queue numbers

2016-09-13 Thread Liping Zhang
2016-09-13 17:19 GMT+08:00 Pablo Neira Ayuso <pa...@netfilter.org>:
> Hi Liping,
>
> A bit more comments on top of Florian's suggestion to use one single
> _SREG.
>
> On Sun, Sep 11, 2016 at 10:05:28PM +0800, Liping Zhang wrote:
>> diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
>>  static void nft_queue_eval(const struct nft_expr *expr,
>> @@ -32,17 +34,30 @@ static void nft_queue_eval(const struct nft_expr *expr,
>>  const struct nft_pktinfo *pkt)
>>  {
>>   struct nft_queue *priv = nft_expr_priv(expr);
>> - u32 queue = priv->queuenum;
>> + u32 queue, queues_total, queue_end;
>>   u32 ret;
>>
>> - if (priv->queues_total > 1) {
>> + if (priv->sreg_from) {
>> + queue = (u16)regs->data[priv->sreg_from];
>> + queue_end = (u16)regs->data[priv->sreg_to];
>> +
>> + if (queue_end > queue)
>> + queues_total = queue_end - queue + 1;
>> + else
>> + queues_total = 1;
>> + } else {
>> + queue = priv->queuenum;
>> + queues_total = priv->queues_total;
>> + }
>
> I suggest you use .select_ops to use a specific _eval function if
> _SREG is set. This would disentangle this code a bit.
>
>>
>> + /* nftables has no idea whether the kernel supports _SREG_FROM or not,
>> +  * so for compatibility reason, it may specify the _SREG_FROM and
>> +  * _QUEUE_NUM attributes at the same time. We prefer to use _SREG_FROM,
>> +  * it is more flexible and has less restriction, for example, queue
>> +  * range 0-65535 is ok when use _SREG_FROM and _SREG_TO.
>> +  */
>
> No need for this large comment, look.
>
> From userspace, we have to modify the nft parser to take an expression
> as 'num'. Then, from the netlink_linearize path, we can check the
> expression type:
>
> * If it's a value expression, we can simply use NFTA_QUEUE_NUM.
> * If it's a range expression, we have to set up NFTA_QUEUE_NUM and
>   NFTA_QUEUE_TOTAL.
> * If it's a mapping, then we only use _SREG.
>
> Old kernels will bail out if the mapping is used, as this is not
> supported, which is what we expect. This should be fine by now, until
> I finish the patchset to provide a VM description that userspace can
> use to generate bytecode depending on the features available in the
> kernel.
>
>>
>> @@ -97,9 +142,21 @@ static int nft_queue_dump(struct sk_buff *skb, const 
>> struct nft_expr *expr)
>>  {
>>   const struct nft_queue *priv = nft_expr_priv(expr);
>>
>> - if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) ||
>> - nla_put_be16(skb, NFTA_QUEUE_TOTAL, htons(priv->queues_total)) ||
>> - nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
>> + if (priv->sreg_from) {
>> + if (nft_dump_register(skb, NFTA_QUEUE_SREG_FROM,
>> +   priv->sreg_from))
>> + goto nla_put_failure;
>> + if (nft_dump_register(skb, NFTA_QUEUE_SREG_TO,
>> +   priv->sreg_to))
>> + goto nla_put_failure;
>> + } else {
>> + if (nla_put_be16(skb, NFTA_QUEUE_NUM, htons(priv->queuenum)) ||
>> + nla_put_be16(skb, NFTA_QUEUE_TOTAL,
>> +  htons(priv->queues_total)))
>> + goto nla_put_failure;
>> + }
>
> Looking at this, the code will look better if we should use
> .select_ops indeed.
>
> Thanks for resolving existing nft_queue limitations!

I will re-spin my patch based on your and Florian's suggestions.
Thanks Pablo.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] netfilter: nft_hash: Add hash offset value

2016-09-13 Thread Liping Zhang
Hi Laura,

2016-09-06 14:44 GMT+08:00 Laura Garcia Liebana :
>  static int nft_hash_init(const struct nft_ctx *ctx,
> @@ -60,6 +62,11 @@ static int nft_hash_init(const struct nft_ctx *ctx,
> !tb[NFTA_HASH_MODULUS])
> return -EINVAL;
>
> +   if (tb[NFTA_HASH_SUM])
> +   priv->sum = ntohl(nla_get_be32(tb[NFTA_HASH_SUM]));
> +   else
> +   priv->sum = 0;
> +
> priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
> if (priv->sreg < 0)
> return -ERANGE;
> @@ -77,6 +84,9 @@ static int nft_hash_init(const struct nft_ctx *ctx,
> if (priv->modulus <= 1)
> return -ERANGE;
>
> +   if (priv->sum + priv->modulus - 1 < U32_MAX)
> +   return -EOVERFLOW;

I think this judgement here is wrong, it is likely to be true...

When two integer a and b do addition operation, and the calculation
results satisfy the
following conditions: (a + b < a) or (a + b < b), then we can assure
that integer overflow
happened.

So the judgement should be converted to:
 if (priv->sum + priv->modulus - 1 < priv->sum)

> +
> priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
>
> return nft_validate_register_load(priv->sreg, priv->len) &&
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next] netfilter: nf_queue: get rid of dependency on IP6_NF_IPTABLES

2016-09-12 Thread Liping Zhang
2016-09-13 1:50 GMT+08:00 Pablo Neira Ayuso :
> We have nft_queue support for bridge now, but nfqueue_hash() takes a
> pkt->pf parameter expecting NFPROTO_IPV4 or NFPROTO_IPV6.
>
> So nft_queue hashing with bridge is currently broken.

Yes, I will send another patch to do this :)
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nft_numgen: fix race between num generate and store it

2016-09-12 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

After we generate a new number, we still use the priv->counter and
store it to the dreg. This is not correct, another cpu may already
change it to a new number. So we must use the generated number, not
the priv->counter itself.

Fixes: 91dbc6be0a62 ("netfilter: nf_tables: add number generator expression")
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_numgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index f51a3ed..f173ebe 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -37,7 +37,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
} while (atomic_cmpxchg(>counter, oval, nval) != oval);
 
-   memcpy(>data[priv->dreg], >counter, sizeof(u32));
+   regs->data[priv->dreg] = nval;
 }
 
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft V2] src: fix compile error due to _UNTIL renamed to _MODULUS in libnftnl

2016-09-12 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

In the latest libnftnl, NFTNL_EXPR_NG_UNTIL was renamed to
NFTNL_EXPR_NG_MODULUS, so compile error happened:
  netlink_linearize.c: In function ‘netlink_gen_numgen’:
  netlink_linearize.c:184:26: error: ‘NFTNL_EXPR_NG_UNTIL’ undeclared
  (first use in this function)

Also update NFTA_NG_UNTIL to NFTA_NG_MODULUS.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: update NFTA_NG_UNTIL to NFTA_NG_MODULUS reported by Laura Garcia Liebana.

 include/linux/netfilter/nf_tables.h | 4 ++--
 src/netlink_delinearize.c   | 2 +-
 src/netlink_linearize.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index 8a63f22..1bec149 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -1126,13 +1126,13 @@ enum nft_trace_types {
  * enum nft_ng_attributes - nf_tables number generator expression netlink 
attributes
  *
  * @NFTA_NG_DREG: destination register (NLA_U32)
- * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
  * @NFTA_NG_TYPE: operation type (NLA_U32)
  */
 enum nft_ng_attributes {
NFTA_NG_UNSPEC,
NFTA_NG_DREG,
-   NFTA_NG_UNTIL,
+   NFTA_NG_MODULUS,
NFTA_NG_TYPE,
__NFTA_NG_MAX
 };
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index 05edb01..6bb27b6 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -554,7 +554,7 @@ static void netlink_parse_numgen(struct netlink_parse_ctx 
*ctx,
struct expr *expr;
 
type  = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_TYPE);
-   until = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_UNTIL);
+   until = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_MODULUS);
 
expr = numgen_expr_alloc(loc, type, until);
dreg = netlink_parse_register(nle, NFTNL_EXPR_NG_DREG);
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 5204154..558deb2 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -181,7 +181,7 @@ static void netlink_gen_numgen(struct netlink_linearize_ctx 
*ctx,
nle = alloc_nft_expr("numgen");
netlink_put_register(nle, NFTNL_EXPR_NG_DREG, dreg);
netlink_put_register(nle, NFTNL_EXPR_NG_TYPE, expr->numgen.type);
-   nftnl_expr_set_u32(nle, NFTNL_EXPR_NG_UNTIL, expr->numgen.mod);
+   nftnl_expr_set_u32(nle, NFTNL_EXPR_NG_MODULUS, expr->numgen.mod);
nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nft] src: fix compile error due to _UNTIL renamed to _MODULUS in libnftnl

2016-09-12 Thread Liping Zhang
2016-09-12 17:49 GMT+08:00 Laura Garcia <nev...@gmail.com>:
> On Sun, Sep 11, 2016 at 04:35:57PM +0800, Liping Zhang wrote:
>> From: Liping Zhang <liping.zh...@spreadtrum.com>
>>
>> In the latest libnftnl, NFTNL_EXPR_NG_UNTIL was renamed to
>> NFTNL_EXPR_NG_MODULUS, so compile error happened:
>>   netlink_linearize.c: In function ‘netlink_gen_numgen’:
>>   netlink_linearize.c:184:26: error: ‘NFTNL_EXPR_NG_UNTIL’ undeclared
>>   (first use in this function)
>>
>
> In order to complete the renaming, NFTA_NG_UNTIL should be changed to
> NFTA_NG_MODULUS as well.
>

Yes, I missed it. Although NFTA_NG_XXX is not used in nftables, but I think it's
better to do this in one single patch.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next] netfilter: nft_queue: add _SREG_FROM and _SRGE_TO to select the queue numbers

2016-09-11 Thread Liping Zhang
2016-09-12 5:12 GMT+08:00 Florian Westphal <f...@strlen.de>:
> Liping Zhang <zlpnob...@163.com> wrote:
>> So similer to nft_nat, take two registers to select the queue numbers,
>> then we can add one simple rule to mapping queues, maybe like this:
>>   queue num tcp dport map { 80:0, 81:1, 82:2 ... }
>
> I like this.
>
> My first thought was that it would be better to just support one single
> sreg (the queue number) and eventually externalize the hashing/queue
> selection:
>
> queue num jhash ip saddr . ip daddr mod ...

Sounds good.

At first, my another intention is use _SREG_FROM and _SREG_TO to replace
_QUEUE_NUM and _QUEUE_TOTAL, there's no restriction to use range 0-65535:

[ immediate reg 1 0x ]
[ immediate reg 2 0x ]
[ queue num 0 reg_from 1 reg_to 2 ]

But I think your "queue num jhash ip saddr . ip daddr mod ..." is more
flexible and
there's no restriction to use range 0-65535 too.

I agree with you, one sreg seems enough. I will send V2 later.

> Problem is that with plain jhash we won't get a symmetric hash
> for origin and reply, so for this we would need a new expression/hash
> mode.
>
> We would also need another expression to allow distribution
> starting with a queue other than 0.

I think Laura is developing this option, see
https://patchwork.ozlabs.org/patch/666334/.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH libnftnl 3/3] expr: queue: add sreg_from and sreg_to support

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Add NFTA_QUEUE_SREG_FROM and NFTA_QUEUE_SREG_TO attributes support.

After adding _SREG_FROM attr, queuenum is not must option anymore,
so we must test NFTNL_EXPR_QUEUE_NUM first before dumpping queue num
in snprintf_default.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/buffer.h|  2 +
 include/libnftnl/expr.h |  2 +
 include/linux/netfilter/nf_tables.h |  4 ++
 src/expr/queue.c| 84 +
 tests/nft-expr_queue-test.c |  8 
 5 files changed, 93 insertions(+), 7 deletions(-)

diff --git a/include/buffer.h b/include/buffer.h
index a753c78..f8884cd 100644
--- a/include/buffer.h
+++ b/include/buffer.h
@@ -77,6 +77,8 @@ int nftnl_buf_reg(struct nftnl_buf *b, int type, union 
nftnl_data_reg *reg,
 #define SREG_PROTO_MIN "sreg_proto_min"
 #define SREG_KEY   "sreg_key"
 #define SREG_DATA  "sreg_data"
+#define SREG_FROM  "sreg_from"
+#define SREG_TO"sreg_to"
 #define SREG   "sreg"
 #define TABLE  "table"
 #define TOTAL  "total"
diff --git a/include/libnftnl/expr.h b/include/libnftnl/expr.h
index 94ce529..8075d9c 100644
--- a/include/libnftnl/expr.h
+++ b/include/libnftnl/expr.h
@@ -176,6 +176,8 @@ enum {
NFTNL_EXPR_QUEUE_NUM= NFTNL_EXPR_BASE,
NFTNL_EXPR_QUEUE_TOTAL,
NFTNL_EXPR_QUEUE_FLAGS,
+   NFTNL_EXPR_QUEUE_SREG_FROM,
+   NFTNL_EXPR_QUEUE_SREG_TO,
 };
 
 enum {
diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index dd8b746..8ebd767 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -886,12 +886,16 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_FROM: source register of queue number start (NLA_U32: 
nft_registers)
+ * @NFTA_QUEUE_SREG_TO: source register of queue number end (NLA_U32: 
nft_registers)
  */
 enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
+   NFTA_QUEUE_SREG_FROM,
+   NFTA_QUEUE_SREG_TO,
__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
diff --git a/src/expr/queue.c b/src/expr/queue.c
index 033c542..01380a6 100644
--- a/src/expr/queue.c
+++ b/src/expr/queue.c
@@ -21,6 +21,8 @@
 #include 
 
 struct nftnl_expr_queue {
+   enum nft_registers  sreg_from;
+   enum nft_registers  sreg_to;
uint16_tqueuenum;
uint16_tqueues_total;
uint16_tflags;
@@ -41,6 +43,12 @@ static int nftnl_expr_queue_set(struct nftnl_expr *e, 
uint16_t type,
case NFTNL_EXPR_QUEUE_FLAGS:
queue->flags = *((uint16_t *)data);
break;
+   case NFTNL_EXPR_QUEUE_SREG_FROM:
+   queue->sreg_from = *((uint32_t *)data);
+   break;
+   case NFTNL_EXPR_QUEUE_SREG_TO:
+   queue->sreg_to = *((uint32_t *)data);
+   break;
default:
return -1;
}
@@ -63,6 +71,12 @@ nftnl_expr_queue_get(const struct nftnl_expr *e, uint16_t 
type,
case NFTNL_EXPR_QUEUE_FLAGS:
*data_len = sizeof(queue->flags);
return >flags;
+   case NFTNL_EXPR_QUEUE_SREG_FROM:
+   *data_len = sizeof(queue->sreg_from);
+   return >sreg_from;
+   case NFTNL_EXPR_QUEUE_SREG_TO:
+   *data_len = sizeof(queue->sreg_to);
+   return >sreg_to;
}
return NULL;
 }
@@ -82,6 +96,11 @@ static int nftnl_expr_queue_cb(const struct nlattr *attr, 
void *data)
if (mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
abi_breakage();
break;
+   case NFTA_QUEUE_SREG_FROM:
+   case NFTA_QUEUE_SREG_TO:
+   if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
+   abi_breakage();
+   break;
}
 
tb[type] = attr;
@@ -99,6 +118,10 @@ nftnl_expr_queue_build(struct nlmsghdr *nlh, const struct 
nftnl_expr *e)
mnl_attr_put_u16(nlh, NFTA_QUEUE_TOTAL, 
htons(queue->queues_total));
if (e->flags & (1 << NFTNL_EXPR_QUEUE_FLAGS))
mnl_attr_put_u16(nlh, NFTA_QUEUE_FLAGS, htons(queue->flags));
+   if (e->flags & (1 << NFTNL_EXPR_QUEUE_SREG_FROM))
+   mnl_attr_put_u32(nlh, NFTA_QUEUE_SREG_FROM, 
htonl(queue->sreg_from));
+   if (e->flags & (1 << NFTNL_EXPR_QUEUE_S

[PATCH libnftnl 1/3] expr: queue: remove redundant NFTNL_EXPR_QUEUE_NUM set in json parse

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We have already set NFTNL_EXPR_QUEUE_NUM when parse "num" successfully,
here is wrong and redundant, remove it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 src/expr/queue.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/expr/queue.c b/src/expr/queue.c
index 522231b..033c542 100644
--- a/src/expr/queue.c
+++ b/src/expr/queue.c
@@ -136,7 +136,6 @@ nftnl_expr_queue_json_parse(struct nftnl_expr *e, json_t 
*root,
 
if (nftnl_jansson_parse_val(root, "num", NFTNL_TYPE_U16, , err) == 
0)
nftnl_expr_set_u16(e, NFTNL_EXPR_QUEUE_NUM, type);
-   nftnl_expr_set_u16(e, NFTNL_EXPR_QUEUE_NUM, type);
 
if (nftnl_jansson_parse_val(root, "total", NFTNL_TYPE_U16, , err) 
== 0)
nftnl_expr_set_u16(e, NFTNL_EXPR_QUEUE_TOTAL, code);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH libnftnl 2/3] tests: queue: add missing NFTNL_EXPR_QUEUE_FLAGS compare test

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We forgot to compare NFTNL_EXPR_QUEUE_FLAGS between two exprs,
now add it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 tests/nft-expr_queue-test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/nft-expr_queue-test.c b/tests/nft-expr_queue-test.c
index 1cc39aa..81d7dd2 100644
--- a/tests/nft-expr_queue-test.c
+++ b/tests/nft-expr_queue-test.c
@@ -39,6 +39,9 @@ static void cmp_nftnl_expr(struct nftnl_expr *rule_a,
if (nftnl_expr_get_u16(rule_a, NFTNL_EXPR_QUEUE_TOTAL) !=
nftnl_expr_get_u16(rule_b, NFTNL_EXPR_QUEUE_TOTAL))
print_err("Expr NFTNL_EXPR_QUEUE_TOTAL mismatches");
+   if (nftnl_expr_get_u16(rule_a, NFTNL_EXPR_QUEUE_FLAGS) !=
+   nftnl_expr_get_u16(rule_b, NFTNL_EXPR_QUEUE_FLAGS))
+   print_err("Expr NFTNL_EXPR_QUEUE_FLAGS mismatches");
 }
 
 int main(int argc, char *argv[])
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH libnftnl 0/3] expr: queue: add sreg_from and sreg_to support

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

This patch sets mainly used to add NFTA_QUEUE_SREG_FROM and NFTA_QUEUE_SREG_TO
attributes support in libnftnl. Meanwhile, I find some trivial bugs exsit in
queue expr. So try to fix them at patch #1 and patch #2.

Liping Zhang (3):
  expr: queue: remove redundant NFTNL_EXPR_QUEUE_NUM set in json parse
  tests: queue: add missing NFTNL_EXPR_QUEUE_FLAGS compare test
  expr: queue: add sreg_from and sreg_to support

 include/buffer.h|  2 +
 include/libnftnl/expr.h |  2 +
 include/linux/netfilter/nf_tables.h |  4 ++
 src/expr/queue.c| 85 +
 tests/nft-expr_queue-test.c | 11 +
 5 files changed, 96 insertions(+), 8 deletions(-)

-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nft_queue: add _SREG_FROM and _SRGE_TO to select the queue numbers

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Currently, the user can specify the queue numbers by _QUEUE_NUM and
_QUEUE_TOTAL attributes, this is enough in most situations.

But acctually, it is not very flexible, for example:
  tcp dport 80 mapped to queue0
  tcp dport 81 mapped to queue1
  tcp dport 82 mapped to queue2
In order to do this thing, we must add 3 nft rules, and more
mapping means more rules ...

So similer to nft_nat, take two registers to select the queue numbers,
then we can add one simple rule to mapping queues, maybe like this:
  queue num tcp dport map { 80:0, 81:1, 82:2 ... }

Another drawback is that queue range 0-65535 is not available, because
queue_total is u16 type, range 0-65535 means queue_total is 65536, then
it is overflowed to 0. Now there's no such restriction when we use
_SREG_FROM and _SREG_TO.

For compatibility, _SREG_FROM and _QUEUE_NUM can be specified at the same
time, but we will be preferred to use _SREG_FROM attr.

Suggested-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 ++
 net/netfilter/nft_queue.c| 99 +---
 2 files changed, 82 insertions(+), 21 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 24161e2..5b4e373 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -886,12 +886,16 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_FROM: source register of queue number start (NLA_U32: 
nft_registers)
+ * @NFTA_QUEUE_SREG_TO: source register of queue number end (NLA_U32: 
nft_registers)
  */
 enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
+   NFTA_QUEUE_SREG_FROM,
+   NFTA_QUEUE_SREG_TO,
__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index d16d599..6557118 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,9 +22,11 @@
 static u32 jhash_initval __read_mostly;
 
 struct nft_queue {
-   u16 queuenum;
-   u16 queues_total;
-   u16 flags;
+   enum nft_registers  sreg_from:8;
+   enum nft_registers  sreg_to:8;
+   u16 queuenum;
+   u16 queues_total;
+   u16 flags;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -32,17 +34,30 @@ static void nft_queue_eval(const struct nft_expr *expr,
   const struct nft_pktinfo *pkt)
 {
struct nft_queue *priv = nft_expr_priv(expr);
-   u32 queue = priv->queuenum;
+   u32 queue, queues_total, queue_end;
u32 ret;
 
-   if (priv->queues_total > 1) {
+   if (priv->sreg_from) {
+   queue = (u16)regs->data[priv->sreg_from];
+   queue_end = (u16)regs->data[priv->sreg_to];
+
+   if (queue_end > queue)
+   queues_total = queue_end - queue + 1;
+   else
+   queues_total = 1;
+   } else {
+   queue = priv->queuenum;
+   queues_total = priv->queues_total;
+   }
+
+   if (queues_total > 1) {
if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
int cpu = smp_processor_id();
 
-   queue = priv->queuenum + cpu % priv->queues_total;
+   queue += cpu % queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
-priv->queues_total, pkt->pf,
+queues_total, pkt->pf,
 jhash_initval);
}
}
@@ -58,6 +73,8 @@ static const struct nla_policy 
nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM]= { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL]  = { .type = NLA_U16 },
[NFTA_QUEUE_FLAGS]  = { .type = NLA_U16 },
+   [NFTA_QUEUE_SREG_FROM]  = { .type = NLA_U32 },
+   [NFTA_QUEUE_SREG_TO]= { .type = NLA_U32 },
 };
 
 static int nft_queue_init(const struct nft_ctx *ctx,
@@ -66,30 +83,58 @@ static int nft_queue_init(const struct nft_ctx *ctx,
 {
struct nft_queue *priv = nft_expr_priv(expr);
u32 maxid;
+   int err;
 
-   if (tb[NFTA_QUEUE_NUM] == NULL)
+   if (!tb[NFTA_QUEUE_NUM] && !tb[NFTA_QUEUE_SREG_FROM])
return -EINVAL;
 
init_hashrandom(_initv

[PATCH nf-next] netfilter: nf_queue: get rid of dependency on IP6_NF_IPTABLES

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

hash_v6 is used by both nftables and ip6tables, so depend on
IP6_NF_IPTABLES is not properly.

Actually, it only parses ipv6hdr and computes a hash value, so
even if IPV6 is disabled, there's no side effect too, remove it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/net/netfilter/nf_queue.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 0dbce55..cc8a11f 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -54,7 +54,6 @@ static inline u32 hash_v4(const struct sk_buff *skb, u32 
jhash_initval)
(__force u32)iph->saddr, iph->protocol, jhash_initval);
 }
 
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
 {
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -77,7 +76,6 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 
jhash_initval)
 
return jhash_3words(a, b, c, jhash_initval);
 }
-#endif
 
 static inline u32
 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
@@ -85,10 +83,8 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 
queues_total, u8 family,
 {
if (family == NFPROTO_IPV4)
queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 
32;
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
else if (family == NFPROTO_IPV6)
queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 
32;
-#endif
 
return queue;
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] src: fix compile error due to _UNTIL renamed to _MODULUS in libnftnl

2016-09-11 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

In the latest libnftnl, NFTNL_EXPR_NG_UNTIL was renamed to
NFTNL_EXPR_NG_MODULUS, so compile error happened:
  netlink_linearize.c: In function ‘netlink_gen_numgen’:
  netlink_linearize.c:184:26: error: ‘NFTNL_EXPR_NG_UNTIL’ undeclared
  (first use in this function)

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 src/netlink_delinearize.c | 2 +-
 src/netlink_linearize.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index 05edb01..6bb27b6 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -554,7 +554,7 @@ static void netlink_parse_numgen(struct netlink_parse_ctx 
*ctx,
struct expr *expr;
 
type  = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_TYPE);
-   until = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_UNTIL);
+   until = nftnl_expr_get_u32(nle, NFTNL_EXPR_NG_MODULUS);
 
expr = numgen_expr_alloc(loc, type, until);
dreg = netlink_parse_register(nle, NFTNL_EXPR_NG_DREG);
diff --git a/src/netlink_linearize.c b/src/netlink_linearize.c
index 5204154..558deb2 100644
--- a/src/netlink_linearize.c
+++ b/src/netlink_linearize.c
@@ -181,7 +181,7 @@ static void netlink_gen_numgen(struct netlink_linearize_ctx 
*ctx,
nle = alloc_nft_expr("numgen");
netlink_put_register(nle, NFTNL_EXPR_NG_DREG, dreg);
netlink_put_register(nle, NFTNL_EXPR_NG_TYPE, expr->numgen.type);
-   nftnl_expr_set_u32(nle, NFTNL_EXPR_NG_UNTIL, expr->numgen.mod);
+   nftnl_expr_set_u32(nle, NFTNL_EXPR_NG_MODULUS, expr->numgen.mod);
nftnl_rule_add_expr(ctx->nlr, nle);
 }
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] tests: py: replace "eth0" with "lo" in dup expr tests

2016-09-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

This patch follow up on Manuel's commit a8871ba6daa0 ("tests: py: any:
Make tests more generic by using other interfaces"). The ifindex of
"eth0" is not always 1, furthermore, "eth0" maybe not exist on some
systems. So replace it with "lo" will make tests more rubost.

In other test cases, "eth0" is used by iifname or oifname, so there's no
need to convert it to "lo". Even if "eth0" is not exist, test will never
fail.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 tests/py/ip/dup.t  | 4 ++--
 tests/py/ip/dup.t.payload  | 8 
 tests/py/ip6/dup.t | 4 ++--
 tests/py/ip6/dup.t.payload | 8 
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/py/ip/dup.t b/tests/py/ip/dup.t
index 5119636..700031c 100644
--- a/tests/py/ip/dup.t
+++ b/tests/py/ip/dup.t
@@ -3,5 +3,5 @@
 *ip;test-ip4;input
 
 dup to 192.168.2.1;ok
-dup to 192.168.2.1 device "eth0";ok
-dup to ip saddr map { 192.168.2.120 : 192.168.2.1 } device "eth0";ok
+dup to 192.168.2.1 device "lo";ok
+dup to ip saddr map { 192.168.2.120 : 192.168.2.1 } device "lo";ok
diff --git a/tests/py/ip/dup.t.payload b/tests/py/ip/dup.t.payload
index 94d55c6..347dc07 100644
--- a/tests/py/ip/dup.t.payload
+++ b/tests/py/ip/dup.t.payload
@@ -3,19 +3,19 @@ ip test-ip4 test
   [ immediate reg 1 0x0102a8c0 ]
   [ dup sreg_addr 1 ]
 
-# dup to 192.168.2.1 device "eth0"
+# dup to 192.168.2.1 device "lo"
 ip test-ip4 test 
   [ immediate reg 1 0x0102a8c0 ]
-  [ immediate reg 2 0x0002 ]
+  [ immediate reg 2 0x0001 ]
   [ dup sreg_addr 1 sreg_dev 2 ]
 
-# dup to ip saddr map { 192.168.2.120 : 192.168.2.1 } device "eth0"
+# dup to ip saddr map { 192.168.2.120 : 192.168.2.1 } device "lo"
 __map%d test-ip4 b
 __map%d test-ip4 0
 element 7802a8c0  : 0102a8c0 0 [end]
 ip test-ip4 test 
   [ payload load 4b @ network header + 12 => reg 1 ]
   [ lookup reg 1 set __map%d dreg 1 ]
-  [ immediate reg 2 0x0002 ]
+  [ immediate reg 2 0x0001 ]
   [ dup sreg_addr 1 sreg_dev 2 ]
 
diff --git a/tests/py/ip6/dup.t b/tests/py/ip6/dup.t
index f3c8c83..72cc5d5 100644
--- a/tests/py/ip6/dup.t
+++ b/tests/py/ip6/dup.t
@@ -3,5 +3,5 @@
 *ip6;test-ip6;input
 
 dup to abcd::1;ok
-dup to abcd::1 device "eth0";ok
-dup to ip6 saddr map { abcd::1 : cafe::cafe } device "eth0";ok
+dup to abcd::1 device "lo";ok
+dup to ip6 saddr map { abcd::1 : cafe::cafe } device "lo";ok
diff --git a/tests/py/ip6/dup.t.payload b/tests/py/ip6/dup.t.payload
index 30015cc..c441d4b 100644
--- a/tests/py/ip6/dup.t.payload
+++ b/tests/py/ip6/dup.t.payload
@@ -3,19 +3,19 @@ ip6 test test
   [ immediate reg 1 0xcdab 0x 0x 0x0100 ]
   [ dup sreg_addr 1 ]
 
-# dup to abcd::1 device "eth0"
+# dup to abcd::1 device "lo"
 ip6 test test 
   [ immediate reg 1 0xcdab 0x 0x 0x0100 ]
-  [ immediate reg 2 0x0002 ]
+  [ immediate reg 2 0x0001 ]
   [ dup sreg_addr 1 sreg_dev 2 ]
 
-# dup to ip6 saddr map { abcd::1 : cafe::cafe } device "eth0"
+# dup to ip6 saddr map { abcd::1 : cafe::cafe } device "lo"
 __map%d test-ip6 b
 __map%d test-ip6 0
 element cdab   0100  : feca  
 feca 0 [end]
 ip6 test-ip6 input 
   [ payload load 16b @ network header + 8 => reg 1 ]
   [ lookup reg 1 set __map%d dreg 1 ]
-  [ immediate reg 2 0x0002 ]
+  [ immediate reg 2 0x0001 ]
   [ dup sreg_addr 1 sreg_dev 2 ]
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next] netfilter: nft_queue: check the validation of queues_total and queuenum

2016-09-09 Thread Liping Zhang
2016-09-09 22:04 GMT+08:00 Pablo Neira Ayuso :
> More comments on things I see on nft_queue at this stage:
>
> 1) Another issue, I can see nfqueue_hash() depends on
> CONFIG_IP6_NF_IPTABLES, this is not good since nft_queue
> infrastructure should not depend on iptables. Probably making this
> dependent of CONFIG_IPV6 is enough, unless you find anything better.

Yes, I can send a patch to improve this.

> 2) It would be good if nft_queue takes a _SREG_FROM and _SREG_TO to
> select the queue numbers, in a similar fashion to nft_nat. The idea is
> that we allow plugging nft_queue into nftables mapping, currently this
> is not working given that the queue number is passed as an attribute
> that contains the value.

I will have a look into this. I think _SREG_FROM and _SREG_TO
stands for [startqnum:endqnum], currently queuenum and queue_total
has a restriction, see below.

> 3) It would be good to add py tests with larger range. I remember that
> the range 1-65535 currently doesn't work in both nft_queue and
> xt_NFQUEUE because the queue_total arithmetics are not right.

Cover with large range is necessary. Yes, I find that range 0-65535(not 1-65535)
currently doesn't work in both. This is because queue_total is U16 type,
but 0-65535 means queue_total is 65536, this is overflow, so queue_total is
treated as zero.

A workaround method is that when the queue_total is 0, we treat it as 65536,
but it's a little ugly and tricky. But if we use [startqnum:endqnum],
this problem
will be not exist.

I think for iptables, it's simple, we can introduce a new revision 4
to fix this problem.

But for nftables, if we just convert to use the new attribute _SREG_FROM and
_SREG_TO, when the user use the new nftables and the old kernel, queue expr
cannot work well. It seems that we must handle the compatibility in
the user space
too.

Thanks Pablo!
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nft_chain_route: re-route before skb is queued to userspace

2016-09-06 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Imagine such situation, user add the following nft rules, and queue
the packets to userspace for further check:
  # ip rule add fwmark 0x0/0x1 lookup eth0
  # ip rule add fwmark 0x1/0x1 lookup eth1
  # nft add table filter
  # nft add chain filter output {type route hook output priority 0 \;}
  # nft add rule filter output mark set 0x1
  # nft add rule filter output queue num 0

But after we reinject the skbuff, the packet will be sent via the
wrong route, i.e. in this case, the packet will be routed via eth0
table, not eth1 table. Because we skip to do re-route when verdict
is NF_QUEUE, even if the mark was changed.

Acctually, we should not touch sk_buff if verdict is NF_DROP or
NF_STOLEN, and when re-route fails, return NF_DROP with error code.
This is consistent with the mangle table in iptables.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/ipv4/netfilter/nft_chain_route_ipv4.c | 11 +++
 net/ipv6/netfilter/nft_chain_route_ipv6.c | 10 +++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c 
b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 2375b0a..30493be 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv,
__be32 saddr, daddr;
u_int8_t tos;
const struct iphdr *iph;
+   int err;
 
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv,
tos = iph->tos;
 
ret = nft_do_chain(, priv);
-   if (ret != NF_DROP && ret != NF_QUEUE) {
+   if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
 
if (iph->saddr != saddr ||
iph->daddr != daddr ||
skb->mark != mark ||
-   iph->tos != tos)
-   if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
-   ret = NF_DROP;
+   iph->tos != tos) {
+   err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+   if (err < 0)
+   ret = NF_DROP_ERR(err);
+   }
}
return ret;
 }
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c 
b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 71d995f..2535223 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv,
struct in6_addr saddr, daddr;
u_int8_t hop_limit;
u32 mark, flowlabel;
+   int err;
 
/* malformed packet, drop it */
if (nft_set_pktinfo_ipv6(, skb, state) < 0)
@@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv,
flowlabel = *((u32 *)ipv6_hdr(skb));
 
ret = nft_do_chain(, priv);
-   if (ret != NF_DROP && ret != NF_QUEUE &&
+   if (ret != NF_DROP && ret != NF_STOLEN &&
(memcmp(_hdr(skb)->saddr, , sizeof(saddr)) ||
 memcmp(_hdr(skb)->daddr, , sizeof(daddr)) ||
 skb->mark != mark ||
 ipv6_hdr(skb)->hop_limit != hop_limit ||
-flowlabel != *((u_int32_t *)ipv6_hdr(skb
-   return ip6_route_me_harder(state->net, skb) == 0 ? ret : 
NF_DROP;
+flowlabel != *((u_int32_t *)ipv6_hdr(skb {
+   err = ip6_route_me_harder(state->net, skb);
+   if (err < 0)
+   ret = NF_DROP_ERR(err);
+   }
 
return ret;
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nf_tables_trace: fix endiness when dump chain policy

2016-09-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

NFTA_TRACE_POLICY attribute is big endian, but we forget to call
htonl to convert it. Fortunately, this attribute is parsed as big
endian in libnftnl.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 39eb1cc..fa24a5b 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -237,7 +237,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
break;
case NFT_TRACETYPE_POLICY:
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
-info->basechain->policy))
+htonl(info->basechain->policy)))
goto nla_put_failure;
break;
}
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libip6t_SNAT/DNAT: add square bracket in xlat output when port is specified

2016-09-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

It is better to add square brackets to ip6 address in nft translation
output when the port is specified. This is keep consistent with the
nft syntax.

Before this patch:
  # ip6tables-translate -t nat -A OUTPUT -p tcp -j DNAT --to-destination \
  [123::4]:1
  nft add rule ip6 nat OUTPUT meta l4proto tcp counter dnat to 123::4 :1
  # ip6tables-translate -t nat -A POSTROUTING -p tcp -j SNAT --to-source \
  [123::4-123::8]:1
  nft add rule ip6 nat POSTROUTING meta l4proto tcp counter snat to 
123::4-123::8 :1

Apply this patch:
  # ip6tables-translate -t nat -A OUTPUT -p tcp -j DNAT --to-destination \
  [123::4]:1
  nft add rule ip6 nat OUTPUT meta l4proto tcp counter dnat to [123::4]:1
  # ip6tables-translate -t nat -A POSTROUTING -p tcp -j SNAT --to-source \
  [123::4-123::8]:1
  nft add rule ip6 nat POSTROUTING meta l4proto tcp counter snat to 
[123::4]-[123::8]:1

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libip6t_DNAT.c | 21 ++---
 extensions/libip6t_SNAT.c | 21 ++---
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/extensions/libip6t_DNAT.c b/extensions/libip6t_DNAT.c
index 97a8b1c..08d920d 100644
--- a/extensions/libip6t_DNAT.c
+++ b/extensions/libip6t_DNAT.c
@@ -234,17 +234,24 @@ static void DNAT_save(const void *ip, const struct 
xt_entry_target *target)
 static void print_range_xlate(const struct nf_nat_range *range,
  struct xt_xlate *xl)
 {
+   bool proto_specified = range->flags & NF_NAT_RANGE_PROTO_SPECIFIED;
+
if (range->flags & NF_NAT_RANGE_MAP_IPS) {
-   xt_xlate_add(xl, "%s",
-  xtables_ip6addr_to_numeric(>min_addr.in6));
+   xt_xlate_add(xl, "%s%s%s",
+proto_specified ? "[" : "",
+xtables_ip6addr_to_numeric(>min_addr.in6),
+proto_specified ? "]" : "");
 
if (memcmp(>min_addr, >max_addr,
-  sizeof(range->min_addr)))
-   xt_xlate_add(xl, "-%s",
-xtables_ip6addr_to_numeric(>max_addr.in6));
+  sizeof(range->min_addr))) {
+   xt_xlate_add(xl, "-%s%s%s",
+proto_specified ? "[" : "",
+
xtables_ip6addr_to_numeric(>max_addr.in6),
+proto_specified ? "]" : "");
+   }
}
-   if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-   xt_xlate_add(xl, " :%hu", ntohs(range->min_proto.tcp.port));
+   if (proto_specified) {
+   xt_xlate_add(xl, ":%hu", ntohs(range->min_proto.tcp.port));
 
if (range->max_proto.tcp.port != range->min_proto.tcp.port)
xt_xlate_add(xl, "-%hu",
diff --git a/extensions/libip6t_SNAT.c b/extensions/libip6t_SNAT.c
index c3d8190..671ac61 100644
--- a/extensions/libip6t_SNAT.c
+++ b/extensions/libip6t_SNAT.c
@@ -244,17 +244,24 @@ static void SNAT_save(const void *ip, const struct 
xt_entry_target *target)
 static void print_range_xlate(const struct nf_nat_range *range,
  struct xt_xlate *xl)
 {
+   bool proto_specified = range->flags & NF_NAT_RANGE_PROTO_SPECIFIED;
+
if (range->flags & NF_NAT_RANGE_MAP_IPS) {
-   xt_xlate_add(xl, "%s",
-  xtables_ip6addr_to_numeric(>min_addr.in6));
+   xt_xlate_add(xl, "%s%s%s",
+proto_specified ? "[" : "",
+xtables_ip6addr_to_numeric(>min_addr.in6),
+proto_specified ? "]" : "");
 
if (memcmp(>min_addr, >max_addr,
-  sizeof(range->min_addr)))
-   xt_xlate_add(xl, "-%s",
-xtables_ip6addr_to_numeric(>max_addr.in6));
+  sizeof(range->min_addr))) {
+   xt_xlate_add(xl, "-%s%s%s",
+proto_specified ? "[" : "",
+
xtables_ip6addr_to_numeric(>max_addr.in6),
+proto_specified ? "]" : "");
+   }
}
-   if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-   xt_xlate_add(xl, " :%hu", ntohs(range->min_proto.tcp.port));
+   if (proto_specified) {
+   xt_xlate_add(xl, ":%hu", ntohs(range->min_proto.tcp.port));
 
if (range->max_proto.tcp.port != range->min_proto.tcp.port)
xt_xlate_add(xl, "-%hu",
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nf_tables_netdev: remove redundant ip_hdr assignment

2016-08-28 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We have already use skb_header_pointer to get the ip header pointer,
so there's no need to use ip_hdr again. Moreover, in NETDEV INGRESS
hook, ip header maybe not linear, so use ip_hdr is not appropriate,
remove it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_netdev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 5eefe4a..75d696f 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -30,7 +30,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
if (!iph)
return;
 
-   iph = ip_hdr(skb);
if (iph->ihl < 5 || iph->version != 4)
return;
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables 3/3] extensions: libip[6]t_REDIRECT: use new nft syntax when do xlate

2016-08-28 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

After commit "parser_bison: redirect to :port for consistency with
nat/masq statement" in nftables tree, we should recommend the end
user to use the new syntax.

Before this patch:
  # iptables-translate -t nat -A PREROUTING -p tcp -j REDIRECT --to-ports 1
  nft add rule ip nat PREROUTING ip protocol tcp counter redirect to 1

Apply this patch:
  # iptables-translate -t nat -A PREROUTING -p tcp -j REDIRECT --to-ports 1
  nft add rule ip nat PREROUTING ip protocol tcp counter redirect to :1

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libip6t_REDIRECT.c | 2 +-
 extensions/libipt_REDIRECT.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/libip6t_REDIRECT.c b/extensions/libip6t_REDIRECT.c
index 32f85b9..8e04d2c 100644
--- a/extensions/libip6t_REDIRECT.c
+++ b/extensions/libip6t_REDIRECT.c
@@ -138,7 +138,7 @@ static int REDIRECT_xlate(struct xt_xlate *xl,
const struct nf_nat_range *range = (const void *)params->target->data;
 
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-   xt_xlate_add(xl, "redirect to %hu",
+   xt_xlate_add(xl, "redirect to :%hu",
   ntohs(range->min_proto.tcp.port));
if (range->max_proto.tcp.port != range->min_proto.tcp.port)
xt_xlate_add(xl, "-%hu ",
diff --git a/extensions/libipt_REDIRECT.c b/extensions/libipt_REDIRECT.c
index 31ca88c..7850306 100644
--- a/extensions/libipt_REDIRECT.c
+++ b/extensions/libipt_REDIRECT.c
@@ -143,7 +143,7 @@ static int REDIRECT_xlate(struct xt_xlate *xl,
const struct nf_nat_ipv4_range *r = >range[0];
 
if (r->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-   xt_xlate_add(xl, "redirect to %hu", ntohs(r->min.tcp.port));
+   xt_xlate_add(xl, "redirect to :%hu", ntohs(r->min.tcp.port));
if (r->max.tcp.port != r->min.tcp.port)
xt_xlate_add(xl, "-%hu ", ntohs(r->max.tcp.port));
if (mr->range[0].flags & NF_NAT_RANGE_PROTO_RANDOM)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables 2/3] extensions: libip[6]t_SNAT/DNAT: use the new nft syntax when do xlate

2016-08-28 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

After commit "src: add 'to' for snat and dnat" in nftables tree,
we should recommend the end user to use the new syntax.

Before this patch:
  # iptables-translate -t nat -A POSTROUTING -j SNAT --to-source 1.1.1.1
  nft add rule ip nat POSTROUTING counter snat 1.1.1.1
  # ip6tables-translate -t nat -A PREROUTING -j DNAT --to-destination
  2001::1
  nft add rule ip6 nat PREROUTING counter dnat 2001::1

Apply this patch:
  # iptables-translate -t nat -A POSTROUTING -j SNAT --to-source 1.1.1.1
  nft add rule ip nat POSTROUTING counter snat to 1.1.1.1
  # ip6tables-translate -t nat -A PREROUTING -j DNAT --to-destination
  2001::1
  nft add rule ip6 nat PREROUTING counter dnat to 2001::1

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libip6t_DNAT.c | 2 +-
 extensions/libip6t_SNAT.c | 2 +-
 extensions/libipt_DNAT.c  | 2 +-
 extensions/libipt_SNAT.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/libip6t_DNAT.c b/extensions/libip6t_DNAT.c
index 3925c3b..97a8b1c 100644
--- a/extensions/libip6t_DNAT.c
+++ b/extensions/libip6t_DNAT.c
@@ -259,7 +259,7 @@ static int DNAT_xlate(struct xt_xlate *xl,
bool sep_need = false;
const char *sep = " ";
 
-   xt_xlate_add(xl, "dnat ");
+   xt_xlate_add(xl, "dnat to ");
print_range_xlate(range, xl);
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
xt_xlate_add(xl, " random");
diff --git a/extensions/libip6t_SNAT.c b/extensions/libip6t_SNAT.c
index 4d742ea..c3d8190 100644
--- a/extensions/libip6t_SNAT.c
+++ b/extensions/libip6t_SNAT.c
@@ -269,7 +269,7 @@ static int SNAT_xlate(struct xt_xlate *xl,
bool sep_need = false;
const char *sep = " ";
 
-   xt_xlate_add(xl, "snat ");
+   xt_xlate_add(xl, "snat to ");
print_range_xlate(range, xl);
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
xt_xlate_add(xl, " random");
diff --git a/extensions/libipt_DNAT.c b/extensions/libipt_DNAT.c
index 7890719..a14d16f 100644
--- a/extensions/libipt_DNAT.c
+++ b/extensions/libipt_DNAT.c
@@ -271,7 +271,7 @@ static int DNAT_xlate(struct xt_xlate *xl,
const char *sep = " ";
 
for (i = 0; i < info->mr.rangesize; i++) {
-   xt_xlate_add(xl, "dnat ");
+   xt_xlate_add(xl, "dnat to ");
print_range_xlate(>mr.range[i], xl);
if (info->mr.range[i].flags & NF_NAT_RANGE_PROTO_RANDOM) {
xt_xlate_add(xl, " random");
diff --git a/extensions/libipt_SNAT.c b/extensions/libipt_SNAT.c
index 5c699d3..e92d811 100644
--- a/extensions/libipt_SNAT.c
+++ b/extensions/libipt_SNAT.c
@@ -282,7 +282,7 @@ static int SNAT_xlate(struct xt_xlate *xl,
const char *sep = " ";
 
for (i = 0; i < info->mr.rangesize; i++) {
-   xt_xlate_add(xl, "snat ");
+   xt_xlate_add(xl, "snat to ");
print_range_xlate(>mr.range[i], xl);
if (info->mr.range[i].flags & NF_NAT_RANGE_PROTO_RANDOM) {
xt_xlate_add(xl, " random");
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables 1/3] extensions: libipt_DNAT/SNAT: fix "OOM" when do translation to nft

2016-08-28 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When I want to translate SNAT target to nft rule, an error message
was printed out:
  # iptables-translate -A POSTROUTING -j SNAT --to-source 1.1.1.1
  iptables-translate v1.6.0: OOM

Because ipt_natinfo{} started with a xt_entry_target{}, so when we
get the ipt_natinfo pointer, we should use the target itself,
not its data pointer. Yes, it is a little tricky and it's different
with other targets.

Fixes: 7a0992da44cf ("src: introduce struct xt_xlate_{mt,tg}_params")
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libipt_DNAT.c | 2 +-
 extensions/libipt_SNAT.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/libipt_DNAT.c b/extensions/libipt_DNAT.c
index c463f07..7890719 100644
--- a/extensions/libipt_DNAT.c
+++ b/extensions/libipt_DNAT.c
@@ -265,7 +265,7 @@ static void print_range_xlate(const struct 
nf_nat_ipv4_range *r,
 static int DNAT_xlate(struct xt_xlate *xl,
  const struct xt_xlate_tg_params *params)
 {
-   const struct ipt_natinfo *info = (const void *)params->target->data;
+   const struct ipt_natinfo *info = (const void *)params->target;
unsigned int i = 0;
bool sep_need = false;
const char *sep = " ";
diff --git a/extensions/libipt_SNAT.c b/extensions/libipt_SNAT.c
index 71717fd..5c699d3 100644
--- a/extensions/libipt_SNAT.c
+++ b/extensions/libipt_SNAT.c
@@ -276,7 +276,7 @@ static void print_range_xlate(const struct 
nf_nat_ipv4_range *r,
 static int SNAT_xlate(struct xt_xlate *xl,
  const struct xt_xlate_tg_params *params)
 {
-   const struct ipt_natinfo *info = (const void *)params->target->data;
+   const struct ipt_natinfo *info = (const void *)params->target;
unsigned int i = 0;
bool sep_need = false;
const char *sep = " ";
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] meta: fix memory leak in tc classid parser

2016-08-28 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We forgot to free the str which was allocated by xstrdup,
so memory leak will happen.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 src/meta.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/meta.c b/src/meta.c
index 5a6fee5..87eafee 100644
--- a/src/meta.c
+++ b/src/meta.c
@@ -90,7 +90,7 @@ static struct error_record *tchandle_type_parse(const struct 
expr *sym,
struct expr **res)
 {
uint32_t handle;
-   char *str;
+   char *str = NULL;
 
if (strcmp(sym->identifier, "root") == 0)
handle = TC_H_ROOT;
@@ -127,6 +127,7 @@ static struct error_record *tchandle_type_parse(const 
struct expr *sym,
handle = strtoull(sym->identifier, NULL, 0);
}
 out:
+   xfree(str);
*res = constant_expr_alloc(>location, sym->dtype,
   BYTEORDER_HOST_ENDIAN,
   sizeof(handle) * BITS_PER_BYTE, );
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] tests: shell: add testcase for reject expr

2016-08-22 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Reject expr is only valid in input/forward/output chain,
and if user can add reject expr in prerouting chain, kernel
panic will happen.

So add a simple test case to cover this situation.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 tests/shell/testcases/chains/0012reject_in_prerouting_1 | 9 +
 1 file changed, 9 insertions(+)
 create mode 100755 tests/shell/testcases/chains/0012reject_in_prerouting_1

diff --git a/tests/shell/testcases/chains/0012reject_in_prerouting_1 
b/tests/shell/testcases/chains/0012reject_in_prerouting_1
new file mode 100755
index 000..81cda0c
--- /dev/null
+++ b/tests/shell/testcases/chains/0012reject_in_prerouting_1
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+$NFT add table t
+$NFT add chain t prerouting {type filter hook prerouting priority 0 \; }
+# wrong hook prerouting, only input/forward/output is valid
+$NFT add rule t prerouting reject 2>/dev/null
+echo "E: accepted reject in prerouting hook" >&2
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nft_meta: improve the validity check of pkttype set expr

2016-08-22 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

"meta pkttype set" is only supported on prerouting chain with bridge
family and ingress chain with netdev family.

But the validate check is incomplete, and the user can add the nft
rules on input chain with bridge family, for example:
  # nft add table bridge filter
  # nft add chain bridge filter input {type filter hook input \
priority 0 \;}
  # nft add chain bridge filter test
  # nft add rule bridge filter test meta pkttype set unicast
  # nft add rule bridge filter input jump test

This patch fixes the problem.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/net/netfilter/nft_meta.h   |  4 
 net/bridge/netfilter/nft_meta_bridge.c |  1 +
 net/netfilter/nft_meta.c   | 17 +
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index d27588c..1139cde 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -36,4 +36,8 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 void nft_meta_set_destroy(const struct nft_ctx *ctx,
  const struct nft_expr *expr);
 
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data);
+
 #endif
diff --git a/net/bridge/netfilter/nft_meta_bridge.c 
b/net/bridge/netfilter/nft_meta_bridge.c
index 4b901d9..ad47a92 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -86,6 +86,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.init   = nft_meta_set_init,
.destroy= nft_meta_set_destroy,
.dump   = nft_meta_set_dump,
+   .validate   = nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 2863f34..8a6bc76 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -291,10 +291,16 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 }
 EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
-static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
 {
+   struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
 
+   if (priv->key != NFT_META_PKTTYPE)
+   return 0;
+
switch (ctx->afi->family) {
case NFPROTO_BRIDGE:
hooks = 1 << NF_BR_PRE_ROUTING;
@@ -308,6 +314,7 @@ static int nft_meta_set_init_pkttype(const struct nft_ctx 
*ctx)
 
return nft_chain_validate_hooks(ctx->chain, hooks);
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
 
 int nft_meta_set_init(const struct nft_ctx *ctx,
  const struct nft_expr *expr,
@@ -327,15 +334,16 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
len = sizeof(u8);
break;
case NFT_META_PKTTYPE:
-   err = nft_meta_set_init_pkttype(ctx);
-   if (err)
-   return err;
len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
 
+   err = nft_meta_set_validate(ctx, expr, NULL);
+   if (err < 0)
+   return err;
+
priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -407,6 +415,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.init   = nft_meta_set_init,
.destroy= nft_meta_set_destroy,
.dump   = nft_meta_set_dump,
+   .validate   = nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 1/3] netfilter: nfnetlink: use list_for_each_entry_safe to delete all objects

2016-08-22 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

cttimeout and acct objects are deleted from the list while traversing
it, so use list_for_each_entry is unsafe here.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_acct.c  | 6 +++---
 net/netfilter/nfnetlink_cttimeout.c | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 70eb2f6a..d44d89b 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -343,12 +343,12 @@ static int nfnl_acct_del(struct net *net, struct sock 
*nfnl,
 struct sk_buff *skb, const struct nlmsghdr *nlh,
 const struct nlattr * const tb[])
 {
-   char *acct_name;
-   struct nf_acct *cur;
+   struct nf_acct *cur, *tmp;
int ret = -ENOENT;
+   char *acct_name;
 
if (!tb[NFACCT_NAME]) {
-   list_for_each_entry(cur, >nfnl_acct_list, head)
+   list_for_each_entry_safe(cur, tmp, >nfnl_acct_list, head)
nfnl_acct_try_del(cur);
 
return 0;
diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 68216cd..f74fee1 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -350,12 +350,13 @@ static int cttimeout_del_timeout(struct net *net, struct 
sock *ctnl,
 const struct nlmsghdr *nlh,
 const struct nlattr * const cda[])
 {
-   struct ctnl_timeout *cur;
+   struct ctnl_timeout *cur, *tmp;
int ret = -ENOENT;
char *name;
 
if (!cda[CTA_TIMEOUT_NAME]) {
-   list_for_each_entry(cur, >nfct_timeout_list, head)
+   list_for_each_entry_safe(cur, tmp, >nfct_timeout_list,
+head)
ctnl_timeout_try_del(net, cur);
 
return 0;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 3/3] netfilter: cttimeout: unlink timeout objs in the unconfirmed ct lists

2016-08-22 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

KASAN reported this bug:
  BUG: KASAN: use-after-free in icmp_packet+0x25/0x50 [nf_conntrack_ipv4] at
  addr 880002db08c8
  Read of size 4 by task lt-nf-queue/19041
  Call Trace:
[] dump_stack+0x63/0x88
  [] kasan_report_error+0x528/0x560
  [] kasan_report+0x58/0x60
  [] ? icmp_packet+0x25/0x50 [nf_conntrack_ipv4]
  [] __asan_load4+0x61/0x80
  [] icmp_packet+0x25/0x50 [nf_conntrack_ipv4]
  [] nf_conntrack_in+0x550/0x980 [nf_conntrack]
  [] ? __nf_conntrack_confirm+0xb10/0xb10 [nf_conntrack]
  [ ... ]

The main reason is that we missed to unlink the timeout objects in the
unconfirmed ct lists, so we will access the timeout objects that have
already been freed.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_cttimeout.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 6844c7a..139e086 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -302,7 +302,16 @@ static void ctnl_untimeout(struct net *net, struct 
ctnl_timeout *timeout)
const struct hlist_nulls_node *nn;
unsigned int last_hsize;
spinlock_t *lock;
-   int i;
+   int i, cpu;
+
+   for_each_possible_cpu(cpu) {
+   struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+   spin_lock_bh(>lock);
+   hlist_nulls_for_each_entry(h, nn, >unconfirmed, hnnode)
+   untimeout(h, timeout);
+   spin_unlock_bh(>lock);
+   }
 
local_bh_disable();
 restart:
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 2/3] netfilter: cttimeout: put back l4proto when replacing timeout policy

2016-08-22 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We forget to call nf_ct_l4proto_put when replacing the existing
timeout policy. Acctually, there's no need to get ct l4proto
before doing replace, so we can move it to a later position.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_cttimeout.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index f74fee1..6844c7a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -98,31 +98,28 @@ static int cttimeout_new_timeout(struct net *net, struct 
sock *ctnl,
break;
}
 
-   l4proto = nf_ct_l4proto_find_get(l3num, l4num);
-
-   /* This protocol is not supportted, skip. */
-   if (l4proto->l4proto != l4num) {
-   ret = -EOPNOTSUPP;
-   goto err_proto_put;
-   }
-
if (matching) {
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
/* You cannot replace one timeout policy by another of
 * different kind, sorry.
 */
if (matching->l3num != l3num ||
-   matching->l4proto->l4proto != l4num) {
-   ret = -EINVAL;
-   goto err_proto_put;
-   }
-
-   ret = ctnl_timeout_parse_policy(>data,
-   l4proto, net,
-   cda[CTA_TIMEOUT_DATA]);
-   return ret;
+   matching->l4proto->l4proto != l4num)
+   return -EINVAL;
+
+   return ctnl_timeout_parse_policy(>data,
+matching->l4proto, net,
+cda[CTA_TIMEOUT_DATA]);
}
-   ret = -EBUSY;
+
+   return -EBUSY;
+   }
+
+   l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+   /* This protocol is not supportted, skip. */
+   if (l4proto->l4proto != l4num) {
+   ret = -EOPNOTSUPP;
goto err_proto_put;
}
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nft_reject: restrict to INPUT/FORWARD/OUTPUT

2016-08-21 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

After I add the nft rule "nft add rule filter prerouting reject
with tcp reset", kernel panic happened on my system:
  NULL pointer dereference at ...
  IP: [] nf_send_reset+0xaf/0x400
  Call Trace:
  [] ? nf_reject_ip_tcphdr_get+0x160/0x160
  [] nft_reject_ipv4_eval+0x61/0xb0 [nft_reject_ipv4]
  [] nft_do_chain+0x1fa/0x890 [nf_tables]
  [] ? __nft_trace_packet+0x170/0x170 [nf_tables]
  [] ? nf_ct_invert_tuple+0xb0/0xc0 [nf_conntrack]
  [] ? nf_nat_setup_info+0x5d4/0x650 [nf_nat]
  [...]

Because in the PREROUTING chain, routing information is not exist,
then we will dereference the NULL pointer and oops happen.

So we restrict reject expression to INPUT, FORWARD and OUTPUT chain.
This is consistent with iptables REJECT target.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/net/netfilter/nft_reject.h   |  4 
 net/ipv4/netfilter/nft_reject_ipv4.c |  1 +
 net/ipv6/netfilter/nft_reject_ipv6.c |  1 +
 net/netfilter/nft_reject.c   | 16 
 net/netfilter/nft_reject_inet.c  |  7 ++-
 5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nft_reject.h 
b/include/net/netfilter/nft_reject.h
index 60fa153..02e28c5 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -8,6 +8,10 @@ struct nft_reject {
 
 extern const struct nla_policy nft_reject_policy[];
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+   const struct nft_expr *expr,
+   const struct nft_data **data);
+
 int nft_reject_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c 
b/net/ipv4/netfilter/nft_reject_ipv4.c
index c24f41c..2c2553b 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = {
.eval   = nft_reject_ipv4_eval,
.init   = nft_reject_init,
.dump   = nft_reject_dump,
+   .validate   = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c 
b/net/ipv6/netfilter/nft_reject_ipv6.c
index 533cd57..92bda99 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -47,6 +47,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = {
.eval   = nft_reject_ipv6_eval,
.init   = nft_reject_init,
.dump   = nft_reject_dump,
+   .validate   = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 0522fc9..c64de3f7 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -26,11 +26,27 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 
1] = {
 };
 EXPORT_SYMBOL_GPL(nft_reject_policy);
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+   const struct nft_expr *expr,
+   const struct nft_data **data)
+{
+   return nft_chain_validate_hooks(ctx->chain,
+   (1 << NF_INET_LOCAL_IN) |
+   (1 << NF_INET_FORWARD) |
+   (1 << NF_INET_LOCAL_OUT));
+}
+EXPORT_SYMBOL_GPL(nft_reject_validate);
+
 int nft_reject_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
 {
struct nft_reject *priv = nft_expr_priv(expr);
+   int err;
+
+   err = nft_reject_validate(ctx, expr, NULL);
+   if (err < 0)
+   return err;
 
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 759ca52..e79d9ca 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,7 +66,11 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
 {
struct nft_reject *priv = nft_expr_priv(expr);
-   int icmp_code;
+   int icmp_code, err;
+
+   err = nft_reject_validate(ctx, expr, NULL);
+   if (err < 0)
+   return err;
 
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
@@ -124,6 +128,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
.eval   = nft_reject_inet_eval,
.init   = nft_reject_inet_init,
.dump   = nft_reject_inet_dump,
+   .validate   = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_inet_type __read_mostly

[PATCH iptables] extensions: libxt_CLASSIFY: Add translation to nft

2016-08-21 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

For examples:
  # iptables-translate -A OUTPUT -j CLASSIFY --set-class 0:0
  nft add rule ip filter OUTPUT counter meta priority set none
  # iptables-translate -A OUTPUT -j CLASSIFY --set-class :
  nft add rule ip filter OUTPUT counter meta priority set root
  # iptables-translate -A OUTPUT -j CLASSIFY --set-class 1:234
  nft add rule ip filter OUTPUT counter meta priority set 1:234

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_CLASSIFY.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/extensions/libxt_CLASSIFY.c b/extensions/libxt_CLASSIFY.c
index cd016d8..ba88f75 100644
--- a/extensions/libxt_CLASSIFY.c
+++ b/extensions/libxt_CLASSIFY.c
@@ -80,6 +80,31 @@ arpCLASSIFY_print(const void *ip, const struct 
xt_entry_target *target,
CLASSIFY_save(ip, target);
 }
 
+static int CLASSIFY_xlate(struct xt_xlate *xl,
+ const struct xt_xlate_tg_params *params)
+{
+   const struct xt_classify_target_info *clinfo =
+   (const struct xt_classify_target_info *)params->target->data;
+   __u32 handle = clinfo->priority;
+
+   xt_xlate_add(xl, "meta priority set ");
+
+   switch (handle) {
+   case TC_H_ROOT:
+   xt_xlate_add(xl, "root");
+   break;
+   case TC_H_UNSPEC:
+   xt_xlate_add(xl, "none");
+   break;
+   default:
+   xt_xlate_add(xl, "%0x:%0x", TC_H_MAJ(handle) >> 16,
+TC_H_MIN(handle));
+   break;
+   }
+
+   return 1;
+}
+
 static struct xtables_target classify_target[] = {
{
.family = NFPROTO_UNSPEC,
@@ -92,6 +117,7 @@ static struct xtables_target classify_target[] = {
.save   = CLASSIFY_save,
.x6_parse   = CLASSIFY_parse,
.x6_options = CLASSIFY_opts,
+   .xlate  = CLASSIFY_xlate,
},
{
.family = NFPROTO_ARP,
@@ -103,6 +129,7 @@ static struct xtables_target classify_target[] = {
.print  = arpCLASSIFY_print,
.x6_parse   = CLASSIFY_parse,
.x6_options = CLASSIFY_opts,
+   .xlate  = CLASSIFY_xlate,
},
 };
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: cttimeout: fix use after free error when delete netns

2016-08-18 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

In general, when we want to delete a netns, cttimeout_net_exit will
be called before ipt_unregister_table, i.e. before ctnl_timeout_put.

But after call kfree_rcu in cttimeout_net_exit, we will still decrease
the timeout object's refcnt in ctnl_timeout_put, this is incorrect,
and will cause a use after free error.

It is easy to reproduce this problem:
  # while : ; do
  ip netns add xxx
  ip netns exec xxx nfct add timeout testx inet icmp timeout 200
  ip netns exec xxx iptables -t raw -p icmp -I OUTPUT -j CT --timeout testx
  ip netns del xxx
  done

  ===
  BUG kmalloc-96 (Tainted: GB   E  ): Poison overwritten
  ---
  INFO: 0x88002b5161e8-0x88002b5161e8. First byte 0x6a instead of
  0x6b
  INFO: Allocated in cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout]
  age=104 cpu=0 pid=3330
  ___slab_alloc+0x4da/0x540
  __slab_alloc+0x20/0x40
  __kmalloc+0x1c8/0x240
  cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout]
  nfnetlink_rcv_msg+0x21a/0x230 [nfnetlink]
  [ ... ]

So only when the refcnt decreased to 0, we call kfree_rcu to free the
timeout object. And like nfnetlink_acct do, use atomic_cmpxchg to
avoid race between ctnl_timeout_try_del and ctnl_timeout_put.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
I find that my attachment patch is not showed at patchwork, so I send
it again.

 net/netfilter/nfnetlink_cttimeout.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 4cdcd96..68216cd 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -330,16 +330,16 @@ static int ctnl_timeout_try_del(struct net *net, struct 
ctnl_timeout *timeout)
 {
int ret = 0;
 
-   /* we want to avoid races with nf_ct_timeout_find_get. */
-   if (atomic_dec_and_test(>refcnt)) {
+   /* We want to avoid races with ctnl_timeout_put. So only when the
+* current refcnt is 1, we decrease it to 0.
+*/
+   if (atomic_cmpxchg(>refcnt, 1, 0) == 1) {
/* We are protected by nfnl mutex. */
list_del_rcu(>head);
nf_ct_l4proto_put(timeout->l4proto);
ctnl_untimeout(net, timeout);
kfree_rcu(timeout, rcu_head);
} else {
-   /* still in use, restore reference counter. */
-   atomic_inc(>refcnt);
ret = -EBUSY;
}
return ret;
@@ -543,7 +543,9 @@ err:
 
 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
 {
-   atomic_dec(>refcnt);
+   if (atomic_dec_and_test(>refcnt))
+   kfree_rcu(timeout, rcu_head);
+
module_put(THIS_MODULE);
 }
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
@@ -591,7 +593,9 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, >nfct_timeout_list, head) {
list_del_rcu(>head);
nf_ct_l4proto_put(cur->l4proto);
-   kfree_rcu(cur, rcu_head);
+
+   if (atomic_dec_and_test(>refcnt))
+   kfree_rcu(cur, rcu_head);
}
 }
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf 1/2] netfilter: nfnetlink_acct: fix race between nfacct del and xt_nfacct destroy

2016-08-18 Thread Liping Zhang
Hi Pablo,

2016-08-18 6:37 GMT+08:00 Pablo Neira Ayuso :
>
> Wait. I noticed we have the same problem in cttimeout, so it would be
> good to fix this in the same logical change.
>
> I'm attaching your original patch that I have mangled here, including
> the cttimeout chunk.
>
> Let me know if you have any concern, otherwise I'll apply this new
> version, thanks!

Not exactly right.

Currently ctnl_timeout_try_del will not cause race with ctnl_timeout_put.
In ctnl_timeout_put, it only decreases the timeout object's refcnt,
but will not try to free it. This is different with nfnetlink_acct.

But when we do "ip netns del xxx", this will cause a use after free error.
In general, when we delete a netns, cttimeout_net_exit will be called
before ipt_unregister_table, i.e. before ctnl_timeout_put.

So after call kfree_rcu in cttimeout_net_exit, we will still decrease
the timeout object's refcnt in ctnl_timeout_put. Kernel will complain
about this:

  ===
  BUG kmalloc-96 (Tainted: GB   E  ): Poison overwritten
  ---
  INFO: 0x88002b5161e8-0x88002b5161e8. First byte 0x6a instead of
  0x6b
  INFO: Allocated in cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout]
  age=104 cpu=0 pid=3330
  ___slab_alloc+0x4da/0x540
  __slab_alloc+0x20/0x40
  __kmalloc+0x1c8/0x240
  cttimeout_new_timeout+0xd4/0x240 [nfnetlink_cttimeout]
  nfnetlink_rcv_msg+0x21a/0x230 [nfnetlink]

So I think it seems better that we take another patch to fix the
problem in cttimeout.

Attachment is my patch.


0001-netfilter-cttimeout-fix-use-after-free-error-when-de.patch
Description: Binary data


[PATCH nf] netfilter: conntrack: do not dump other netns's conntrack entries via proc

2016-08-15 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We should skip the conntracks that belong to a different namespace,
otherwise other unrelated netns's conntrack entries will be dumped via
/proc/net/nf_conntrack.

Fixes: 56d52d4892d0 ("netfilter: conntrack: use a single hashtable for all 
namespaces")
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_standalone.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_conntrack_standalone.c 
b/net/netfilter/nf_conntrack_standalone.c
index 958a145..9f267c3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -205,6 +205,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
+   struct net *net = seq_file_net(s);
int ret = 0;
 
NF_CT_ASSERT(ct);
@@ -215,6 +216,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (NF_CT_DIRECTION(hash))
goto release;
 
+   if (!net_eq(nf_ct_net(ct), net))
+   goto release;
+
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
NF_CT_ASSERT(l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 2/2] netfilter: nfnetlink_acct: report overquota to the right netns

2016-08-13 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We should report the over quota message to the right net namespace
instead of the init netns.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/linux/netfilter/nfnetlink_acct.h | 4 ++--
 net/netfilter/nfnetlink_acct.c   | 9 +
 net/netfilter/xt_nfacct.c| 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink_acct.h 
b/include/linux/netfilter/nfnetlink_acct.h
index 80ca889..664da00 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -15,6 +15,6 @@ struct nf_acct;
 struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-extern int nfnl_acct_overquota(const struct sk_buff *skb,
- struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+   struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 1f15535..216b853 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -445,7 +445,7 @@ void nfnl_acct_update(const struct sk_buff *skb, struct 
nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
-static void nfnl_overquota_report(struct nf_acct *nfacct)
+static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 {
int ret;
struct sk_buff *skb;
@@ -460,11 +460,12 @@ static void nfnl_overquota_report(struct nf_acct *nfacct)
kfree_skb(skb);
return;
}
-   netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+   netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
  GFP_ATOMIC);
 }
 
-int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+   struct nf_acct *nfacct)
 {
u64 now;
u64 *quota;
@@ -482,7 +483,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct 
nf_acct *nfacct)
 
if (now >= *quota &&
!test_and_set_bit(NFACCT_OVERQUOTA_BIT, >flags)) {
-   nfnl_overquota_report(nfacct);
+   nfnl_overquota_report(net, nfacct);
}
 
return ret;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 3048a7e..cf32759 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct 
xt_action_param *par)
 
nfnl_acct_update(skb, info->nfacct);
 
-   overquota = nfnl_acct_overquota(skb, info->nfacct);
+   overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
 
return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nfnetlink_log: add "nf-logger-3-1" module alias name

2016-08-13 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Otherwise, if nfnetlink_log.ko is not loaded, we cannot add rules
to log packets to the userspace when we specify it with arp family,
such as:

  # nft add rule arp filter input log group 0
  :1:1-37: Error: Could not process rule: No such file or
  directory
  add rule arp filter input log group 0
  ^

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cbcfdfb..6577db5 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1147,6 +1147,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
 MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
 MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
+MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
 
 module_init(nfnetlink_log_init);
 module_exit(nfnetlink_log_fini);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V3 nf-next] netfilter: conntrack: simplify the code by using nf_conntrack_get_ht

2016-08-13 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Since Commit 64b87639c9cb ("netfilter: conntrack: fix race between
nf_conntrack proc read and hash resize") introdue the
nf_conntrack_get_ht, so there's no need to check nf_conntrack_generation
again and again to get the hash table and hash size. And convert
nf_conntrack_get_ht to inline function here.

Suggested-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V3: Convert all possible place to use nf_conntrack_get_ht and make it inline.
 Based on patch http://patchwork.ozlabs.org/patch/658952/.

 include/net/netfilter/nf_conntrack.h  | 20 ++
 include/net/netfilter/nf_conntrack_core.h |  3 --
 net/netfilter/nf_conntrack_core.c | 46 +++
 3 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 445b019..2a12748 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -303,9 +303,29 @@ struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 int nf_conntrack_hash_resize(unsigned int hashsize);
+
+extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
+extern seqcount_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
 
+/* must be called with rcu read lock held */
+static inline void
+nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+   struct hlist_nulls_head *hptr;
+   unsigned int sequence, hsz;
+
+   do {
+   sequence = read_seqcount_begin(_conntrack_generation);
+   hsz = nf_conntrack_htable_size;
+   hptr = nf_conntrack_hash;
+   } while (read_seqcount_retry(_conntrack_generation, sequence));
+
+   *hash = hptr;
+   *hsize = hsz;
+}
+
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
 const struct nf_conntrack_zone *zone,
 gfp_t flags);
diff --git a/include/net/netfilter/nf_conntrack_core.h 
b/include/net/netfilter/nf_conntrack_core.h
index 79d7ac5..62e17d1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
 
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
-
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net,
@@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct 
nf_conntrack_tuple *tuple,
 
 #define CONNTRACK_LOCKS 1024
 
-extern struct hlist_nulls_head *nf_conntrack_hash;
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);
 
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index dccac2c..7d90a5d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -74,7 +74,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
@@ -164,6 +163,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
+seqcount_t nf_conntrack_generation __read_mostly;
 
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -480,23 +480,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
   net_eq(net, nf_ct_net(ct));
 }
 
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
-{
-   struct hlist_nulls_head *hptr;
-   unsigned int sequence, hsz;
-
-   do {
-   sequence = read_seqcount_begin(_conntrack_generation);
-   hsz = nf_conntrack_htable_size;
-   hptr = nf_conntrack_hash;
-   } while (read_seqcount_retry(_conntrack_generation, sequence));
-
-   *hash = hptr;
-   *hsize = hsz;
-}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
-
 /*
  * Warning :
  * - Caller must take a reference on returned object
@@ -509,14 +492,11 @@ nf_conntrack_find(struct net *net, const struct 
nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
-   unsigned int bucket, sequence;
+   unsigned int bucket, hsize;
 
 begin:
-   d

Re: [PATCH] netfilter: remove ip_conntrack* sysctl compat code

2016-08-13 Thread Liping Zhang
Hi Pablo,
2016-08-12 19:47 GMT+08:00 Pablo Neira Ayuso :
> diff --git a/net/netfilter/nf_conntrack_core.c 
> b/net/netfilter/nf_conntrack_core.c
> index dd2c43a..22558b7 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -161,10 +161,7 @@ static void nf_conntrack_all_unlock(void)
>  }
>
>  unsigned int nf_conntrack_htable_size __read_mostly;
> -EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

We still need to export nf_conntrack_htable_size, otherwise:

ERROR: "nf_conntrack_htable_size"
[net/netfilter/nfnetlink_cttimeout.ko] undefined!
ERROR: "nf_conntrack_htable_size"
[net/netfilter/nf_conntrack_netlink.ko] undefined!
ERROR: "nf_conntrack_htable_size"
[net/ipv4/netfilter/nf_conntrack_ipv4.ko] undefined!
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next V2] netfilter: conntrack: simplify the code by using nf_conntrack_get_ht

2016-08-12 Thread Liping Zhang
2016-08-12 19:49 GMT+08:00 Pablo Neira Ayuso <pa...@netfilter.org>:
> On Fri, Aug 12, 2016 at 07:12:32PM +0800, Liping Zhang wrote:
>> 2016-08-12 18:34 GMT+08:00 Pablo Neira Ayuso <pa...@netfilter.org>:
> [...]
>> >
>> > I think it is a good time to kill compat /proc/net/ip_conntrack*. That
>> > has been there for so long already. So we can inline this function,
>> > this is the only one that needs it to export it, right?
>>
>> If just for the purpose of using nf_conntrack_get_ht to simply the source 
>> code,
>> I'm not sure is it worth to delete the compat /proc/net/ip_conntrack*?
>>
>> So I'm inclined to keep the original source codes unchanged :)
>
> Just sent a patch to kill that compat code. It is also missing new
> supported layer 4 protocols, as well as IPv6. We have too many
> interfaces already, actually I'd be happy to kill nf_conntrack sysctl
> entries at some point and leave just the ctnetlink interface.
>
> I'm attaching an incomplete patch that moves nf_conntrack_get_ht() as
> inline. It applies on top of:
>
> http://patchwork.ozlabs.org/patch/658620/
>
> Feel free to take it over and finish it. Thanks.

OK. Will be happy to follow up on this:)

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] netfilter: nf_tables: add hash expression

2016-08-09 Thread Liping Zhang
Hi Laura,

2016-08-10 2:22 GMT+08:00 Laura Garcia Liebana :
> This patch adds a new hash expression, this provides jhash support but
> this can be extended to support for other hash functions.
>
> The modulus and seed already comes embedded into this new expression.
>
> Use case example:
> meta mark set hash ip saddr mod 10
>
> +static int nft_hash_init(const struct nft_ctx *ctx,
> +const struct nft_expr *expr,
> +const struct nlattr * const tb[])
> +{
> +   struct nft_hash *priv = nft_expr_priv(expr);
> +   u32 len;
> +
> +   if (!tb[NFTA_HASH_SREG] ||
> +   !tb[NFTA_HASH_DREG] ||
> +   !tb[NFTA_HASH_LEN])
> +   return -EINVAL;

I think tb[NFTA_HASH_MODULUS] and tb[NFTA_HASH_SEED] should also be
checked is NULL or not? :)

> +
> +   priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
> +   priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);

Should we use nft_validate_register_load and
nft_validate_register_store here to check the validity ?

> +
> +   len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
> +   if (len == 0 || len > U8_MAX)
> +   return -EINVAL;
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nfnetlink_queue: reject verdict request from different portid

2016-08-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Like NFQNL_MSG_VERDICT_BATCH do, we should also reject the verdict
request when the portid is not same with the initial portid(maybe
from another process).

Fixes: 97d32cf9440d ("netfilter: nfnetlink_queue: batch verdict support")
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_queue.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5d36a09..f49f450 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct 
sock *ctnl,
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int err;
 
-   queue = instance_lookup(q, queue_num);
-   if (!queue)
-   queue = verdict_instance_lookup(q, queue_num,
-   NETLINK_CB(skb).portid);
+   queue = verdict_instance_lookup(q, queue_num,
+   NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: ctnetlink: reject new conntrack request with different l4proto

2016-08-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Currently, user can add a conntrack with different l4proto via nfnetlink.
For example, original tuple is TCP while reply tuple is SCTP. This is
invalid combination, we should report EINVAL to userspace.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index b9bfe64..fdfc71f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, 
struct sock *ctnl,
 
if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
return -EINVAL;
+   if (otuple.dst.protonum != rtuple.dst.protonum)
+   return -EINVAL;
 
ct = ctnetlink_create_conntrack(net, , cda, 
,
, u3);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nfnetlink_queue: fix memory leak when attach expectation successfully

2016-08-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

User can use NFQA_EXP to attach expectations to conntracks, but we
forget to put back nf_conntrack_expect when it is inserted successfully,
i.e. in this normal case, expect's use refcnt will be 3. So even we
unlink it and put it back later, the use refcnt is still 1, then the
memory will be leaked forever.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_netlink.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index 050bb34..b9bfe64 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2362,12 +2362,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, 
struct nf_conn *ct,
return PTR_ERR(exp);
 
err = nf_ct_expect_related_report(exp, portid, report);
-   if (err < 0) {
-   nf_ct_expect_put(exp);
-   return err;
-   }
-
-   return 0;
+   nf_ct_expect_put(exp);
+   return err;
 }
 
 static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nf_expect_proc: remove the redundant slash when policy name is empty

2016-08-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

The 'name' filed in struct nf_conntrack_expect_policy{} is not a
pointer, so check it is NULL or not will always return true. Even if the
name is empty, slash will always be displayed like follows:
  # cat /proc/net/nf_conntrack_expect
  297 l3proto = 2 proto=6 src=1.1.1.1 dst=2.2.2.2 sport=1 dport=1025 ftp/
^

Fixes: 3a8fc53a45c4 ("netfilter: nf_ct_helper: allocate 16 bytes for the helper 
and policy names")
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_expect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_expect.c 
b/net/netfilter/nf_conntrack_expect.c
index 9e36931..a4f3cc8 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
helper = rcu_dereference(nfct_help(expect->master)->helper);
if (helper) {
seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
-   if (helper->expect_policy[expect->class].name)
+   if (helper->expect_policy[expect->class].name[0] != '\0')
seq_printf(s, "/%s",
   helper->expect_policy[expect->class].name);
}
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nf_dup4: remove redundant checksum recalculation

2016-07-30 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

IP header checksum will be recalculated at ip_local_out, so
there's no need to calculated it here, remove it. Also update
code comments to illustrate it, and delete the misleading
comments about checksum recalculation.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/ipv4/netfilter/nf_dup_ipv4.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index ceb1873..cf986e1 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, 
unsigned int hooknum,
nf_conntrack_get(skb->nfct);
 #endif
/*
-* If we are in PREROUTING/INPUT, the checksum must be recalculated
-* since the length could have changed as a result of defragmentation.
-*
-* We also decrease the TTL to mitigate potential loops between two
-* hosts.
+* If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
+* loops between two hosts.
 *
 * Set %IP_DF so that the original source is notified of a potentially
 * decreased MTU on the clone route. IPv6 does this too.
+*
+* IP header checksum will be recalculated at ip_local_out.
 */
iph = ip_hdr(skb);
iph->frag_off |= htons(IP_DF);
if (hooknum == NF_INET_PRE_ROUTING ||
hooknum == NF_INET_LOCAL_IN)
--iph->ttl;
-   ip_send_check(iph);
 
if (nf_dup_ipv4_route(net, skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nf_ct_h323: do not re-activate already expired timer

2016-07-23 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Commit 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of
set_expect_timeout") just simplify the source codes
if (!del_timer(>timeout))
return 0;
add_timer(>timeout);
to mod_timer(>timeout, jiffies + info->timeout * HZ);

This is not correct, and introduce a race codition:
CPU0 CPU1
 - timer expire
  process_rcf  expectation_timed_out
  lock(exp_lock)  -
  find_exp waiting exp_lock...
  re-activate timer!!  waiting exp_lock...
  unlock(exp_lock) lock(exp_lock)
 - unlink expect
 - free(expect)
 - unlock(exp_lock)
So when the timer expires again, we will access the memory that
was already freed.

Replace mod_timer with mod_timer_pending here to fix this problem.

Fixes: 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of 
set_expect_timeout")
Cc: Gao Feng <f...@ikuai8.com>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
When I found this problem and want to report it, it was a little too late.
I find that this commit was already pushed to the cgit :(

 net/netfilter/nf_conntrack_h323_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_h323_main.c 
b/net/netfilter/nf_conntrack_h323_main.c
index bb77a97..5c0db5c 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct 
nf_conn *ct,
 "timeout to %u seconds for",
 info->timeout);
nf_ct_dump_tuple(>tuple);
-   mod_timer(>timeout, jiffies + info->timeout * HZ);
+   mod_timer_pending(>timeout,
+ jiffies + info->timeout * HZ);
}
spin_unlock_bh(_conntrack_expect_lock);
}
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 2/2] netfilter: nft_compat: fix crash when related match/target module is removed

2016-07-23 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We "cache" the loaded match/target modules and reuse them, but when the
modules are removed, we still point to them. Then we may end up with
invalid memory references when using iptables-compat to add rules later.

Input the following commands will reproduce the kernel crash:
  # iptables-compat -A INPUT -j LOG
  # iptables-compat -D INPUT -j LOG
  # rmmod xt_LOG
  # iptables-compat -A INPUT -j LOG
  BUG: unable to handle kernel paging request at a05a9010
  IP: [] strcmp+0xe/0x30
  Call Trace:
  [] nft_target_select_ops+0x83/0x1f0 [nft_compat]
  [] nf_tables_expr_parse+0x147/0x1f0 [nf_tables]
  [] nf_tables_newrule+0x301/0x810 [nf_tables]
  [] ? nla_parse+0x20/0x100
  [] nfnetlink_rcv+0x33f/0x53d [nfnetlink]
  [] ? nfnetlink_rcv+0x1fb/0x53d [nfnetlink]
  [] netlink_unicast+0x178/0x220
  [] netlink_sendmsg+0x2fb/0x3a0
  [] sock_sendmsg+0x38/0x50
  [] ___sys_sendmsg+0x28e/0x2a0
  [] ? release_sock+0x1e/0xb0
  [] ? _raw_spin_unlock_bh+0x35/0x40
  [] ? release_sock+0x82/0xb0
  [] __sys_sendmsg+0x54/0x90
  [] SyS_sendmsg+0x12/0x20
  [] entry_SYSCALL_64_fastpath+0x1a/0xa9

So when nobody use the related match/target module, there's no need to
"cache" it. And nft_[match|target]_release are useless anymore, remove
them.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_compat.c | 43 ---
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index d74adf4..9b5fc7e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -23,6 +23,20 @@
 #include 
 #include 
 
+struct nft_xt {
+   struct list_headhead;
+   struct nft_expr_ops ops;
+   unsigned intrefcnt;
+};
+
+static void nft_xt_put(struct nft_xt *xt)
+{
+   if (--xt->refcnt == 0) {
+   list_del(>head);
+   kfree(xt);
+   }
+}
+
 static int nft_compat_chain_validate_dependency(const char *tablename,
const struct nft_chain *chain)
 {
@@ -260,6 +274,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct 
nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy();
 
+   nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(target->me);
 }
 
@@ -442,6 +457,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct 
nft_expr *expr)
if (par.match->destroy != NULL)
par.match->destroy();
 
+   nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(match->me);
 }
 
@@ -612,11 +628,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys 
= {
 
 static LIST_HEAD(nft_match_list);
 
-struct nft_xt {
-   struct list_headhead;
-   struct nft_expr_ops ops;
-};
-
 static struct nft_expr_type nft_match_type;
 
 static bool nft_match_cmp(const struct xt_match *match,
@@ -653,6 +664,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(match->me))
return ERR_PTR(-ENOENT);
 
+   nft_match->refcnt++;
return _match->ops;
}
}
@@ -673,6 +685,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
goto err;
}
 
+   nft_match->refcnt = 1;
nft_match->ops.type = _match_type;
nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
@@ -691,14 +704,6 @@ err:
return ERR_PTR(err);
 }
 
-static void nft_match_release(void)
-{
-   struct nft_xt *nft_match, *tmp;
-
-   list_for_each_entry_safe(nft_match, tmp, _match_list, head)
-   kfree(nft_match);
-}
-
 static struct nft_expr_type nft_match_type __read_mostly = {
.name   = "match",
.select_ops = nft_match_select_ops,
@@ -745,6 +750,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(target->me))
return ERR_PTR(-ENOENT);
 
+   nft_target->refcnt++;
return _target->ops;
}
}
@@ -765,6 +771,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
goto err;
}
 
+   nft_target->refcnt = 1;
nft_target->ops.type = _target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.init = nft_target_init;
@@ -787,14 +794,6 @@ err:
return ERR_PTR(err);
 }
 
-static void nft_target_release(void)
-{
-   struct nft_xt *nft_target, *tmp;
-
-   list_for_each_entry_safe(nft_target, tmp, _target_list, head)
-   kfree(nft_target);
-}

[PATCH nf-next 1/2] netfilter: nft_compat: put back match/target module if init fail

2016-07-23 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

If the user specify the invalid NFTA_MATCH_INFO/NFTA_TARGET_INFO attr
or memory alloc fail, we should call module_put to the related match
or target. Otherwise, we cannot remove the module even nobody use it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_compat.c | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 6228c42..d74adf4 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -634,6 +634,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
struct xt_match *match;
char *mt_name;
u32 rev, family;
+   int err;
 
if (tb[NFTA_MATCH_NAME] == NULL ||
tb[NFTA_MATCH_REV] == NULL ||
@@ -660,13 +661,17 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
 
-   if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
-   return ERR_PTR(-EINVAL);
+   if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) {
+   err = -EINVAL;
+   goto err;
+   }
 
/* This is the first time we use this match, allocate operations */
nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-   if (nft_match == NULL)
-   return ERR_PTR(-ENOMEM);
+   if (nft_match == NULL) {
+   err = -ENOMEM;
+   goto err;
+   }
 
nft_match->ops.type = _match_type;
nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
@@ -680,6 +685,10 @@ nft_match_select_ops(const struct nft_ctx *ctx,
list_add(_match->head, _match_list);
 
return _match->ops;
+
+err:
+   module_put(match->me);
+   return ERR_PTR(err);
 }
 
 static void nft_match_release(void)
@@ -717,6 +726,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
struct xt_target *target;
char *tg_name;
u32 rev, family;
+   int err;
 
if (tb[NFTA_TARGET_NAME] == NULL ||
tb[NFTA_TARGET_REV] == NULL ||
@@ -743,13 +753,17 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
 
-   if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
-   return ERR_PTR(-EINVAL);
+   if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
+   err = -EINVAL;
+   goto err;
+   }
 
/* This is the first time we use this target, allocate operations */
nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-   if (nft_target == NULL)
-   return ERR_PTR(-ENOMEM);
+   if (nft_target == NULL) {
+   err = -ENOMEM;
+   goto err;
+   }
 
nft_target->ops.type = _target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
@@ -767,6 +781,10 @@ nft_target_select_ops(const struct nft_ctx *ctx,
list_add(_target->head, _target_list);
 
return _target->ops;
+
+err:
+   module_put(target->me);
+   return ERR_PTR(err);
 }
 
 static void nft_target_release(void)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libxt_connlabel: add unit test

2016-07-23 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Add some unit tests for connlabel match extension:
  # ./iptables-test.py extensions/libxt_connlabel.t
  extensions/libxt_connlabel.t: OK
  1 test files, 7 unit tests, 7 passed

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_connlabel.t | 18 ++
 1 file changed, 18 insertions(+)
 create mode 100644 extensions/libxt_connlabel.t

diff --git a/extensions/libxt_connlabel.t b/extensions/libxt_connlabel.t
new file mode 100644
index 000..aad1032
--- /dev/null
+++ b/extensions/libxt_connlabel.t
@@ -0,0 +1,18 @@
+:INPUT,FORWARD,OUTPUT
+# Backup the connlabel.conf, then add some label maps for test
+@[ -f /etc/xtables/connlabel.conf ] && mv /etc/xtables/connlabel.conf 
/tmp/connlabel.conf.bak
+@mkdir -p /etc/xtables
+@echo "40 bit40" > /etc/xtables/connlabel.conf
+@echo "41 bit41" >> /etc/xtables/connlabel.conf
+@echo "128 bit128" >> /etc/xtables/connlabel.conf
+-m connlabel --label "bit40";=;OK
+-m connlabel ! --label "bit40";=;OK
+-m connlabel --label "bit41" --set;=;OK
+-m connlabel ! --label "bit41" --set;=;OK
+-m connlabel --label "bit128";;FAIL
+@echo > /etc/xtables/connlabel.conf
+-m connlabel --label "abc";;FAIL
+@rm -f /etc/xtables/connlabel.conf
+-m connlabel --label "abc";;FAIL
+# Restore the original connlabel.conf
+@[ -f /tmp/connlabel.conf.bak ] && mv /tmp/connlabel.conf.bak 
/etc/xtables/connlabel.conf
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libxt_NFLOG: add unit test to cover nflog-size with zero

2016-07-20 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

"--nflog-size 0" is valid and we must display it appropriately.

Suggested-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_NFLOG.t | 1 +
 1 file changed, 1 insertion(+)

diff --git a/extensions/libxt_NFLOG.t b/extensions/libxt_NFLOG.t
index 78076b5..933fa22 100644
--- a/extensions/libxt_NFLOG.t
+++ b/extensions/libxt_NFLOG.t
@@ -7,6 +7,7 @@
 -j NFLOG --nflog-range 4294967295;=;OK
 -j NFLOG --nflog-range 4294967296;;FAIL
 -j NFLOG --nflog-range -1;;FAIL
+-j NFLOG --nflog-size 0;=;OK
 -j NFLOG --nflog-size 1;=;OK
 -j NFLOG --nflog-size 4294967295;=;OK
 -j NFLOG --nflog-size 4294967296;;FAIL
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next 3/3] netfilter: nft_log: fix snaplen does not truncate packets

2016-07-20 Thread Liping Zhang
Hi Pablo,

2016-07-20 16:25 GMT+08:00 Pablo Neira Ayuso <pa...@netfilter.org>:
> On Wed, Jul 20, 2016 at 07:00:13AM +0800, Liping Zhang wrote:
>> I find that nftables already support this feature, the following command 
>> mean to truncate packets
>> to 100 bytes before logging to the userspace:
>>   #nft add rule filter input log group 0 snaplen 100
>>
>> Before my patch, it does not work.
>> And after apply my patch, it works as expected.
>
> If I git grep NF_LOG_F_COPY_LEN from the nftables.git tree, I don't
> see any reference to this flag being set.
>

I use this NF_LOG_F_COPY_LEN flag as internal, and when the user specify
the NFTA_LOG_SNAPLEN attrribute, it will be enabled automatically.
See my codes:

   if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+   li->u.ulog.flags |= NF_LOG_F_COPY_LEN;

-   if (li->u.ulog.copy_len) {
+   if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
 if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
 htonl(li->u.ulog.copy_len)))

So this flag will not be setted from userspace explicitly and will not
be dumped to the userspace.

> Then, nft_log kernel has been actually working fine since the
> beginning. It would be good anyway if we set this flag on from
> userspace to leave things in consistent state.

Do you mean this is something similar to "nflog-size" and
"nflog-range" in xt_NFLOG?
"nft log snaplen 100" cannot work since the beginning, so we should
keep it unchanged? And
maybe we should introduce a new option like "nft log snapsize 100"?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] netfilter: Add helper array register/unregister functions

2016-07-19 Thread Liping Zhang
2016-07-18 11:39 GMT+08:00  :
> From: Gao Feng 
>
> Add nf_ct_helper_init, nf_conntrack_helpers_register/unregister
> functions to enhance the conntrack helper codes.

I think this patch is breaking something ...

This irc:
> -   if (ports[i] == IRC_PORT)
> -   sprintf(irc[i].name, "irc");
> -   else
> -   sprintf(irc[i].name, "irc-%u", i);
> -
> -   ret = nf_conntrack_helper_register([i]);
> +   nf_ct_helper_init([i], AF_INET, IPPROTO_TCP, "irc",
> + IRC_PORT, ports[i], _exp_policy, 0, 0,
> + help, NULL, THIS_MODULE);
> +   }

This sip:
> -   if (ports[i] == SIP_PORT)
> -   sprintf(sip[i][j].name, "sip");
> -   else
> -   sprintf(sip[i][j].name, "sip-%u", i);

And this tftp:
> -   if (ports[i] == TFTP_PORT)
> -   sprintf(tftp[i][j].name, "tftp");
> -   else
> -   sprintf(tftp[i][j].name, "tftp-%u", i);

For example, if the user install the nf_conntrack_tftp module an
specify the ports to "69,10069",
then the helper name is "tftp" and "tftp-1".

But apply this patch, the helper name will be changed to "tftp" and
"tftp-10069", this may break the
existing iptables rules which used the helper match or CT target.

And this was already discussed  at https://patchwork.ozlabs.org/patch/622238/
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 0/3] netfilter: fix some small bugs related to nft_log

2016-07-18 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

This patchset is very small, aim to fix some bugs related to nftables log expr.

patch#1 fix a possible memory leak if the user specify the log prefix but the 
log
expr init fail.

patch#2 add a validity check of log level, otherwise user can specify an invalid
log level.

patch#3 fix "nft add rule filter input log snaplen 100" cannot work 
successfully,
the reason is similar to xt_NFLOG.

Liping Zhang (3):
  netfilter: nft_log: fix possible memory leak if log expr init fail
  netfilter: nft_log: check the validity of log level
  netfilter: nft_log: fix snaplen does not truncate packets

 net/netfilter/nft_log.c | 34 +-
 1 file changed, 25 insertions(+), 9 deletions(-)

-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 1/3] netfilter: nft_log: fix possible memory leak if log expr init fail

2016-07-18 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Suppose that we specify the NFTA_LOG_PREFIX, then NFTA_LOG_LEVEL
and NFTA_LOG_GROUP are specified together or nf_logger_find_get
call returns fail, i.e. expr init fail, memory leak will happen.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_log.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 713d668..e1b34ff 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -52,6 +52,14 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = >loginfo;
const struct nlattr *nla;
+   int err;
+
+   li->type = NF_LOG_TYPE_LOG;
+   if (tb[NFTA_LOG_LEVEL] != NULL &&
+   tb[NFTA_LOG_GROUP] != NULL)
+   return -EINVAL;
+   if (tb[NFTA_LOG_GROUP] != NULL)
+   li->type = NF_LOG_TYPE_ULOG;
 
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -63,13 +71,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
priv->prefix = (char *)nft_log_null_prefix;
}
 
-   li->type = NF_LOG_TYPE_LOG;
-   if (tb[NFTA_LOG_LEVEL] != NULL &&
-   tb[NFTA_LOG_GROUP] != NULL)
-   return -EINVAL;
-   if (tb[NFTA_LOG_GROUP] != NULL)
-   li->type = NF_LOG_TYPE_ULOG;
-
switch (li->type) {
case NF_LOG_TYPE_LOG:
if (tb[NFTA_LOG_LEVEL] != NULL) {
@@ -96,7 +97,16 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
 
-   return nf_logger_find_get(ctx->afi->family, li->type);
+   err = nf_logger_find_get(ctx->afi->family, li->type);
+   if (err < 0)
+   goto err1;
+
+   return 0;
+
+err1:
+   if (priv->prefix != nft_log_null_prefix)
+   kfree(priv->prefix);
+   return err;
 }
 
 static void nft_log_destroy(const struct nft_ctx *ctx,
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 2/3] netfilter: nft_log: check the validity of log level

2016-07-18 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

User can specify the log level larger than 7(debug level) via
nfnetlink, this is invalid. So in this case, we should report
EINVAL to the userspace.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_log.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index e1b34ff..5f6f088 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -79,6 +79,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
} else {
li->u.log.level = LOGLEVEL_WARNING;
}
+   if (li->u.log.level > LOGLEVEL_DEBUG) {
+   err = -EINVAL;
+   goto err1;
+   }
+
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 3/3] netfilter: nft_log: fix snaplen does not truncate packets

2016-07-18 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

There's a similar problem in xt_NFLOG, and was fixed by commit 7643507fe8b5
("netfilter: xt_NFLOG: nflog-range does not truncate packets"). Only set
copy_len here does not work, so we should enable NF_LOG_F_COPY_LEN also.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_log.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 5f6f088..24a73bb 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -92,6 +92,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
case NF_LOG_TYPE_ULOG:
li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+   li->u.ulog.flags |= NF_LOG_F_COPY_LEN;
li->u.ulog.copy_len =
ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
}
@@ -149,7 +150,7 @@ static int nft_log_dump(struct sk_buff *skb, const struct 
nft_expr *expr)
if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
goto nla_put_failure;
 
-   if (li->u.ulog.copy_len) {
+   if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
 htonl(li->u.ulog.copy_len)))
goto nla_put_failure;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2 iptables] extensions: libxt_connlabel: Add translation to nft

2016-07-16 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Add translation for connlabel to nftables.
For examples:

  # iptables-translate -A INPUT -m connlabel --label bit40
  nft add rule ip filter INPUT ct label bit40 counter

  # iptables-translate -A INPUT -m connlabel ! --label bit40 --set
  nft add rule ip filter INPUT ct label set bit40 ct label and bit40 != bit40 
counter

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: fix wrong translation when "!" is set, pointed by Florian Westphal.
 extensions/libxt_connlabel.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/extensions/libxt_connlabel.c b/extensions/libxt_connlabel.c
index 1f83095..96b9aec 100644
--- a/extensions/libxt_connlabel.c
+++ b/extensions/libxt_connlabel.c
@@ -118,6 +118,27 @@ connlabel_mt_save(const void *ip, const struct 
xt_entry_match *match)
connlabel_mt_print_op(info, "--");
 }
 
+static int
+connlabel_mt_xlate(const void *ip, const struct xt_entry_match *match,
+  struct xt_xlate *xl, int numeric)
+{
+   const struct xt_connlabel_mtinfo *info = (const void *)match->data;
+   const char *name = connlabel_get_name(info->bit);
+
+   if (name == NULL)
+   return 0;
+
+   if (info->options & XT_CONNLABEL_OP_SET)
+   xt_xlate_add(xl, "ct label set %s ", name);
+
+   xt_xlate_add(xl, "ct label ");
+   if (info->options & XT_CONNLABEL_OP_INVERT)
+   xt_xlate_add(xl, "and %s != ", name);
+   xt_xlate_add(xl, "%s", name);
+
+   return 1;
+}
+
 static struct xtables_match connlabel_mt_reg = {
.family= NFPROTO_UNSPEC,
.name  = "connlabel",
@@ -129,6 +150,7 @@ static struct xtables_match connlabel_mt_reg = {
.save  = connlabel_mt_save,
.x6_parse  = connlabel_mt_parse,
.x6_options= connlabel_mt_opts,
+   .xlate = connlabel_mt_xlate,
 };
 
 void _init(void)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH iptables] extensions: libxt_connlabel: Add translation to nft

2016-07-16 Thread Liping Zhang
At 2016-07-16 17:04:39, "Florian Westphal" <f...@strlen.de> wrote:
>Liping Zhang <zlpnob...@163.com> wrote:
>> 
>>   # iptables-translate -A INPUT -m connlabel ! --label bit40 --set
>>   nft add rule ip filter INPUT ct label set bit40 ct label != bit40 counter
>
>Should probably be:
>
>... ct label and bit40 != bit40 ...
>
>!= bit40 will be true if bit40 and another bit is set.

Right, "ct label bit40" and "ct label != bit40" have the different semantics:

# nft add rule filter input ct label bit40 --debug=netlink
ip filter input 
  [ ct load label => reg 1 ]
  [ bitwise reg 1 = (reg=1 & 0x 0x0100 0x 0x ) ^ 
0x 0x 0x 0x ]
  [ cmp neq reg 1 0x 0x 0x 0x ]

# nft add rule filter input ct label != bit40 --debug=netlink
ip filter input 
  [ ct load label => reg 1 ]
  [ cmp neq reg 1 0x 0x0100 0x 0x ]

Will send V2 later, Thanks.

[PATCH nf] netfilter: nft_ct: fix unpaired nf_connlabels_get/put call

2016-07-16 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We only get nf_connlabels if the user add ct label set expr successfully,
but we will also put nf_connlabels if the user delete ct lable get expr.
This is mismathced, and will cause ct label expr cannot work properly.

Also, if we init something fail, we should put nf_connlabels back.
Otherwise, we may waste to alloc the memory that will never be used.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_ct.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 81fbb45..4726a2e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -359,6 +359,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
   const struct nlattr * const tb[])
 {
struct nft_ct *priv = nft_expr_priv(expr);
+   bool label_got = false;
unsigned int len;
int err;
 
@@ -377,6 +378,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
+   label_got = true;
break;
 #endif
default:
@@ -386,17 +388,28 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
-   return err;
+   goto err1;
 
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
-   return err;
+   goto err1;
 
return 0;
+
+err1:
+   if (label_got)
+   nf_connlabels_put(ctx->net);
+   return err;
+}
+
+static void nft_ct_get_destroy(const struct nft_ctx *ctx,
+  const struct nft_expr *expr)
+{
+   nft_ct_l3proto_module_put(ctx->afi->family);
 }
 
-static void nft_ct_destroy(const struct nft_ctx *ctx,
-  const struct nft_expr *expr)
+static void nft_ct_set_destroy(const struct nft_ctx *ctx,
+  const struct nft_expr *expr)
 {
struct nft_ct *priv = nft_expr_priv(expr);
 
@@ -468,7 +481,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.size   = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval   = nft_ct_get_eval,
.init   = nft_ct_get_init,
-   .destroy= nft_ct_destroy,
+   .destroy= nft_ct_get_destroy,
.dump   = nft_ct_get_dump,
 };
 
@@ -477,7 +490,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.size   = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval   = nft_ct_set_eval,
.init   = nft_ct_set_init,
-   .destroy= nft_ct_destroy,
+   .destroy= nft_ct_set_destroy,
.dump   = nft_ct_set_dump,
 };
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libxt_connlabel: Add translation to nft

2016-07-16 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Add translation for connlabel to nftables.
For examples:

  # iptables-translate -A INPUT -m connlabel --label bit40
  nft add rule ip filter INPUT ct label bit40 counter

  # iptables-translate -A INPUT -m connlabel ! --label bit40 --set
  nft add rule ip filter INPUT ct label set bit40 ct label != bit40 counter

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_connlabel.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/extensions/libxt_connlabel.c b/extensions/libxt_connlabel.c
index 1f83095..e9aeca8 100644
--- a/extensions/libxt_connlabel.c
+++ b/extensions/libxt_connlabel.c
@@ -118,6 +118,27 @@ connlabel_mt_save(const void *ip, const struct 
xt_entry_match *match)
connlabel_mt_print_op(info, "--");
 }
 
+static int
+connlabel_mt_xlate(const void *ip, const struct xt_entry_match *match,
+  struct xt_xlate *xl, int numeric)
+{
+   const struct xt_connlabel_mtinfo *info = (const void *)match->data;
+   const char *name = connlabel_get_name(info->bit);
+
+   if (name == NULL)
+   return 0;
+
+   if (info->options & XT_CONNLABEL_OP_SET)
+   xt_xlate_add(xl, "ct label set %s ", name);
+
+   xt_xlate_add(xl, "ct label ");
+   if (info->options & XT_CONNLABEL_OP_INVERT)
+   xt_xlate_add(xl, "!= ");
+   xt_xlate_add(xl, "%s", name);
+
+   return 1;
+}
+
 static struct xtables_match connlabel_mt_reg = {
.family= NFPROTO_UNSPEC,
.name  = "connlabel",
@@ -129,6 +150,7 @@ static struct xtables_match connlabel_mt_reg = {
.save  = connlabel_mt_save,
.x6_parse  = connlabel_mt_parse,
.x6_options= connlabel_mt_opts,
+   .xlate = connlabel_mt_xlate,
 };
 
 void _init(void)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next 2/2] netfilter: conntrack: simplify the code by using nf_conntrack_get_ht

2016-07-13 Thread Liping Zhang
Hi Florian,

At 2016-07-12 21:03:03, "Florian Westphal" <f...@strlen.de> wrote:
>Liping Zhang <zlpnob...@163.com> wrote:
>> +inline void
>> +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
>
>Which "inline void"?  This is very unusual.
>
>I would suggest to not add it, and ...

Yes, but we can still find a very few sample codes using inline without static 
in current kernel source tree.
For example:
inline void raise_softirq_irqoff(unsigned int nr) { ... }
inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) { ... }

And from the description of the gcc's 
document(https://gcc.gnu.org/onlinedocs/gcc/Inline.html):
"When an inline function is not static, then the compiler must assume that 
there may be calls from
other source files; since a global symbol can be defined only once in any 
program, the function must
not be defined in the other source files, so the calls therein cannot be 
integrated". 

We can find that "inline void nf_conntrack_get_ht() { ... }" here will be 
integrated in nf_conntrack_core.c
and in other places will be a function call. And I test this on my x86 mechain, 
it works as expected.

>leave nf_conntrack_find alone, but convert
>
>> @@ -801,18 +799,15 @@ nf_conntrack_tuple_taken(const struct 
>> nf_conntrack_tuple *tuple,
>
>This one ..
>
>> @@ -878,14 +873,11 @@ static noinline int early_drop(struct net *net, 
>> unsigned int _hash)
>
>... and this one too.
>

IMO, leave nf_conntrack_find alone seems not very good. If you strongly 
dislike "inline void",
I can keep nf_conntrack_get_ht unchanged. But I will convert all these three to 
use nf_conntrack_get_ht.
I think even in nf_conntrack_find, inline nf_conntrack_get_ht will earn 
very very little performace
here, almost none.


[PATCH nf-next 1/2] netfilter: conntrack: protect early_drop by rcu read lock

2016-07-12 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

User can add ct entry via nfnetlink(IPCTNL_MSG_CT_NEW), and if the total
number reach the nf_conntrack_max, we will try to drop some ct entries.

But in this case(the main function call path is ctnetlink_create_conntrack
-> nf_conntrack_alloc -> early_drop), rcu_read_lock is not held, so race
with hash resize will happen.

Fixes: 242922a02717 ("netfilter: conntrack: simplify early_drop")
Cc: Florian Westphal <f...@strlen.de>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index e0e9c9a..2d46225 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -880,6 +880,7 @@ static noinline int early_drop(struct net *net, unsigned 
int _hash)
struct hlist_nulls_head *ct_hash;
unsigned hash, sequence, drops;
 
+   rcu_read_lock();
do {
sequence = 
read_seqcount_begin(_conntrack_generation);
hash = scale_hash(_hash++);
@@ -887,6 +888,8 @@ static noinline int early_drop(struct net *net, unsigned 
int _hash)
} while (read_seqcount_retry(_conntrack_generation, 
sequence));
 
drops = early_drop_list(net, _hash[hash]);
+   rcu_read_unlock();
+
if (drops) {
NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
return true;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 2/2] netfilter: conntrack: simplify the code by using nf_conntrack_get_ht

2016-07-12 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Since Commit 64b87639c9cb ("netfilter: conntrack: fix race between
nf_conntrack proc read and hash resize") introdue the
nf_conntrack_get_ht, so there's no need to check nf_conntrack_generation
again and again to get the hash table and hash size.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_core.c | 30 +++---
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 2d46225..bafebf7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -461,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 }
 
 /* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+inline void
+nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
 {
struct hlist_nulls_head *hptr;
unsigned int sequence, hsz;
@@ -489,14 +490,11 @@ nf_conntrack_find(struct net *net, const struct 
nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
-   unsigned int bucket, sequence;
+   unsigned int bucket, hsize;
 
 begin:
-   do {
-   sequence = read_seqcount_begin(_conntrack_generation);
-   bucket = scale_hash(hash);
-   ct_hash = nf_conntrack_hash;
-   } while (read_seqcount_retry(_conntrack_generation, sequence));
+   nf_conntrack_get_ht(_hash, );
+   bucket = reciprocal_scale(hash, hsize);
 
hlist_nulls_for_each_entry_rcu(h, n, _hash[bucket], hnnode) {
if (nf_ct_key_equal(h, tuple, zone, net)) {
@@ -801,18 +799,15 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple 
*tuple,
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
-   unsigned int hash, sequence;
+   unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
 
zone = nf_ct_zone(ignored_conntrack);
 
rcu_read_lock();
-   do {
-   sequence = read_seqcount_begin(_conntrack_generation);
-   hash = hash_conntrack(net, tuple);
-   ct_hash = nf_conntrack_hash;
-   } while (read_seqcount_retry(_conntrack_generation, sequence));
+   nf_conntrack_get_ht(_hash, );
+   hash = __hash_conntrack(net, tuple, hsize);
 
hlist_nulls_for_each_entry_rcu(h, n, _hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -878,14 +873,11 @@ static noinline int early_drop(struct net *net, unsigned 
int _hash)
 
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
-   unsigned hash, sequence, drops;
+   unsigned int hash, hsize, drops;
 
rcu_read_lock();
-   do {
-   sequence = 
read_seqcount_begin(_conntrack_generation);
-   hash = scale_hash(_hash++);
-   ct_hash = nf_conntrack_hash;
-   } while (read_seqcount_retry(_conntrack_generation, 
sequence));
+   nf_conntrack_get_ht(_hash, );
+   hash = reciprocal_scale(_hash++, hsize);
 
drops = early_drop_list(net, _hash[hash]);
rcu_read_unlock();
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nft_ct: make byte/packet expr more friendly

2016-07-05 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

If we want to use ct packets expr, and add a rule like follows:
  # nft add rule filter input ct packets gt 1 counter

We will find that no packets will hit it, because
nf_conntrack_acct is disabled by default. So It will
not work until we enable it manually via
"echo 1 > /proc/sys/net/netfilter/nf_conntrack_acct".

This is not friendly, so like xt_connbytes do, if the user
want to use ct byte/packet expr, enable nf_conntrack_acct
automatically.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_ct.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 137e308..7ce8fd7 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -355,6 +355,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
 
+   if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS)
+   nf_ct_set_acct(ctx->net, true);
+
return 0;
 }
 
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq

2016-07-04 Thread Liping Zhang
2016-07-04 14:14 GMT+08:00 Christophe Leroy :
>> I think there is no need to convert simple_strtoul to kstrtouint, add
>> a further check seems better?
>> Like this:
>>  -   if (!cseq) {
>> +   if (!cseq && *(*dptr + matchoff) != '0') {
>>
>
> And what about an invalid CSeq that would look like CSeq: 0abzk852 ?
> Should we check it is 0 + space instead ?

In this case, i.e. some stupid sip clients set CSeq to "0abzk852",
your patch will also fail to detect this "error".

Because for "Cseq", int (*match_len)(...) point to digits_len(see
struct sip_header ct_sip_hdrs definition).
So in this case match_len will just be setted to ONE (not
sizeof("0abzk852")-1), then cseq will be parsed
as 0 by  kstrtouint, not as an error.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq

2016-07-03 Thread Liping Zhang
2016-07-01 17:48 GMT+08:00 Christophe Leroy :
> Do not drop packet when CSeq is 0 as 0 is also a valid value for CSeq.
>
> --- a/net/netfilter/nf_conntrack_sip.c
> +++ b/net/netfilter/nf_conntrack_sip.c
> @@ -1368,6 +1368,7 @@ static int process_sip_response(struct sk_buff *skb, 
> unsigned int protoff,
> struct nf_conn *ct = nf_ct_get(skb, );
> unsigned int matchoff, matchlen, matchend;
> unsigned int code, cseq, i;
> +   char buf[21];
>
> if (*datalen < strlen("SIP/2.0 200"))
> return NF_ACCEPT;
> @@ -1382,8 +1383,13 @@ static int process_sip_response(struct sk_buff *skb, 
> unsigned int protoff,
> nf_ct_helper_log(skb, ct, "cannot parse cseq");
> return NF_DROP;
> }
> -   cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
> -   if (!cseq) {

I think there is no need to convert simple_strtoul to kstrtouint, add
a further check seems better?
Like this:
 -   if (!cseq) {
+   if (!cseq && *(*dptr + matchoff) != '0') {

> +   if (matchlen > sizeof(buf) - 1) {
> +   nf_ct_helper_log(skb, ct, "cannot parse cseq (too big)");
> +   return NF_DROP;
> +   }
> +   memcpy(buf, *dptr + matchoff, matchlen);
> +   buf[matchlen] = 0;
> +   if (kstrtouint(buf, 10, )) {
> nf_ct_helper_log(skb, ct, "cannot get cseq");
> return NF_DROP;
> }
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2,nf 1/3] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack
hash table via /sys/module/nf_conntrack/parameters/hashsize, race will
happen, because reader can observe a newly allocated hash but the old size
(or vice versa). So oops will happen like follows:

  BUG: unable to handle kernel NULL pointer dereference at 0017
  IP: [] seq_print_acct+0x11/0x50 [nf_conntrack]
  Call Trace:
  [] ? ct_seq_show+0x14e/0x340 [nf_conntrack]
  [] seq_read+0x2cc/0x390
  [] proc_reg_read+0x42/0x70
  [] __vfs_read+0x37/0x130
  [] ? security_file_permission+0xa0/0xc0
  [] vfs_read+0x95/0x140
  [] SyS_read+0x55/0xc0
  [] entry_SYSCALL_64_fastpath+0x1a/0xa4

It is very easy to reproduce this kernel crash.
1. open one shell and input the following cmds:
  while : ; do
echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize
  done
2. open more shells and input the following cmds:
  while : ; do
cat /proc/net/nf_conntrack
  done
3. just wait a monent, oops will happen soon.

The solution in this patch is based on Florian's Commit 5e3c61f98175
("netfilter: conntrack: fix lookup race during hash resize"). And
add a wrapper function nf_conntrack_get_ht to get hash and hsize
suggested by Florian Westphal.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: Add nf_conntrack_get_ht wrapper function. I think in nf_conntrack_find
 and nf_conntrack_tuple_taken, we can also use nf_conntrack_get_ht to 
simplify
 the source code, but it is unrelated to this bug fix. So I'd rather take
 another patch to do this thing.

 include/net/netfilter/nf_conntrack_core.h |  2 ++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 ++
 net/netfilter/nf_conntrack_core.c | 17 +
 net/netfilter/nf_conntrack_standalone.c   | 14 +-
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h 
b/include/net/netfilter/nf_conntrack_core.h
index 3e2f332..79d7ac5 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
 
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
+
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index c6f3c40..6392371 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -26,6 +26,8 @@
 
 struct ct_iter_state {
struct seq_net_private p;
+   struct hlist_nulls_head *hash;
+   unsigned int htable_size;
unsigned int bucket;
 };
 
@@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct 
seq_file *seq)
struct hlist_nulls_node *n;
 
for (st->bucket = 0;
-st->bucket < nf_conntrack_htable_size;
+st->bucket < st->htable_size;
 st->bucket++) {
n = rcu_dereference(
-   hlist_nulls_first_rcu(_conntrack_hash[st->bucket]));
+   hlist_nulls_first_rcu(>hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file 
*seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
-   if (++st->bucket >= nf_conntrack_htable_size)
+   if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
-   hlist_nulls_first_rcu(_conntrack_hash[st->bucket]));
+   hlist_nulls_first_rcu(>hash[st->bucket]));
}
return head;
 }
@@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file 
*seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
 {
+   struct ct_iter_state *st = seq->private;
+
rcu_read_lock();
+
+   nf_conntrack_get_ht(>hash, >htable_size);
return ct_get_idx(seq, *pos);
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index f204274..2d2472b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -466,6 +466,23 @@ nf_ct_key_equal(struct nf_conntrack_tupl

[PATCH V2,nf 2/3] netfilter: cttimeout: unlink timeout obj again when hash resize happen

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Imagine such situation, nf_conntrack_htable_size now is 4096, we are doing
ctnl_untimeout, and iterate on 3000# bucket.

Meanwhile, another user try to reduce hash size to 2048, then all nf_conn
are removed to the new hashtable. When this hash resize operation finished,
we still try to itreate ct begin from 3000# bucket, find nothing to do and
just return.

We may miss unlinking some timeout objects. And later we will end up with
invalid references to timeout object that are already gone.

So when we find that hash resize happened, try to unlink timeout objects
from the 0# bucket again.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: no need to use nf_conntrack_generation to check hash resize happen.
 net/netfilter/nfnetlink_cttimeout.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 3c84f14..4cdcd96 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -303,16 +303,24 @@ static void ctnl_untimeout(struct net *net, struct 
ctnl_timeout *timeout)
 {
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
+   unsigned int last_hsize;
+   spinlock_t *lock;
int i;
 
local_bh_disable();
-   for (i = 0; i < nf_conntrack_htable_size; i++) {
-   nf_conntrack_lock(_conntrack_locks[i % CONNTRACK_LOCKS]);
-   if (i < nf_conntrack_htable_size) {
-   hlist_nulls_for_each_entry(h, nn, 
_conntrack_hash[i], hnnode)
-   untimeout(h, timeout);
+restart:
+   last_hsize = nf_conntrack_htable_size;
+   for (i = 0; i < last_hsize; i++) {
+   lock = _conntrack_locks[i % CONNTRACK_LOCKS];
+   nf_conntrack_lock(lock);
+   if (last_hsize != nf_conntrack_htable_size) {
+   spin_unlock(lock);
+   goto restart;
}
-   spin_unlock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+
+   hlist_nulls_for_each_entry(h, nn, _conntrack_hash[i], hnnode)
+   untimeout(h, timeout);
+   spin_unlock(lock);
}
local_bh_enable();
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2,nf 3/3] netfilter: nf_ct_helper: unlink helper again when hash resize happen

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Similar to ctnl_untimeout, when hash resize happened, we should try
to do unhelp from the 0# bucket again.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 V2: no need to use nf_conntrack_generation to check hash resize happen.
 net/netfilter/nf_conntrack_helper.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 196cb39..db63a79 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -392,6 +392,8 @@ static void __nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me,
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
+   unsigned int last_hsize;
+   spinlock_t *lock;
unsigned int i;
int cpu;
 
@@ -422,14 +424,20 @@ static void __nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me,
unhelp(h, me);
spin_unlock_bh(>lock);
}
+
local_bh_disable();
-   for (i = 0; i < nf_conntrack_htable_size; i++) {
-   nf_conntrack_lock(_conntrack_locks[i % CONNTRACK_LOCKS]);
-   if (i < nf_conntrack_htable_size) {
-   hlist_nulls_for_each_entry(h, nn, 
_conntrack_hash[i], hnnode)
-   unhelp(h, me);
+restart:
+   last_hsize = nf_conntrack_htable_size;
+   for (i = 0; i < last_hsize; i++) {
+   lock = _conntrack_locks[i % CONNTRACK_LOCKS];
+   nf_conntrack_lock(lock);
+   if (last_hsize != nf_conntrack_htable_size) {
+   spin_unlock(lock);
+   goto restart;
}
-   spin_unlock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+   hlist_nulls_for_each_entry(h, nn, _conntrack_hash[i], hnnode)
+   unhelp(h, me);
+   spin_unlock(lock);
}
local_bh_enable();
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf 1/3] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize

2016-07-02 Thread Liping Zhang
>Good catch, but ...
>
>> diff --git a/include/net/netfilter/nf_conntrack_core.h 
>> b/include/net/netfilter/nf_conntrack_core.h
>> index 3e2f332..4f6453a 100644
>> --- a/include/net/netfilter/nf_conntrack_core.h
>> +++ b/include/net/netfilter/nf_conntrack_core.h
>> @@ -82,6 +82,7 @@ print_tuple(struct seq_file *s, const struct 
>> nf_conntrack_tuple *tuple,
>>  #define CONNTRACK_LOCKS 1024
>>  
>>  extern struct hlist_nulls_head *nf_conntrack_hash;
>> +extern seqcount_t nf_conntrack_generation;
>
>instead of this and the proliferation of this:
>
>> +do {
>> +sequence = read_seqcount_begin(_conntrack_generation);
>> +st->htable_size = nf_conntrack_htable_size;
>> +st->hash = nf_conntrack_hash;
>> +} while (read_seqcount_retry(_conntrack_generation, sequence));
>> +
>>  return ct_get_idx(seq, *pos);
>>  }
>
>I think it might be better to do something like
>
>/* must be called with rcu read lock held */
>unsigned int nf_conntrack_get_ht(struct hlist_nulls_head *h,
>unsigned int *buckets)
>{
>   do {
>   s = read_seq ...
>   size = nf_conntrack_htable_size;
>   ptr = nf_conntrack_hash;
>   } while ...
>
>   *h = ptr;
>   *buckets = size;
>
>   return s;

Agree.

And I also find there's no need to use nf_conntrack_generation in my patch #2 
and #3.
Will send V2 later.

Thanks

[PATCH nf 2/3] netfilter: cttimeout: unlink timeout obj again when hash resize happen

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Imagine such situation, nf_conntrack_htable_size now is 4096, we are doing
ctnl_untimeout, and iterate on 3000# bucket.

Meanwhile, another user try to reduce hash size to 2048, then all nf_conn
are removed to the new hashtable. When this hash resize operation finished,
we still try to itreate ct begin from 3000# bucket, find nothing to do and
just return.

Then we may miss unlinking some timeout objects. And later we will end up
with invalid references to timeout object that are already gone.

So when we find that hash resize happened, try to unlink timeout objects
from the 0# bucket again.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nfnetlink_cttimeout.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c 
b/net/netfilter/nfnetlink_cttimeout.c
index 3c84f14..1d0bc86 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -303,16 +303,26 @@ static void ctnl_untimeout(struct net *net, struct 
ctnl_timeout *timeout)
 {
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
+   unsigned int sequence;
+   spinlock_t *lock;
int i;
 
local_bh_disable();
+restart:
+   sequence = read_seqcount_begin(_conntrack_generation);
for (i = 0; i < nf_conntrack_htable_size; i++) {
-   nf_conntrack_lock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+   lock = _conntrack_locks[i % CONNTRACK_LOCKS];
+   nf_conntrack_lock(lock);
+   if (read_seqcount_retry(_conntrack_generation, sequence)) {
+   spin_unlock(lock);
+   goto restart;
+   }
+
if (i < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, nn, 
_conntrack_hash[i], hnnode)
untimeout(h, timeout);
}
-   spin_unlock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+   spin_unlock(lock);
}
local_bh_enable();
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 0/3] netfilter: conntrack: fix race condition associated with hash resize

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When user adjust the hash size via /sys/module/nf_conntrack/parameters/hashsize,
something will break because race condition happened.

This patch set aim to fix these bugs.

When we do "cat /proc/net/nf_conntrack", and at the same time do hash resize,
nf_conntrack_htable_size and nf_conntrack_hash may become unrelated if we
read them separately, so oops will happen. Fix this in patch #1.

When we do unlink help or timeout objects, and at the same time do hash resize,
we may miss unlinking some objects, later we will end up with invalid 
references.
Fix this in patch #2 and #3.

Liping Zhang (3):
  netfilter: conntrack: fix race between nf_conntrack proc read and hash
resize
  netfilter: cttimeout: unlink timeout obj again when hash resize happen
  netfilter: nf_ct_helper: unlink helper again when hash resize happen

 include/net/netfilter/nf_conntrack_core.h|  1 +
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 20 
 net/netfilter/nf_conntrack_core.c|  4 +++-
 net/netfilter/nf_conntrack_helper.c  | 14 --
 net/netfilter/nf_conntrack_standalone.c  | 20 +++-
 net/netfilter/nfnetlink_cttimeout.c  | 14 --
 6 files changed, 59 insertions(+), 14 deletions(-)

-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf 1/3] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack
hash table via /sys/module/nf_conntrack/parameters/hashsize, race will
happen, because reader can observe a newly allocated hash but the old size
(or vice versa). So oops will happen like follows:

  BUG: unable to handle kernel NULL pointer dereference at 0017
  IP: [] seq_print_acct+0x11/0x50 [nf_conntrack]
  Call Trace:
  [] ? ct_seq_show+0x14e/0x340 [nf_conntrack]
  [] seq_read+0x2cc/0x390
  [] proc_reg_read+0x42/0x70
  [] __vfs_read+0x37/0x130
  [] ? security_file_permission+0xa0/0xc0
  [] vfs_read+0x95/0x140
  [] SyS_read+0x55/0xc0
  [] entry_SYSCALL_64_fastpath+0x1a/0xa4

It is very easy to reproduce this kernel crash.
1. open one shell and input the following cmds:
  while : ; do
echo $RANDOM > hashsize
  done
2. open more shells and input the following cmds:
  while : ; do
cat /proc/net/nf_conntrack
  done
3. just wait a monent, oops will happen soon.

The solution in this patch is based on Florian's Commit 5e3c61f98175
("netfilter: conntrack: fix lookup race during hash resize").

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/net/netfilter/nf_conntrack_core.h|  1 +
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 20 
 net/netfilter/nf_conntrack_core.c|  4 +++-
 net/netfilter/nf_conntrack_standalone.c  | 20 +++-
 4 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h 
b/include/net/netfilter/nf_conntrack_core.h
index 3e2f332..4f6453a 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -82,6 +82,7 @@ print_tuple(struct seq_file *s, const struct 
nf_conntrack_tuple *tuple,
 #define CONNTRACK_LOCKS 1024
 
 extern struct hlist_nulls_head *nf_conntrack_hash;
+extern seqcount_t nf_conntrack_generation;
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index c6f3c40..584899f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -26,6 +26,8 @@
 
 struct ct_iter_state {
struct seq_net_private p;
+   struct hlist_nulls_head *hash;
+   unsigned int htable_size;
unsigned int bucket;
 };
 
@@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct 
seq_file *seq)
struct hlist_nulls_node *n;
 
for (st->bucket = 0;
-st->bucket < nf_conntrack_htable_size;
+st->bucket < st->htable_size;
 st->bucket++) {
n = rcu_dereference(
-   hlist_nulls_first_rcu(_conntrack_hash[st->bucket]));
+   hlist_nulls_first_rcu(>hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file 
*seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
-   if (++st->bucket >= nf_conntrack_htable_size)
+   if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
-   hlist_nulls_first_rcu(_conntrack_hash[st->bucket]));
+   hlist_nulls_first_rcu(>hash[st->bucket]));
}
return head;
 }
@@ -75,7 +77,17 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file 
*seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
 {
+   struct ct_iter_state *st = seq->private;
+   unsigned int sequence;
+
rcu_read_lock();
+
+   do {
+   sequence = read_seqcount_begin(_conntrack_generation);
+   st->htable_size = nf_conntrack_htable_size;
+   st->hash = nf_conntrack_hash;
+   } while (read_seqcount_retry(_conntrack_generation, sequence));
+
return ct_get_idx(seq, *pos);
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index f204274..1c39697 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,9 +72,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
+seqcount_t nf_conntrack_generation __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_generation);
+
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mo

[PATCH nf 3/3] netfilter: nf_ct_helper: unlink helper again when hash resize happen

2016-07-02 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Similar to ctnl_untimeout, when hash resize happened, we should try
to do unhelp from the 0# bucket again.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_conntrack_helper.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 196cb39..9d02b75 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -392,6 +392,8 @@ static void __nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me,
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
+   unsigned int sequence;
+   spinlock_t *lock;
unsigned int i;
int cpu;
 
@@ -422,14 +424,22 @@ static void __nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me,
unhelp(h, me);
spin_unlock_bh(>lock);
}
+
local_bh_disable();
+restart:
+   sequence = read_seqcount_begin(_conntrack_generation);
for (i = 0; i < nf_conntrack_htable_size; i++) {
-   nf_conntrack_lock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+   lock = _conntrack_locks[i % CONNTRACK_LOCKS];
+   nf_conntrack_lock(lock);
+   if (read_seqcount_retry(_conntrack_generation, sequence)) {
+   spin_unlock(lock);
+   goto restart;
+   }
if (i < nf_conntrack_htable_size) {
hlist_nulls_for_each_entry(h, nn, 
_conntrack_hash[i], hnnode)
unhelp(h, me);
}
-   spin_unlock(_conntrack_locks[i % CONNTRACK_LOCKS]);
+   spin_unlock(lock);
}
local_bh_enable();
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libipt_realm: fix order of mask and id when do nft translation

2016-06-27 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Before:
  # iptables-translate -A INPUT -m realm --realm 1/0xf
  nft add rule ip filter INPUT rtclassid and 0x1 == 0xf counter

Apply this patch:
  # iptables-translate -A INPUT -m realm --realm 1/0xf
  nft add rule ip filter INPUT rtclassid and 0xf == 0x1 counter

Cc: Shivani Bhardwaj <shivanib...@gmail.com>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libipt_realm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/libipt_realm.c b/extensions/libipt_realm.c
index beb2491..0a4bc3b 100644
--- a/extensions/libipt_realm.c
+++ b/extensions/libipt_realm.c
@@ -115,8 +115,8 @@ print_realm_xlate(unsigned long id, unsigned long mask,
const char *name = NULL;
 
if (mask != 0x)
-   xt_xlate_add(xl, " and 0x%lx %s 0x%lx ", id,
-  op == XT_OP_EQ ? "==" : "!=", mask);
+   xt_xlate_add(xl, " and 0x%lx %s 0x%lx ", mask,
+  op == XT_OP_EQ ? "==" : "!=", id);
else {
if (numeric == 0)
name = xtables_lmap_id2name(realms, id);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next] netfilter: xt_cpu: no need to check the validity of invert flag

2016-06-23 Thread Liping Zhang
Hi Pablo,

2016-06-23 19:11 GMT+08:00 Pablo Neira Ayuso :
>> -static int cpu_mt_check(const struct xt_mtchk_param *par)
>> -{
>> - const struct xt_cpu_info *info = par->matchinfo;
>> -
>> - if (info->invert & ~1)
>> - return -EINVAL;
>> - return 0;
>> -}
>
> This trick is there so we can convert info->invert to info->flags in
> the future without a new revision (given the binary interface did not
> change). I'm not convinced there is much of benefit from getting rid
> of this little extra _check() code that runs from the control plane
> path.
>

Thanks for pointing this out. At my first glace, I think this _check
is tricky and a little ugly,
so I try to remove it and send this patch.

As you said, if we add new flags in the future, for example, we
support a new flag like this
"iptables -A INPUT -m cpu --cpu 0 --flagXXX". When the user use the
new iptables utility
but the kernel is old, currently kernel will reject this request,
because we don't recognize the
"flagXXX".

But apply my patch, kernel will just ignore this unknown flag, this
will confuse the user.
And change a new revision seems unworthy.

So I'd rather not apply this pacth.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH -next v2] netfilter: conntrack: allow increasing bucket size via sysctl too

2016-06-22 Thread Liping Zhang
Hi Florian,

2016-06-22 2:46 GMT+08:00 Florian Westphal :
> @@ -1650,11 +1646,31 @@ int nf_conntrack_set_hashsize(const char *val, struct 
> kernel_param *kp)
> write_seqcount_end(_conntrack_generation);
> nf_conntrack_all_unlock();
> local_bh_enable();
> +   synchronize_net();

synchronize_net()  call here is redundant?

>
> synchronize_net();
> nf_ct_free_hashtable(old_hash, old_size);
> return 0;
>  }

And according to your patch version 1, it seems that you missed to update the
Documentation/networking/nf_conntrack-sysctl.txt this time.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PACTH nf-next] netfilter: nf_reject_ipv4: don't send tcp RST if the packet is non-TCP

2016-06-20 Thread Liping Zhang
Hi Marcelo,

2016-06-20 23:48 GMT+08:00 Marcelo Ricardo Leitner :
>
> A different check/log is made for ip6:
> nf_reject_ip6_tcphdr_get():
> /* IP header checks: fragment, too short. */
> if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) {
> pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n",
>  proto, *otcplen);
> return NULL;
> }
>
> Would be nice to have some consistency on this log message as it
> increases debug-ability.
>

Thanks for your opinion.

But you can see, there are many inconsistent things between
nf_reject_ip6_tcphdr_get and nf_reject_ip_tcphdr_get.

For example, when tcp->rst is set, reject_ip6 will call
pr_debug("RST is set\n"), while there's nothing in reject_ip4.

IMO, these debug informations are almost useless, so there's
no need to add this debug info only for consistent with nf_reject_ip6.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PACTH nf-next] netfilter: nf_reject_ipv4: don't send tcp RST if the packet is non-TCP

2016-06-20 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

In iptables, if the user add a rule to send tcp RST and specify the
non-TCP protocol, such as UDP, kernel will reject this request. But
in nftables, this validity check only occurs in nft tool, i.e. only
in userspace.

This means that user can add such a rule like follows via nfnetlink:
  "nft add rule filter forward ip protocol udp reject with tcp reset"

This will generate some confusing tcp RST packets. So we should send
tcp RST only when it is TCP packet.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/ipv4/netfilter/nf_reject_ipv4.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c 
b/net/ipv4/netfilter/nf_reject_ipv4.c
index b6ea57e..fd82202 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff 
*oldskb,
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return NULL;
 
+   if (ip_hdr(oldskb)->protocol != IPPROTO_TCP)
+   return NULL;
+
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
 sizeof(struct tcphdr), _oth);
if (oth == NULL)
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf] netfilter: nf_tables: fix memory leak if expr init fails

2016-06-20 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

If expr init fails then we need to free it.

So when the user add a nft rule as follows:
  # nft add rule filter input tcp dport 22 flow table ssh \
{ ip saddr limit rate 0/second }
memory leak will happen.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_api.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2c88187..cf7c745 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1724,9 +1724,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 
err = nf_tables_newexpr(ctx, , expr);
if (err < 0)
-   goto err2;
+   goto err3;
 
return expr;
+err3:
+   kfree(expr);
 err2:
module_put(info.ops->type->owner);
 err1:
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nf-next 4/4] netfilter: nft_meta: add explicitly nf_logger_find_get call

2016-06-14 Thread Liping Zhang
Hi Florian,

At 2016-06-08 20:59:32, "Florian Westphal"  wrote:
>
>With nftables we have a new infrastructure in place that emits trace info via
>nfnetlink.
>
>So loading nf_log_ipX isn't needed anymore in nft.

Yes, in nftables, user can use "nft monitor" to get the trace info.
But I think it is a little choas now, sometimes we can see trace info 
in kmsg(when nf_log_ipX is loaded), sometimes there's nothing in
kmsg(when nf_log_ipX is not installed).

This is confusing, especially for newbie.
N嫥叉靣笡y氊b瞂千v豝�)藓{.n�+壏租栕玼朕�)韰骅w*jg�秹殠娸/侁鋤罐枈�2娹櫒璀�&�)摺玜囤瓽珴閔�鎗:+v墾妛鑶佶

[PATCH nf-next] netfilter: nf_tables: fix a wrong check to skip the inactive rules

2016-06-14 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

nft_genmask_cur has already done left-shift operator on the gencursor,
so there's no need to do left-shift operator on it again.

Fixes: ea4bd995b0f2 ("netfilter: nf_tables: add transaction helper functions")
Cc: Patrick McHardy <ka...@trash.net>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index e9f8dff..fb8b589 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -143,7 +143,7 @@ next_rule:
list_for_each_entry_continue_rcu(rule, >rules, list) {
 
/* This rule is not active, skip. */
-   if (unlikely(rule->genmask & (1 << gencursor)))
+   if (unlikely(rule->genmask & gencursor))
continue;
 
rulenum++;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] tests: shell: add endless jump loop tests

2016-06-13 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Add some tests for endless jump loop validation.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 tests/shell/testcases/chains/0010endless_jump_loop_1 |  9 +
 tests/shell/testcases/chains/0011endless_jump_loop_1 | 14 ++
 2 files changed, 23 insertions(+)
 create mode 100755 tests/shell/testcases/chains/0010endless_jump_loop_1
 create mode 100755 tests/shell/testcases/chains/0011endless_jump_loop_1

diff --git a/tests/shell/testcases/chains/0010endless_jump_loop_1 
b/tests/shell/testcases/chains/0010endless_jump_loop_1
new file mode 100755
index 000..dba70e1
--- /dev/null
+++ b/tests/shell/testcases/chains/0010endless_jump_loop_1
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+$NFT add table t
+$NFT add chain t c
+# kernel should return ELOOP
+$NFT add rule t c tcp dport vmap {1 : jump c} 2>/dev/null
+echo "E: accepted endless jump loop in a vmap" >&2
diff --git a/tests/shell/testcases/chains/0011endless_jump_loop_1 
b/tests/shell/testcases/chains/0011endless_jump_loop_1
new file mode 100755
index 000..adbff8d
--- /dev/null
+++ b/tests/shell/testcases/chains/0011endless_jump_loop_1
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+set -e
+
+$NFT add table t
+$NFT add chain t c1
+$NFT add chain t c2
+$NFT add map t m {type inet_service : verdict \;}
+$NFT add element t m {2 : jump c2}
+$NFT add rule t c1 tcp dport vmap @m
+
+# kernel should return ELOOP
+$NFT add element t m {1 : jump c1} 2>/dev/null
+echo "E: accepted endless jump loop in a vmap" >&2
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] tests: shell: make testcases which using tcp/udp port more rubost

2016-06-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

In my mechain, port 12345 is mapped to italk in /etc/services:
  italk   12345/tcp   # Italk Chat System

So when we add nft rule with udp port "12345", nft list ruleset
will displayed it as "italk", that cause the result is not same
with expected, then testcase fail.

Add "-nn" option when dump the rulesets from the kernel, make
testcases which using tcp/udp port more rubost.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 tests/shell/testcases/netns/0001nft-f_0 | 2 +-
 tests/shell/testcases/netns/0002loosecommands_0 | 2 +-
 tests/shell/testcases/netns/0003many_0  | 2 +-
 tests/shell/testcases/nft-f/0002rollback_rule_0 | 2 +-
 tests/shell/testcases/nft-f/0003rollback_jump_0 | 2 +-
 tests/shell/testcases/nft-f/0004rollback_set_0  | 2 +-
 tests/shell/testcases/nft-f/0005rollback_map_0  | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/shell/testcases/netns/0001nft-f_0 
b/tests/shell/testcases/netns/0001nft-f_0
index e616363..663167d 100755
--- a/tests/shell/testcases/netns/0001nft-f_0
+++ b/tests/shell/testcases/netns/0001nft-f_0
@@ -99,7 +99,7 @@ if [ $? -ne 0 ] ; then
exit 1
 fi
 
-KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset)"
+KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset -nn)"
 $IP netns del $NETNS_NAME
 if [ "$RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
diff --git a/tests/shell/testcases/netns/0002loosecommands_0 
b/tests/shell/testcases/netns/0002loosecommands_0
index 1600d94..fbaa386 100755
--- a/tests/shell/testcases/netns/0002loosecommands_0
+++ b/tests/shell/testcases/netns/0002loosecommands_0
@@ -53,7 +53,7 @@ RULESET="table ip t {
}
 }"
 
-KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset)"
+KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset -nn)"
 $IP netns del $NETNS_NAME
 if [ "$RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
diff --git a/tests/shell/testcases/netns/0003many_0 
b/tests/shell/testcases/netns/0003many_0
index ad71ae3..f8853ee 100755
--- a/tests/shell/testcases/netns/0003many_0
+++ b/tests/shell/testcases/netns/0003many_0
@@ -104,7 +104,7 @@ function test_netns()
exit 1
fi
 
-   KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset)"
+   KERNEL_RULESET="$($IP netns exec $NETNS_NAME $NFT list ruleset -nn)"
if [ "$RULESET" != "$KERNEL_RULESET" ] ; then
echo "E: ruleset in netns $NETNS_NAME differs from the loaded" 
>&2
DIFF="$(which diff)"
diff --git a/tests/shell/testcases/nft-f/0002rollback_rule_0 
b/tests/shell/testcases/nft-f/0002rollback_rule_0
index b1e224c..5518c0b 100755
--- a/tests/shell/testcases/nft-f/0002rollback_rule_0
+++ b/tests/shell/testcases/nft-f/0002rollback_rule_0
@@ -49,7 +49,7 @@ if [ $? -eq 0 ]   ; then
exit 1
 fi
 
-KERNEL_RULESET="$($NFT list ruleset)"
+KERNEL_RULESET="$($NFT list ruleset -nn)"
 
 if [ "$GOOD_RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
diff --git a/tests/shell/testcases/nft-f/0003rollback_jump_0 
b/tests/shell/testcases/nft-f/0003rollback_jump_0
index 567a70e..5c8c685 100755
--- a/tests/shell/testcases/nft-f/0003rollback_jump_0
+++ b/tests/shell/testcases/nft-f/0003rollback_jump_0
@@ -49,7 +49,7 @@ if [ $? -eq 0 ]   ; then
exit 1
 fi
 
-KERNEL_RULESET="$($NFT list ruleset)"
+KERNEL_RULESET="$($NFT list ruleset -nn)"
 
 if [ "$GOOD_RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
diff --git a/tests/shell/testcases/nft-f/0004rollback_set_0 
b/tests/shell/testcases/nft-f/0004rollback_set_0
index 3521aeb..db1c84c 100755
--- a/tests/shell/testcases/nft-f/0004rollback_set_0
+++ b/tests/shell/testcases/nft-f/0004rollback_set_0
@@ -49,7 +49,7 @@ if [ $? -eq 0 ]   ; then
exit 1
 fi
 
-KERNEL_RULESET="$($NFT list ruleset)"
+KERNEL_RULESET="$($NFT list ruleset -nn)"
 
 if [ "$GOOD_RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
diff --git a/tests/shell/testcases/nft-f/0005rollback_map_0 
b/tests/shell/testcases/nft-f/0005rollback_map_0
index 21b6a63..13bb907 100755
--- a/tests/shell/testcases/nft-f/0005rollback_map_0
+++ b/tests/shell/testcases/nft-f/0005rollback_map_0
@@ -52,7 +52,7 @@ if [ $? -eq 0 ]   ; then
exit 1
 fi
 
-KERNEL_RULESET="$($NFT list ruleset)"
+KERNEL_RULESET="$($NFT list ruleset -nn)"
 
 if [ "$GOOD_RULESET" != "$KERNEL_RULESET" ] ; then
 DIFF="$(which diff)"
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 2/3] netfilter: nf_tables: fix a endless jump loop when use vmap

2016-06-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Currently, user can add such a wrong nft rules successfully, which
will cause an endless jump loop:
  # nft add rule filter test tcp dport vmap {1: jump test}

This is because before we commit, the element in the current anonymous
set is inactive, so osp->walk will skip this element and miss the
validate check.

So we should also check the inactive element when we do loops check,
and ignore the inactive element when we do dump.

This patch also fix the testcase fail in
nftables/tests/shell/testcases/chains/0009masquerade_jump_1.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 include/net/netfilter/nf_tables.h |  1 +
 net/netfilter/nf_tables_api.c | 31 +--
 net/netfilter/nft_hash.c  |  3 ++-
 net/netfilter/nft_rbtree.c|  3 ++-
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h 
b/include/net/netfilter/nf_tables.h
index 0922354..e35194a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -167,6 +167,7 @@ struct nft_set_elem {
 
 struct nft_set;
 struct nft_set_iter {
+   boolignore_inactive;
unsigned intcount;
unsigned intskip;
int err;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e4a6d7e..d5f3602 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2949,10 +2949,11 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, 
struct nft_set *set,
goto bind;
}
 
-   iter.skip   = 0;
-   iter.count  = 0;
-   iter.err= 0;
-   iter.fn = nf_tables_bind_check_setelem;
+   iter.ignore_inactive= false;
+   iter.skip   = 0;
+   iter.count  = 0;
+   iter.err= 0;
+   iter.fn = nf_tables_bind_check_setelem;
 
set->ops->walk(ctx, set, );
if (iter.err < 0) {
@@ -3190,12 +3191,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, 
struct netlink_callback *cb)
if (nest == NULL)
goto nla_put_failure;
 
-   args.cb = cb;
-   args.skb= skb;
-   args.iter.skip  = cb->args[0];
-   args.iter.count = 0;
-   args.iter.err   = 0;
-   args.iter.fn= nf_tables_dump_setelem;
+   args.cb = cb;
+   args.skb= skb;
+   args.iter.ignore_inactive   = true;
+   args.iter.skip  = cb->args[0];
+   args.iter.count = 0;
+   args.iter.err   = 0;
+   args.iter.fn= nf_tables_dump_setelem;
set->ops->walk(, set, );
 
nla_nest_end(skb, nest);
@@ -4282,10 +4284,11 @@ static int nf_tables_check_loops(const struct nft_ctx 
*ctx,
binding->chain != chain)
continue;
 
-   iter.skip   = 0;
-   iter.count  = 0;
-   iter.err= 0;
-   iter.fn = nf_tables_loop_check_setelem;
+   iter.ignore_inactive= false;
+   iter.skip   = 0;
+   iter.count  = 0;
+   iter.err= 0;
+   iter.fn = nf_tables_loop_check_setelem;
 
set->ops->walk(ctx, set, );
if (iter.err < 0)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 6fa0165..7337580 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -218,7 +218,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const 
struct nft_set *set,
goto cont;
if (nft_set_elem_expired(>ext))
goto cont;
-   if (!nft_set_elem_active(>ext, genmask))
+   if (iter->ignore_inactive &&
+   !nft_set_elem_active(>ext, genmask))
goto cont;
 
elem.priv = he;
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index f762094..b948b72 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -219,7 +219,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
if (iter->count < iter->skip)
goto cont;
-   if (!nft_set_elem_active(>ext, genmask))
+   if (iter->ignore_inactive &&
+   !nft_set_elem_active(>ext, genmask))
goto cont;
 
elem.priv = rbe;
-- 
2.5

[PATCH nf-next 0/3] netfilter: fix a endless jump loop bug

2016-06-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

This patch set mainly fix a endless jump loop bug, for example, user
can add the following nft rules successfully:
  # nft add table filter
  # nft add chain filter test
  # nft add rule filter test tcp dport vmap {1: jump test}

This is because we skip the inactive elements in set, and miss the validate
check. Fix it in patch #2.

And after apply patch#2, I also find that there is a redundant 
nf_tables_set_destroy call when set bind fails, which cause my
mechain enter into deadlock. Fix it in patch #3.

Also fix a typo in patch #1.

Liping Zhang (3):
  netfilter: nf_tables: fix wrong check of NFT_SET_MAP in
nf_tables_bind_set
  netfilter: nf_tables: fix a endless jump loop when use vmap
  netfilter: nf_tables: fix wrong destroy anonymous sets if binding
fails

 include/net/netfilter/nf_tables.h |  1 +
 net/netfilter/nf_tables_api.c | 40 +++
 net/netfilter/nft_hash.c  |  3 ++-
 net/netfilter/nft_rbtree.c|  3 ++-
 4 files changed, 24 insertions(+), 23 deletions(-)

-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 1/3] netfilter: nf_tables: fix wrong check of NFT_SET_MAP in nf_tables_bind_set

2016-06-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

We should check "i" is used as a dictionary or not, "binding" is already
checked before.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4d292b9..e4a6d7e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2944,7 +2944,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct 
nft_set *set,
 * jumps are already validated for that chain.
 */
list_for_each_entry(i, >bindings, list) {
-   if (binding->flags & NFT_SET_MAP &&
+   if (i->flags & NFT_SET_MAP &&
i->chain == binding->chain)
goto bind;
}
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 3/3] netfilter: nf_tables: fix wrong destroy anonymous sets if binding fails

2016-06-10 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When we add a nft rule like follows:
  # nft add rule filter test tcp dport vmap {1: jump test}
-ELOOP error will be returned, and the anonymous set will be
destroyed.

But after that, nf_tables_abort will also try to remove the
element and destroy the set, which was already destroyed and
freed.

If we add a nft wrong rule, nft_tables_abort will do the cleanup
work rightly, so nf_tables_set_destroy call here is redundant and
wrong, remove it.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_tables_api.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d5f3602..ba2dad4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2956,13 +2956,8 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct 
nft_set *set,
iter.fn = nf_tables_bind_check_setelem;
 
set->ops->walk(ctx, set, );
-   if (iter.err < 0) {
-   /* Destroy anonymous sets if binding fails */
-   if (set->flags & NFT_SET_ANONYMOUS)
-   nf_tables_set_destroy(ctx, set);
-
+   if (iter.err < 0)
return iter.err;
-   }
}
 bind:
binding->chain = ctx->chain;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libxt_TRACE: Add translation to nft

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

For example:

  # iptables-translate -t raw -A PREROUTING -j TRACE
  nft add rule ip raw PREROUTING counter nftrace set 1

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_TRACE.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/extensions/libxt_TRACE.c b/extensions/libxt_TRACE.c
index 0282e6f..7cb3585 100644
--- a/extensions/libxt_TRACE.c
+++ b/extensions/libxt_TRACE.c
@@ -7,12 +7,20 @@
 #include 
 #include 
 
+static int trace_xlate(const void *ip, const struct xt_entry_target *target,
+  struct xt_xlate *xl, int numeric)
+{
+   xt_xlate_add(xl, "nftrace set 1");
+   return 1;
+}
+
 static struct xtables_target trace_target = {
.family = NFPROTO_UNSPEC,
.name   = "TRACE",
.version= XTABLES_VERSION,
.size   = XT_ALIGN(0),
.userspacesize  = XT_ALIGN(0),
+   .xlate  = trace_xlate,
 };
 
 void _init(void)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 3/4] netfilter: xt_TRACE: add explicitly nf_logger_find_get call

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Consider such situation, if nf_log_ipv4 kernel module is not installed,
and the user add a following iptables rule:
  # iptables -t raw -I PREROUTING -j TRACE

There will be no trace log generated until the user install nf_log_ipv4
module manully. So we should add request related nf_log module
appropriately here.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/xt_TRACE.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index df48967..858d189 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -4,12 +4,23 @@
 #include 
 
 #include 
+#include 
 
 MODULE_DESCRIPTION("Xtables: packet flow tracing");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
+static int trace_tg_check(const struct xt_tgchk_param *par)
+{
+   return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void trace_tg_destroy(const struct xt_tgdtor_param *par)
+{
+   nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
 static unsigned int
 trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param 
*par)
 }
 
 static struct xt_target trace_tg_reg __read_mostly = {
-   .name   = "TRACE",
-   .revision   = 0,
-   .family = NFPROTO_UNSPEC,
-   .table  = "raw",
-   .target = trace_tg,
-   .me = THIS_MODULE,
+   .name   = "TRACE",
+   .revision   = 0,
+   .family = NFPROTO_UNSPEC,
+   .table  = "raw",
+   .target = trace_tg,
+   .checkentry = trace_tg_check,
+   .destroy= trace_tg_destroy,
+   .me = THIS_MODULE,
 };
 
 static int __init trace_tg_init(void)
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 0/4] netfilter: request related nf_log module when we add TRACE rule

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

This patch set solve such problem, for example, if we add a following
iptables rule:
  # iptables -t raw -I PREROUTING -j TRACE
And nf_log_ipv4 kernel module is not installed, no trace log
will be generated, until we install the nf_log_ipv4 module manully.

This is not friendly, so we add nf_logger_find_get call explicitly
when xt_TRACE target is created. Nft nftrace meta has the same
problem.

And in order to avoid special treatment of NFPROTO_INET family again
and again, I move the special logic to the inside of nf_logger_find_get
and nf_logger_put, so caller can ignore it. 

Liping Zhang (4):
  netfilter: nf_log: handle NFPROTO_INET properly in
nf_logger_[find_get|put]
  netfilter: nft_log: no need to deal with NFPROTO_INET family
  netfilter: xt_TRACE: add explicitly nf_logger_find_get call
  netfilter: nft_meta: add explicitly nf_logger_find_get call

 net/netfilter/nf_log.c   | 20 
 net/netfilter/nft_log.c  | 21 +
 net/netfilter/nft_meta.c | 12 ++--
 net/netfilter/xt_TRACE.c | 25 +++--
 4 files changed, 50 insertions(+), 28 deletions(-)

-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 1/4] netfilter: nf_log: handle NFPROTO_INET properly in nf_logger_[find_get|put]

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When we request NFPROTO_INET, it means both NFPROTO_IPV4 and NFPROTO_IPV6.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nf_log.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a5d41df..73b845d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
struct nf_logger *logger;
int ret = -ENOENT;
 
+   if (pf == NFPROTO_INET) {
+   ret = nf_logger_find_get(NFPROTO_IPV4, type);
+   if (ret < 0)
+   return ret;
+
+   ret = nf_logger_find_get(NFPROTO_IPV6, type);
+   if (ret < 0) {
+   nf_logger_put(NFPROTO_IPV4, type);
+   return ret;
+   }
+
+   return 0;
+   }
+
if (rcu_access_pointer(loggers[pf][type]) == NULL)
request_module("nf-logger-%u-%u", pf, type);
 
@@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
 {
struct nf_logger *logger;
 
+   if (pf == NFPROTO_INET) {
+   nf_logger_put(NFPROTO_IPV4, type);
+   nf_logger_put(NFPROTO_IPV6, type);
+   return;
+   }
+
BUG_ON(loggers[pf][type] == NULL);
 
rcu_read_lock();
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: xt_cpu: no need to check the validity of invert flag

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

Instead, we can convert invert flag and ensure it is 1 or 0.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/xt_cpu.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index c7a2e54..ca1eaaf 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -25,27 +25,17 @@ MODULE_DESCRIPTION("Xtables: CPU match");
 MODULE_ALIAS("ipt_cpu");
 MODULE_ALIAS("ip6t_cpu");
 
-static int cpu_mt_check(const struct xt_mtchk_param *par)
-{
-   const struct xt_cpu_info *info = par->matchinfo;
-
-   if (info->invert & ~1)
-   return -EINVAL;
-   return 0;
-}
-
 static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
const struct xt_cpu_info *info = par->matchinfo;
 
-   return (info->cpu == smp_processor_id()) ^ info->invert;
+   return (info->cpu == smp_processor_id()) ^ !!info->invert;
 }
 
 static struct xt_match cpu_mt_reg __read_mostly = {
.name   = "cpu",
.revision   = 0,
.family = NFPROTO_UNSPEC,
-   .checkentry = cpu_mt_check,
.match  = cpu_mt,
.matchsize  = sizeof(struct xt_cpu_info),
.me = THIS_MODULE,
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next] netfilter: nft_meta: set skb->nf_trace appropriately

2016-06-08 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

When user add a nft rule to set nftrace to zero, for example:
  # nft add rule ip filter input nftrace set 0

We should set nf_trace to zero also.

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 net/netfilter/nft_meta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 16c50b0..f4bad9d 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -227,7 +227,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
skb->pkt_type = value;
break;
case NFT_META_NFTRACE:
-   skb->nf_trace = 1;
+   skb->nf_trace = !!value;
break;
default:
WARN_ON(1);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] parser: fix crash if we add a chain with an error chain type

2016-05-29 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

If we add a chain and specify the nonexistent chain type, chain_type_name_lookup
will return a NULL pointer, and meet the assert condition in xstrdup.
Fix crash like this:

  # nft add chain filter input {type none hook input priority 0\;}
  nft: utils.c:63: xstrdup: Assertion `s != ((void *)0)' failed.
  Aborted (core dumped)

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 src/parser_bison.y | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/parser_bison.y b/src/parser_bison.y
index 0452b8f..ef10dee 100644
--- a/src/parser_bison.y
+++ b/src/parser_bison.y
@@ -1124,12 +1124,14 @@ type_identifier :   STRING  { $$ = $1; }
 
 hook_spec  :   TYPESTRING  HOOK
STRING  dev_specPRIORITYprio_spec
{
-   $0->type = 
xstrdup(chain_type_name_lookup($2));
-   if ($0->type == NULL) {
+   const char *chain_type = 
chain_type_name_lookup($2);
+
+   if (chain_type == NULL) {
erec_queue(error(&@2, "unknown chain 
type %s", $2),
   state->msgs);
YYERROR;
}
+   $0->type = xstrdup(chain_type);
xfree($2);
 
$0->hookstr  = 
chain_hookname_lookup($4);
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft 2/3] meta: fix endianness in priority

2016-05-29 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

For example, after we add rule to set priority 1:2, it will be displayed in 
network
byte order as 0200:0100, this is wrong:

  # nft add rule filter test meta priority set 1:2
  # nft list chain filter test
  table ip filter {
  chain test {
  meta priority set 0200:0100
  }
  }

Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 src/meta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/meta.c b/src/meta.c
index b8db0f8..74d2b4c 100644
--- a/src/meta.c
+++ b/src/meta.c
@@ -128,7 +128,7 @@ static const struct datatype tchandle_type = {
.type   = TYPE_TC_HANDLE,
.name   = "tc_handle",
.desc   = "TC handle",
-   .byteorder  = BYTEORDER_BIG_ENDIAN,
+   .byteorder  = BYTEORDER_HOST_ENDIAN,
.size   = 4 * BITS_PER_BYTE,
.basetype   = _type,
.print  = tchandle_type_print,
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables] extensions: libxt_limit: fix a wrong translation to nft rule

2016-05-21 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

The default burst value is 5 in iptables limit extension while it is 0 in
nft limit expression, if the burst value is default, it will not be
displayed when we dump the rules. But when we do translation from iptables
rules to nft rules, we should keep the limit burst value unchanged, even if
it is not displayed in iptables rules.

And now, if the limit-burst value in the iptables rule is 5 or 0, they are
all translated to nft rule without burst, this is wrong:

$ sudo iptables-translate -A INPUT -m limit --limit 10/s --limit-burst 5
nft add rule ip filter INPUT limit rate 10/second counter
$ sudo iptables-translate -A INPUT -m limit --limit 10/s --limit-burst 0
nft add rule ip filter INPUT limit rate 10/second burst 0 packets counter

Apply this patch, translation will become:

$ sudo iptables-translate -A INPUT -m limit --limit 10/s --limit-burst 5
nft add rule ip filter INPUT limit rate 10/second burst 5 packets counter
$ sudo iptables-translate -A INPUT -m limit --limit 10/s --limit-burst 0
nft add rule ip filter INPUT limit rate 10/second counter

Fixes: a8dfbe3a3acb ("extensions: libxt_limit: Add translation to nft")
Cc: Shivani Bhardwaj <shivanib...@gmail.com>
Signed-off-by: Liping Zhang <liping.zh...@spreadtrum.com>
---
 extensions/libxt_limit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/libxt_limit.c b/extensions/libxt_limit.c
index c88d26b..6652849 100644
--- a/extensions/libxt_limit.c
+++ b/extensions/libxt_limit.c
@@ -184,7 +184,7 @@ static int limit_xlate(const void *ip, const struct 
xt_entry_match *match,
 
xt_xlate_add(xl, "limit rate");
print_rate_xlate(r->avg, xl);
-   if (r->burst != XT_LIMIT_BURST)
+   if (r->burst != 0)
xt_xlate_add(xl, "burst %u packets ", r->burst);
 
return 1;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] evaluate: fix crash if we add an error format rule

2016-05-14 Thread Liping Zhang
From: Liping Zhang <liping.zh...@spreadtrum.com>

If we add a such nft rule:
  nft add rule filter input ip protocol icmp tcp dport 0

we will always meet the assert condition:
  nft: evaluate.c:536: resolve_protocol_conflict: Assertion `base < 
(__PROTO_BASE_MAX - 1)' failed.
  Aborted (core dumped)
---
 src/evaluate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/evaluate.c b/src/evaluate.c
index 53f19b2..c317761 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -533,7 +533,7 @@ static int resolve_protocol_conflict(struct eval_ctx *ctx,
list_add_tail(>list, >stmt->list);
}
 
-   assert(base < PROTO_BASE_MAX);
+   assert(base <= PROTO_BASE_MAX);
/* This payload and the existing context don't match, conflict. */
if (ctx->pctx.protocol[base + 1].desc != NULL)
return 1;
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


<    1   2   3   4   >