[PATCH nftables 0/7] TCP option matching

2017-02-06 Thread Manuel Messner
This patch set is part of the TCP option matching implementation for nftables.

These patch sets enable nft to match against the following TCP options:
* End of Option List
* No-Operation
* Maximum Segment Size
* Window Scale
* SACK
* SACK Permitted
* Timestamps

Florian Westphal (1):
  payload: insert implicit meta tcp dependency when matching tcp options

Manuel Messner (6):
  include: linux: netfilter: nf_tables: copy file from nf-next
  exthdr: prepare for tcp support
  exthdr: prepare exthdr_gen_dependency for tcp support
  src: add TCP option matching
  payload: automatically kill dependencies for exthdr and tcpopt
  tests: py: Add basic tests for ip, ip6 and inet

 doc/nft.xml | 178 +++-
 include/expression.h|   1 +
 include/exthdr.h|   5 +-
 include/linux/netfilter/nf_tables.h |  17 ++-
 include/payload.h   |   5 +-
 include/tcpopt.h|  26 
 src/Makefile.am |   1 +
 src/evaluate.c  |  42 +-
 src/exthdr.c|  36 -
 src/netlink_delinearize.c   |   7 +-
 src/netlink_linearize.c |   5 +-
 src/parser_bison.y  |  46 +-
 src/payload.c   |  39 +-
 src/scanner.l   |   1 +
 src/tcpopt.c| 269 
 tests/py/inet/tcpopt.t  |  38 +
 tests/py/inet/tcpopt.t.payload.inet | 181 
 tests/py/ip/tcpopt.t|  38 +
 tests/py/ip/tcpopt.t.payload| 181 
 tests/py/ip6/tcpopt.t   |  37 +
 tests/py/ip6/tcpopt.t.payload   | 181 
 21 files changed, 1303 insertions(+), 31 deletions(-)
 create mode 100644 include/tcpopt.h
 create mode 100644 src/tcpopt.c
 create mode 100644 tests/py/inet/tcpopt.t
 create mode 100644 tests/py/inet/tcpopt.t.payload.inet
 create mode 100644 tests/py/ip/tcpopt.t
 create mode 100644 tests/py/ip/tcpopt.t.payload
 create mode 100644 tests/py/ip6/tcpopt.t
 create mode 100644 tests/py/ip6/tcpopt.t.payload

--
2.11.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 1/1] net: netfilter: nft_exthdr: add TCP option matching

2017-02-06 Thread Manuel Messner
This patch implements the kernel side of the TCP option patch.

Signed-off-by: Manuel Messner 
Reviewed-by: Florian Westphal 
---
 include/uapi/linux/netfilter/nf_tables.h |  17 -
 net/netfilter/Kconfig|   4 +-
 net/netfilter/nft_exthdr.c   | 120 +++
 3 files changed, 125 insertions(+), 16 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index b00a05d1ee56..6c0c9efddad6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -705,12 +705,13 @@ enum nft_payload_attributes {
 #define NFTA_PAYLOAD_MAX   (__NFTA_PAYLOAD_MAX - 1)
 
 /**
- * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression 
netlink attributes
+ * enum nft_exthdr_attributes - nf_tables extension header expression netlink 
attributes
  *
  * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
  * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
  * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
  * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U8)
  */
 enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -718,11 +719,25 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_TYPE,
NFTA_EXTHDR_OFFSET,
NFTA_EXTHDR_LEN,
+   NFTA_EXTHDR_OP,
__NFTA_EXTHDR_MAX
 };
 #define NFTA_EXTHDR_MAX(__NFTA_EXTHDR_MAX - 1)
 
 /**
+ * enum nft_exthdr_op - nf_tables match options
+ *
+ * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers
+ * @NFT_EXTHDR_OP_TCP: match against tcp options
+ */
+enum nft_exthdr_op {
+   NFT_EXTHDR_OP_IPV6,
+   NFT_EXTHDR_OP_TCPOPT,
+   __NFT_EXTHDR_OP_MAX
+};
+#define NFT_EXTHDR_OP_MAX  (__NFT_EXTHDR_OP_MAX - 1)
+
+/**
  * enum nft_meta_keys - nf_tables meta expression keys
  *
  * @NFT_META_LEN: packet length (skb->len)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c69eee3a0e54..e44c0e1ec0c5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -467,10 +467,10 @@ config NF_TABLES_NETDEV
  This option enables support for the "netdev" table.
 
 config NFT_EXTHDR
-   tristate "Netfilter nf_tables IPv6 exthdr module"
+   tristate "Netfilter nf_tables exthdr module"
help
  This option adds the "exthdr" expression that you can use to match
- IPv6 extension headers.
+ IPv6 extension headers and tcp options.
 
 config NFT_META
tristate "Netfilter nf_tables meta module"
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 47beb3abcc9d..bc6518a8fe6c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -15,19 +15,28 @@
 #include 
 #include 
 #include 
-// FIXME:
-#include 
+#include 
 
 struct nft_exthdr {
u8  type;
u8  offset;
u8  len;
+   u8  op;
enum nft_registers  dreg:8;
 };
 
-static void nft_exthdr_eval(const struct nft_expr *expr,
-   struct nft_regs *regs,
-   const struct nft_pktinfo *pkt)
+static unsigned int optlen(const u8 *opt, unsigned int offset)
+{
+   /* Beware zero-length options: make finite progress */
+   if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
+   return 1;
+   else
+   return opt[offset + 1];
+}
+
+static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+struct nft_regs *regs,
+const struct nft_pktinfo *pkt)
 {
struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = >data[priv->dreg];
@@ -47,11 +56,59 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
 }
 
+static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+   struct nft_regs *regs,
+   const struct nft_pktinfo *pkt)
+{
+   u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+   struct nft_exthdr *priv = nft_expr_priv(expr);
+   unsigned int i, optl, tcphdr_len, offset;
+   u32 *dest = >data[priv->dreg];
+   struct tcphdr *tcph;
+   u8 *opt;
+
+   if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+   goto err;
+
+   tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
+   if (!tcph)
+   goto err;
+
+   tcphdr_len = __tcp_hdrlen(tcph);
+   if (tcphdr_len < sizeof(*tcph))
+   goto err;
+
+   tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+   if (!tcph)
+   goto err;
+
+   opt = (u8 *)tcph;
+   for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+   optl = optlen(opt, i);
+
+   

[PATCH nftables 6/7] payload: automatically kill dependencies for exthdr and tcpopt

2017-02-06 Thread Manuel Messner
This patch automatically removes the dependencies for exthdr and tcpopt.

 # nft add rule filter input tcp option maxseg kind 3 counter.
 # nft list table filter input

Before:

 # ip protocol 6 tcp option maxseg kind 3 counter

After:

 # tcp option maxseg kind 3 counter

Thus allowing to write tests as follows:

 # tcp option maxseg kind 3;ok

Signed-off-by: Florian Westphal 
Signed-off-by: Manuel Messner 
---
 include/payload.h |  2 ++
 src/netlink_delinearize.c |  2 +-
 src/payload.c | 14 ++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/payload.h b/include/payload.h
index 5952b24..a3d2309 100644
--- a/include/payload.h
+++ b/include/payload.h
@@ -42,6 +42,8 @@ extern void __payload_dependency_kill(struct payload_dep_ctx 
*ctx,
  enum proto_bases base);
 extern void payload_dependency_kill(struct payload_dep_ctx *ctx,
struct expr *expr);
+extern void exthdr_dependency_kill(struct payload_dep_ctx *ctx,
+  struct expr *expr);
 
 extern bool payload_can_merge(const struct expr *e1, const struct expr *e2);
 extern struct expr *payload_expr_join(const struct expr *e1,
diff --git a/src/netlink_delinearize.c b/src/netlink_delinearize.c
index 87010f1..e23c48b 100644
--- a/src/netlink_delinearize.c
+++ b/src/netlink_delinearize.c
@@ -1841,7 +1841,7 @@ static void expr_postprocess(struct rule_pp_ctx *ctx, 
struct expr **exprp)
expr_postprocess(ctx, >key);
break;
case EXPR_EXTHDR:
-   __payload_dependency_kill(>pdctx, PROTO_BASE_NETWORK_HDR);
+   exthdr_dependency_kill(>pdctx, expr);
break;
case EXPR_SET_REF:
case EXPR_META:
diff --git a/src/payload.c b/src/payload.c
index 0207296..169954b 100644
--- a/src/payload.c
+++ b/src/payload.c
@@ -410,6 +410,20 @@ void payload_dependency_kill(struct payload_dep_ctx *ctx, 
struct expr *expr)
__payload_dependency_kill(ctx, expr->payload.base);
 }
 
+void exthdr_dependency_kill(struct payload_dep_ctx *ctx, struct expr *expr)
+{
+   switch (expr->exthdr.op) {
+   case NFT_EXTHDR_OP_TCPOPT:
+   __payload_dependency_kill(ctx, PROTO_BASE_TRANSPORT_HDR);
+   break;
+   case NFT_EXTHDR_OP_IPV6:
+   __payload_dependency_kill(ctx, PROTO_BASE_NETWORK_HDR);
+   break;
+   default:
+   break;
+   }
+}
+
 /**
  * payload_expr_complete - fill in type information of a raw payload expr
  *
-- 
2.11.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nftables 3/7] exthdr: prepare exthdr_gen_dependency for tcp support

2017-02-06 Thread Manuel Messner
currently exthdr always needs ipv6 dependency (i.e. link layer), but
with upcomming TCP option matching we also need to include TCP at the
network layer.

This patch prepares this change by adding two parameters to
exthdr_gen_dependency.

Signed-off-by: Manuel Messner 
Signed-off-by: Florian Westphal 
---
 include/payload.h | 3 ++-
 src/evaluate.c| 9 +
 src/payload.c | 9 +
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/include/payload.h b/include/payload.h
index bda3188..5952b24 100644
--- a/include/payload.h
+++ b/include/payload.h
@@ -16,7 +16,8 @@ struct stmt;
 extern int payload_gen_dependency(struct eval_ctx *ctx, const struct expr 
*expr,
  struct stmt **res);
 extern int exthdr_gen_dependency(struct eval_ctx *ctx, const struct expr *expr,
- struct stmt **res);
+const struct proto_desc *dependency,
+enum proto_bases pb, struct stmt **res);
 
 /**
  * struct payload_dep_ctx - payload protocol dependency tracking
diff --git a/src/evaluate.c b/src/evaluate.c
index 94412f2..0e02548 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -448,19 +448,20 @@ static int __expr_evaluate_exthdr(struct eval_ctx *ctx, 
struct expr **exprp)
  */
 static int expr_evaluate_exthdr(struct eval_ctx *ctx, struct expr **exprp)
 {
-   const struct proto_desc *base;
+   const struct proto_desc *base, *dependency = _ip6;
+   enum proto_bases pb = PROTO_BASE_NETWORK_HDR;
struct expr *expr = *exprp;
struct stmt *nstmt;
 
-   base = ctx->pctx.protocol[PROTO_BASE_NETWORK_HDR].desc;
-   if (base == _ip6)
+   base = ctx->pctx.protocol[pb].desc;
+   if (base == dependency)
return __expr_evaluate_exthdr(ctx, exprp);
 
if (base)
return expr_error(ctx->msgs, expr,
  "cannot use exthdr with %s", base->name);
 
-   if (exthdr_gen_dependency(ctx, expr, ) < 0)
+   if (exthdr_gen_dependency(ctx, expr, dependency, pb - 1, ) < 0)
return -1;
 
list_add(>list, >rule->stmts);
diff --git a/src/payload.c b/src/payload.c
index 74f8254..efd1960 100644
--- a/src/payload.c
+++ b/src/payload.c
@@ -317,18 +317,19 @@ int payload_gen_dependency(struct eval_ctx *ctx, const 
struct expr *expr,
 }
 
 int exthdr_gen_dependency(struct eval_ctx *ctx, const struct expr *expr,
- struct stmt **res)
+ const struct proto_desc *dependency,
+ enum proto_bases pb, struct stmt **res)
 {
const struct proto_desc *desc;
 
-   desc = ctx->pctx.protocol[PROTO_BASE_LL_HDR].desc;
+   desc = ctx->pctx.protocol[pb].desc;
if (desc == NULL)
return expr_error(ctx->msgs, expr,
  "Cannot generate dependency: "
  "no %s protocol specified",
- proto_base_names[PROTO_BASE_LL_HDR]);
+ proto_base_names[pb]);
 
-   return payload_add_dependency(ctx, desc, _ip6, expr, res);
+   return payload_add_dependency(ctx, desc, dependency, expr, res);
 }
 
 /**
-- 
2.11.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nftables 1/7] include: linux: netfilter: nf_tables: copy file from nf-next

2017-02-06 Thread Manuel Messner
Signed-off-by: Manuel Messner 
Reviewed-by: Florian Westphal 
---
 include/linux/netfilter/nf_tables.h | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/netfilter/nf_tables.h 
b/include/linux/netfilter/nf_tables.h
index b00a05d..6c0c9ef 100644
--- a/include/linux/netfilter/nf_tables.h
+++ b/include/linux/netfilter/nf_tables.h
@@ -705,12 +705,13 @@ enum nft_payload_attributes {
 #define NFTA_PAYLOAD_MAX   (__NFTA_PAYLOAD_MAX - 1)
 
 /**
- * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression 
netlink attributes
+ * enum nft_exthdr_attributes - nf_tables extension header expression netlink 
attributes
  *
  * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
  * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
  * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
  * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U8)
  */
 enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -718,11 +719,25 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_TYPE,
NFTA_EXTHDR_OFFSET,
NFTA_EXTHDR_LEN,
+   NFTA_EXTHDR_OP,
__NFTA_EXTHDR_MAX
 };
 #define NFTA_EXTHDR_MAX(__NFTA_EXTHDR_MAX - 1)
 
 /**
+ * enum nft_exthdr_op - nf_tables match options
+ *
+ * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers
+ * @NFT_EXTHDR_OP_TCP: match against tcp options
+ */
+enum nft_exthdr_op {
+   NFT_EXTHDR_OP_IPV6,
+   NFT_EXTHDR_OP_TCPOPT,
+   __NFT_EXTHDR_OP_MAX
+};
+#define NFT_EXTHDR_OP_MAX  (__NFT_EXTHDR_OP_MAX - 1)
+
+/**
  * enum nft_meta_keys - nf_tables meta expression keys
  *
  * @NFT_META_LEN: packet length (skb->len)
-- 
2.11.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nf-next 0/1] TCP option matching

2017-02-06 Thread Manuel Messner
This patch set is part of the TCP option matching implementation for nftables.

Manuel Messner (1):
  src: add TCP option matching requirements

 include/libnftnl/expr.h |  1 +
 include/linux/netfilter/nf_tables.h | 17 -
 src/expr/exthdr.c   | 49 ++---
 3 files changed, 62 insertions(+), 5 deletions(-)

--
2.11.1

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nftables 4/7] src: add TCP option matching

2017-02-06 Thread Manuel Messner
This patch enables nft to match against TCP options.

Currently these TCP options are supported:
* End of Option List (eol)
* No-Operation (noop)
* Maximum Segment Size (maxseg)
* Window Scale (window)
* SACK Permitted (sack_permitted)
* SACK (sack)
* Timestamps (timestamp)

Syntax: tcp options $option_name [$offset] $field_name
Example:

 # count all incoming packets with a specific maximum segment size `x`
 # nft add rule filter input tcp option maxseg size x counter

 # count all incoming packets with a SACK TCP option where the third
 # (counted from zero) left field is greater `x`.
 # nft add rule filter input tcp option sack 2 left \> x counter

If the offset (the `2` in the example above) is zero, it can optionally
be omitted.
For all non-SACK TCP options it is always zero, thus can be left out.

Option names and field names are parsed from templates, similar to meta
and ct options rather than via keywords to prevent adding more keywords
than necessary.

Signed-off-by: Manuel Messner 
Reviewed-by: Florian Westphal 
---
 doc/nft.xml   | 178 +-
 include/expression.h  |   1 +
 include/exthdr.h  |   2 +
 include/tcpopt.h  |  26 +
 src/Makefile.am   |   1 +
 src/evaluate.c|  35 +-
 src/exthdr.c  |  31 +-
 src/netlink_delinearize.c |   3 +-
 src/netlink_linearize.c   |   3 +-
 src/parser_bison.y|  46 +++-
 src/scanner.l |   1 +
 src/tcpopt.c  | 269 ++
 12 files changed, 580 insertions(+), 16 deletions(-)
 create mode 100644 include/tcpopt.h
 create mode 100644 src/tcpopt.c

diff --git a/doc/nft.xml b/doc/nft.xml
index 1455086..08ecdfa 100644
--- a/doc/nft.xml
+++ b/doc/nft.xml
@@ -2105,14 +2105,182 @@ inet filter meta nfproto ipv6 output rt nexthop fd00::1



-   
 
-   
-   bla

-   IPv6 extension header expressions
+   Extension header expressions
+   
+   Extension header expressions refer to data from 
variable-sized protocol headers, such as IPv6 extension headers and
+   TCPs options.
+   
+   
+   nftables currently supports matching (finding) 
a given ipv6 extension header or TCP option.
+   
+   
+   hbh
+   
+   nexthdr
+   hdrlength
+   
+   
+   
+   frag
+   
+   nexthdr
+   frag-off
+   more-fragments
+   id
+   
+   
+
+   
+   rt
+   
+   nexthdr
+   hdrlength
+   type
+   seg-left
+   
+   
+   
+   dst
+   
+   nexthdr
+   hdrlength
+   
+   
+   
+   mh
+   
+   nexthdr
+   hdrlength
+   checksum
+   type
+   
+   
+   
+   tcp option
+   
+   eol
+   noop
+   maxseg
+   window
+   sack_permitted
+   sack
+   timestamp
+   
+offset
+   tcp_option_field
+   
+   
+   
+   IPv6 extension headers
+   
+   
+   
+   
+  

[PATCH v2] netfilter: xt_hashlimit: Fix integer divide round to zero.

2017-02-06 Thread Alban Browaeys
Diving the divider by the multiplier before applying to the input.
When this would "divide by zero", divide the multiplier by the divider
first then multiply the input by this value.

Currently user2creds outputs zero when input value is bigger than the
number of slices and  lower than scale.
This as then user input is applied an integer divide operation to
a number greater than itself (scale).
That rounds up to zero, then we mulitply zero by the credits slice size.
  iptables -t filter -I INPUT --protocol tcp --match hashlimit
  --hashlimit 40/second --hashlimit-burst 20 --hashlimit-mode srcip
  --hashlimit-name syn-flood --jump RETURN
thus trigger the overflow detection code:
xt_hashlimit: overflow, try lower: 25000/20
(25000 as hashlimit avd and 20 the burst)
Here:
134217 slices of (HZ * CREDITS_PER_JIFFY) size.
50 is user input value
100 is XT_HASHLIMIT_SCALE_v2
gives: 0 as user2creds output
Setting burst to "1" typically solve the issue ...
but setting it to "40" does too !

This is on 32bit arch calling into revision 2 of hashlimit.

Signed-off-by: Alban Browaeys 
---
v2:
- fix missing conditional statement in revision 1 case
.

I removed the code duplication between revision 1 and 2.

v1: https://lkml.org/lkml/2017/2/4/173
---
 net/netfilter/xt_hashlimit.c | 25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 10063408141d..84ad5ab34558 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -463,23 +463,16 @@ static u32 xt_hashlimit_len_to_chunks(u32 len)
 /* Precision saver. */
 static u64 user2credits(u64 user, int revision)
 {
-   if (revision == 1) {
-   /* If multiplying would overflow... */
-   if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
-   /* Divide first. */
-   return div64_u64(user, XT_HASHLIMIT_SCALE)
-   * HZ * CREDITS_PER_JIFFY_v1;
-
-   return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
-XT_HASHLIMIT_SCALE);
-   } else {
-   if (user > 0xULL / (HZ*CREDITS_PER_JIFFY))
-   return div64_u64(user, XT_HASHLIMIT_SCALE_v2)
-   * HZ * CREDITS_PER_JIFFY;
+   u64 scale = (revision == 1) ?
+   XT_HASHLIMIT_SCALE : XT_HASHLIMIT_SCALE_v2;
+   u64 cpj = (revision == 1) ?
+   CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
 
-   return div64_u64(user * HZ * CREDITS_PER_JIFFY,
-XT_HASHLIMIT_SCALE_v2);
-   }
+   /* Avoid overflow: divide the constant operands first */
+   if (scale >= HZ * cpj)
+   return div64_u64(user, div64_u64(scale, HZ * cpj));
+
+   return user * div64_u64(HZ * cpj, scale);
 }
 
 static u32 user2credits_byte(u32 user)
-- 
2.11.0

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nftables 5/9] src: add host byte order integer type

2017-02-06 Thread Florian Westphal
Pablo Neira Ayuso  wrote:
> On Fri, Feb 03, 2017 at 01:35:52PM +0100, Florian Westphal wrote:
> > diff --git a/include/datatype.h b/include/datatype.h
> > index 9f127f2954e3..8c1c827253be 100644
> > --- a/include/datatype.h
> > +++ b/include/datatype.h
> > @@ -82,6 +82,7 @@ enum datatypes {
> > TYPE_DSCP,
> > TYPE_ECN,
> > TYPE_FIB_ADDR,
> > +   TYPE_U32,
> > __TYPE_MAX
> >  };
> >  #define TYPE_MAX   (__TYPE_MAX - 1)
> 
> Right, this is a real problem with host byteorder integer, the
> bytecode that we generate is not correct.
> 
> I have a patch to avoid this, it's still incomplete. I'm attaching it.
> 
> Note this is still incomplete, since this doesn't solve the netlink
> delinearize path. We can use the NFT_SET_USERDATA area and the tlv
> infrastructure that Carlos made in summer to store this
> metainformation that is only useful to
> 
> This shouldn't be a showstopper to get kernel patches in, we have a
> bit of time ahead to solve this userspace issue.

I don't understand why all this fuss is required.

The type always enocodes/decides the endianess, so I fail to see why we
need to store endianess also in the templates (f.e. meta_templates[],
it just seems 100% redundant ...)

Thats why it I added this host endian thing here, we could then replace

[NFT_META_CPU]  = META_TEMPLATE("cpu", _type, 4 * BITS_PER_BYTE, 
BYTEORDER_HOST_ENDIAN),
with
[NFT_META_CPU]  = META_TEMPLATE("cpu", _u32, 4 * BITS_PER_BYTE),

and don't need this 'endianess override' in the templates anymore.
We might even be able to get rid of the endianess storage in the eval
context this way.

What am I missing?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nft] datatype: Replace getaddrinfo() by internal lookup table

2017-02-06 Thread Pablo Neira Ayuso
On Mon, Feb 06, 2017 at 01:53:40PM -0200, Elise Lennion wrote:
> Nftables uses a internal service table to print service names. This
> very table should be used when parsing new rules, to avoid conflicts
> between nft service table and the local /etc/services, when loading
> an exported ruleset.
> 
> Complements the commit:
> (ccc5da4: datatype: Replace getnameinfo() by internal lookup table)
> 
> Fixes(Bug 1118 - nft: nft -f and nft list ruleset use different sets of
> service -> port mappings)

Applied, thanks Elise.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH] audit: normalize NETFILTER_PKT

2017-02-06 Thread Paul Moore
On Sat, Feb 4, 2017 at 8:25 AM, Steve Grubb  wrote:
> On Friday, February 3, 2017 6:44:16 PM EST Paul Moore wrote:
>> I'm still trying to understand what purpose this record actually
>> serves, and what requirements may exist.  In an earlier thread
>> somewhere Steve mentioned some broad requirements around data
>> import/export, and I really wonder if the NETFILTER_PKT record
>> provides anything useful here when it really isn't connecting the
>> traffic to the sender/receiver without a lot of additional logging and
>> post-processing smarts.  If you were interested in data import/export
>> I think auditing the socket syscalls would provide a much more useful
>> set of records in the audit log.
>
> The problem here is we cannot be selective enough through the syscall
> interface to get exactly what we want. For example, any auditing of connect
> and accept will also get af_unix traffic which is likely to be uid/gid lookups
> through sssd or glibc. Typically we want the IPv4/6 traffic. The netfilter 
> rules
> are better suited to describing which packets are of interest.

Okay, but how useful are these NETFILTER_PKT records, really?  The
only linkage you have back to the process on the local machine is via
the addr/proto/port tuple and that seems far from ideal.

>> Considering that one of the primary motivations for the audit
>> subsystem is to enable compliance with various security
>> specifications, let's get the ones we know about listed in this thread
>> and then figure out how best to meet those requirements.
>
> Common Criteria calls out for the ability to detect any attempt at information
> flow. Everything else leverages the CC requirements.

Yes, you've mentioned this previously.  This is good, but we need to
make these requirements a bit more concrete; we need something we can
use to arrive at a working implementation that satisfies these
requirements.

If this is purely about information flowing from A to B, would the
source and destination addr/proto/port for TCP and UDP suffice?  Do we
need anything else?

-- 
paul moore
www.paul-moore.com
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nftables 5/9] src: add host byte order integer type

2017-02-06 Thread Pablo Neira Ayuso
On Mon, Feb 06, 2017 at 06:31:10PM +0100, Pablo Neira Ayuso wrote:
> On Fri, Feb 03, 2017 at 01:35:52PM +0100, Florian Westphal wrote:
> > diff --git a/include/datatype.h b/include/datatype.h
> > index 9f127f2954e3..8c1c827253be 100644
> > --- a/include/datatype.h
> > +++ b/include/datatype.h
> > @@ -82,6 +82,7 @@ enum datatypes {
> > TYPE_DSCP,
> > TYPE_ECN,
> > TYPE_FIB_ADDR,
> > +   TYPE_U32,
> > __TYPE_MAX
> >  };
> >  #define TYPE_MAX   (__TYPE_MAX - 1)
> 
> Right, this is a real problem with host byteorder integer, the
> bytecode that we generate is not correct.
> 
> I have a patch to avoid this, it's still incomplete. I'm attaching it.
> 
> Note this is still incomplete, since this doesn't solve the netlink
> delinearize path. We can use the NFT_SET_USERDATA area and the tlv
> infrastructure that Carlos made in summer to store this
> metainformation that is only useful to
> 
> This shouldn't be a showstopper to get kernel patches in, we have a
> bit of time ahead to solve this userspace issue.

Forgot attachment, here it comes.
>From 52594fb4130560e2072524193296019d209e3bf8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso 
Date: Tue, 6 Sep 2016 19:49:05 +0200
Subject: [PATCH] evaluate: store byteorder for set keys

Selectors that rely on the integer type and expect host endian
byteorder don't work properly.

We need to keep the byteorder around based on the left hand size
expression that provides the context, so store the byteorder when
evaluating the map.

Before this patch.

 # nft --debug=netlink add rule x y meta mark set meta cpu map { 0 : 1, 1 : 2 }
 __map%d x b
 __map%d x 0
element   : 0001 0 [end]element 0100  : 0002 0 
[end]


This is expressed in network byteorder, because the invalid byteorder
defaults on this.

After this patch:

 # nft --debug=netlink add rule x y meta mark set meta cpu map { 0 : 1, 1 : 2 }
 __map%d x b
 __map%d x 0
element   : 0001 0 [end]element 0001  : 0002 0 
[end]


This is in host byteorder, as the key selector in the map mandates.

Signed-off-by: Pablo Neira Ayuso 
---
 include/rule.h |  3 +++
 src/evaluate.c | 23 +++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/rule.h b/include/rule.h
index 99e92ee..ae9e462 100644
--- a/include/rule.h
+++ b/include/rule.h
@@ -209,6 +209,8 @@ enum set_flags {
SET_F_EVAL  = 0x20,
 };
 
+#include 
+
 /**
  * struct set - nftables set
  *
@@ -237,6 +239,7 @@ struct set {
uint64_ttimeout;
const struct datatype   *keytype;
unsigned intkeylen;
+   enum byteorder  keybyteorder;
const struct datatype   *datatype;
unsigned intdatalen;
struct expr *init;
diff --git a/src/evaluate.c b/src/evaluate.c
index 45af329..b64dfc1 100644
--- a/src/evaluate.c
+++ b/src/evaluate.c
@@ -63,6 +63,7 @@ static struct expr *implicit_set_declaration(struct eval_ctx 
*ctx,
 const char *name,
 const struct datatype *keytype,
 unsigned int keylen,
+enum byteorder byteorder,
 struct expr *expr)
 {
struct cmd *cmd;
@@ -70,11 +71,12 @@ static struct expr *implicit_set_declaration(struct 
eval_ctx *ctx,
struct handle h;
 
set = set_alloc(>location);
-   set->flags  = SET_F_ANONYMOUS | expr->set_flags;
-   set->handle.set = xstrdup(name),
-   set->keytype= keytype;
-   set->keylen = keylen;
-   set->init   = expr;
+   set->flags= SET_F_ANONYMOUS | expr->set_flags;
+   set->handle.set   = xstrdup(name),
+   set->keytype  = keytype;
+   set->keylen   = keylen;
+   set->keybyteorder = byteorder;
+   set->init = expr;
 
if (ctx->table != NULL)
list_add_tail(>list, >table->sets);
@@ -1104,7 +1106,9 @@ static int expr_evaluate_map(struct eval_ctx *ctx, struct 
expr **expr)
case EXPR_SET:
mappings = implicit_set_declaration(ctx, "__map%d",
ctx->ectx.dtype,
-   ctx->ectx.len, mappings);
+   ctx->ectx.len,
+   ctx->ectx.byteorder,
+   mappings);
mappings->set->datatype = ectx.dtype;
mappings->set->datalen  = ectx.len;
 
@@ -1159,7 +1163,8 @@ static int expr_evaluate_mapping(struct eval_ctx *ctx, 
struct 

Re: Chain priorities for NAT

2017-02-06 Thread Pablo Neira Ayuso
On Thu, Feb 02, 2017 at 01:52:18PM +0100, Christoph Pleger wrote:
> Hello,
> 
> On 2017-01-11, I wrote:
> 
> > The Wiki on https://wiki.nftables.org mentions two priorities
> specifically available for NAT, -100 and 100. But of these two, the
> wiki's example for NAT only uses the value 100 for the postrouting
> chain. The prerouting chain has priority 0, and there is no difference
> between SNAT and DNAT.
> >
> > When I look at the ipv4-nat example which is shipped together with my
> nftables package, both chains use priority -150, though due to the Wiki,
> that value is used for mangling.
> >
> > And when I look at some online-exmaples, they use 0 for prerouting and
> postrouting.
> >
> > So, what are really the best values to use for priority in snat
> prerouting and postrouting and dnat prerouting and postrouting?
> 
> Does "No answer in three weeks" mean that nobody here knows how to use
> these priority values for NAT chains? Though probably netfilter developers
> are reading this list?

Sorry, I overlooked this email.

See nf_ip_hook_priorities:
http://lxr.free-electrons.com/source/include/uapi/linux/netfilter_ipv4.h

See nf_ip6_hook_priorities:
http://lxr.free-electrons.com/source/include/uapi/linux/netfilter_ipv6.h

Yes, I'm pointing to source code, I know I should not be doing this ;-)

Probably we can add the 'default' label, so:

add chain x y { type filter hook input priority default\; }

In this case, default translates to 0.

add chain x y { type nat hook prerouting priority default\; }

In this case this would be -100.

Then:

add chain x y { type nat hook postrouting priority default\; }

This results in priority 100.

We would still need explicit labels though, eg. raw and security at
least. These are special type of filter chains.

Comments welcome. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH nftables 5/9] src: add host byte order integer type

2017-02-06 Thread Pablo Neira Ayuso
On Fri, Feb 03, 2017 at 01:35:52PM +0100, Florian Westphal wrote:
> diff --git a/include/datatype.h b/include/datatype.h
> index 9f127f2954e3..8c1c827253be 100644
> --- a/include/datatype.h
> +++ b/include/datatype.h
> @@ -82,6 +82,7 @@ enum datatypes {
>   TYPE_DSCP,
>   TYPE_ECN,
>   TYPE_FIB_ADDR,
> + TYPE_U32,
>   __TYPE_MAX
>  };
>  #define TYPE_MAX (__TYPE_MAX - 1)

Right, this is a real problem with host byteorder integer, the
bytecode that we generate is not correct.

I have a patch to avoid this, it's still incomplete. I'm attaching it.

Note this is still incomplete, since this doesn't solve the netlink
delinearize path. We can use the NFT_SET_USERDATA area and the tlv
infrastructure that Carlos made in summer to store this
metainformation that is only useful to

This shouldn't be a showstopper to get kernel patches in, we have a
bit of time ahead to solve this userspace issue.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nft PATCH 0/3] Boolean comparison and exthdr existence match support

2017-02-06 Thread Pablo Neira Ayuso
On Mon, Feb 06, 2017 at 03:26:20PM +0100, Phil Sutter wrote:
[...]
> On Mon, Jan 23, 2017 at 01:57:47PM +0100, Pablo Neira Ayuso wrote:
> > On Tue, Jan 17, 2017 at 11:10:04PM +0100, Phil Sutter wrote:
> > > The following series adds two distinct features to nftables, though
> > > since the second one depends on presence of the first one this is
> > > submitted as a series.
> > > 
> > > Patch 1 adds support for a boolean variant of relational expression.
> > > It's OP is strictly implicit and determined by RHS being a boolean
> > > expression. It depends on a related kernel patch adding support for
> > > NFT_CMP_BOOL to nft_cmp.c.
> > 
> > If the problem is that we lack of context from the delinearize path,
> > then I would prefer if you scratch one bit from the fib flags to
> > indicate that we want a true (1)/false (0) return value. Just like we
> > plan to do with exthdr. This should require a small kernel patch for
> > nft_fib I think.
> > 
> > Thus, we can skip this ad hoc update for nft_cmp which seems to me
> > that it's only there to help us get the context that we lack from the
> > delinearize step.
> 
> This is not ad hoc updating nft_cmp but instead support for a new
> operation. Did you maybe reply having the first approach from my RFC in
> mind? Because I scratched that and went with the second one since it's
> more complete.

I think nft_cmp kernel doesn't need the specific boolean operation,
this is something that belongs to userspace. The existing kernel code
already allows us to match 0 and 1 which is sufficient for what we
need.

> > Then, from the delinearize path, if this fib/exthdr flag is set, we
> > attach the corresponding datatype to the expression based on this new
> > flag.
> 
> The point in NFT_CMP_BOOL is that it's LHS expression agnostic. Whatever
> provides a value there can be checked for being eq/neq zero using the
> boolean operation.
> 
> The context use in delinearize path is implicit (LHS defines RHS dtype)
> and for convenience only: It merely allows printing different "flavors"
> of boolean keywords depending on LHS and could easily be dropped.

I think we already have the context depending on the delinearize path
we follow, ie. netlink_delinearize_fib() may attach flavour A of
boolean, while netlink_delinearize_exthdr() attaches flavour B. BTW, I
would actually prefer to avoid flavouring at this stage, isn't it
found / not found enough for the usecase we have so far?

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH nft] datatype: Replace getaddrinfo() by internal lookup table

2017-02-06 Thread Elise Lennion
Nftables uses a internal service table to print service names. This
very table should be used when parsing new rules, to avoid conflicts
between nft service table and the local /etc/services, when loading
an exported ruleset.

Complements the commit:
(ccc5da4: datatype: Replace getnameinfo() by internal lookup table)

Fixes(Bug 1118 - nft: nft -f and nft list ruleset use different sets of
service -> port mappings)

Signed-off-by: Elise Lennion 
---
 src/datatype.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/datatype.c b/src/datatype.c
index d697a07..f1388dc 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -597,10 +597,9 @@ static void inet_service_type_print(const struct expr 
*expr)
 static struct error_record *inet_service_type_parse(const struct expr *sym,
struct expr **res)
 {
-   struct addrinfo *ai;
+   const struct symbolic_constant *s;
uint16_t port;
uintmax_t i;
-   int err;
char *end;
 
errno = 0;
@@ -611,13 +610,16 @@ static struct error_record *inet_service_type_parse(const 
struct expr *sym,
 
port = htons(i);
} else {
-   err = getaddrinfo(NULL, sym->identifier, NULL, );
-   if (err != 0)
-   return error(>location, "Could not resolve 
service: %s",
-gai_strerror(err));
+   for (s = inet_service_tbl.symbols; s->identifier != NULL; s++) {
+   if (!strcmp(sym->identifier, s->identifier))
+   break;
+   }
 
-   port = ((struct sockaddr_in *)ai->ai_addr)->sin_port;
-   freeaddrinfo(ai);
+   if (s->identifier == NULL)
+   return error(>location, "Could not resolve 
service: "
+"Servname not found in nft services list");
+
+   port = s->value;
}
 
*res = constant_expr_alloc(>location, _service_type,
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] netfilter: xt_hashlimit: Fix integer divide round to zero.

2017-02-06 Thread Alban Browaeys
Le lundi 06 février 2017 à 14:04 +0100, Pablo Neira Ayuso a écrit :
> On Sat, Feb 04, 2017 at 11:47:31PM +0100, Alban Browaeys wrote:
> > diff --git a/net/netfilter/xt_hashlimit.c
> > b/net/netfilter/xt_hashlimit.c
> > index 10063408141d..df75ad643eef 100644
> > --- a/net/netfilter/xt_hashlimit.c
> > +++ b/net/netfilter/xt_hashlimit.c
> > @@ -463,21 +463,19 @@ static u32 xt_hashlimit_len_to_chunks(u32 len)
> >  /* Precision saver. */
> >  static u64 user2credits(u64 user, int revision)
> >  {
> > > > +   /* Avoid overflow: divide the constant operands first */
> > > >     if (revision == 1) {
> > > > -   /* If multiplying would overflow... */
> > > > -   if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
> > > > -   /* Divide first. */
> > > > -   return div64_u64(user, XT_HASHLIMIT_SCALE)
> > > > -   * HZ * CREDITS_PER_JIFFY_v1;
> > > > +   return div64_u64(user, div64_u64(XT_HASHLIMIT_SCALE,
> > > > +   HZ * CREDITS_PER_JIFFY_v1));
> >  
> > > > -   return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
> > > > +   return user * div64_u64(HZ * CREDITS_PER_JIFFY_v1,
> >      XT_HASHLIMIT_SCALE);
> 
> I see two return statements here, the one coming later is
> shadowed, this can't be right.

I fixed revision 2 case then copy the code to revision 1
 and lost the condition in the process.
The code duplication is useless. I will rework it in v2.

Thank you for the review.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nft PATCH 0/3] Boolean comparison and exthdr existence match support

2017-02-06 Thread Phil Sutter
Hi Pablo,

On Mon, Jan 23, 2017 at 01:57:47PM +0100, Pablo Neira Ayuso wrote:
> On Tue, Jan 17, 2017 at 11:10:04PM +0100, Phil Sutter wrote:
> > The following series adds two distinct features to nftables, though
> > since the second one depends on presence of the first one this is
> > submitted as a series.
> > 
> > Patch 1 adds support for a boolean variant of relational expression.
> > It's OP is strictly implicit and determined by RHS being a boolean
> > expression. It depends on a related kernel patch adding support for
> > NFT_CMP_BOOL to nft_cmp.c.
> 
> If the problem is that we lack of context from the delinearize path,
> then I would prefer if you scratch one bit from the fib flags to
> indicate that we want a true (1)/false (0) return value. Just like we
> plan to do with exthdr. This should require a small kernel patch for
> nft_fib I think.
> 
> Thus, we can skip this ad hoc update for nft_cmp which seems to me
> that it's only there to help us get the context that we lack from the
> delinearize step.

This is not ad hoc updating nft_cmp but instead support for a new
operation. Did you maybe reply having the first approach from my RFC in
mind? Because I scratched that and went with the second one since it's
more complete.

> Then, from the delinearize path, if this fib/exthdr flag is set, we
> attach the corresponding datatype to the expression based on this new
> flag.

The point in NFT_CMP_BOOL is that it's LHS expression agnostic. Whatever
provides a value there can be checked for being eq/neq zero using the
boolean operation.

The context use in delinearize path is implicit (LHS defines RHS dtype)
and for convenience only: It merely allows printing different "flavors"
of boolean keywords depending on LHS and could easily be dropped.

Cheers, Phil
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [nf-next PATCH] netfilter: nf_tables: exthdr: Add support for existence check

2017-02-06 Thread Pablo Neira Ayuso
On Tue, Jan 17, 2017 at 10:51:26PM +0100, Phil Sutter wrote:
> If NFT_EXTHDR_F_PRESENT is set, exthdr will not copy any header field
> data into *dest, but instead set it to 1 if the header is found and 0
> otherwise.

Applied this one, thanks Phil.

I have fixed this here, via sparse (make C=2):

net/netfilter/nft_exthdr.c:90:24: warning: cast to restricted __be32

> @@ -80,6 +87,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
>   priv->offset = offset;
>   priv->len= len;
>   priv->dreg   = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
> + priv->flags  = ntohl(nla_get_u32(tb[NFTA_EXTHDR_FLAGS]));

This needed to be nla_get_be32(). No problem, I fixed this here.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] netfilter: xt_hashlimit: Fix integer divide round to zero.

2017-02-06 Thread Pablo Neira Ayuso
On Sat, Feb 04, 2017 at 11:47:31PM +0100, Alban Browaeys wrote:
> Diving the divider by the multiplier before applying to the input.
> When this would "divide by zero", divide the multiplier by the divider
> first then multiply the input by this value.
> 
> Currently user2creds outputs zero when input value is bigger than the
> number of slices and  lower than scale.
> This as then user input is applied an integer divide operation to
> a number greater than itself (scale).
> That rounds up to zero, then we mulitply zero by the credits slice size.
>   iptables -t filter -I INPUT --protocol tcp --match hashlimit
>   --hashlimit 40/second --hashlimit-burst 20 --hashlimit-mode srcip
>   --hashlimit-name syn-flood --jump RETURN
> thus trigger the overflow detection code:
> xt_hashlimit: overflow, try lower: 25000/20
> (25000 as hashlimit avd and 20 the burst)
> Here:
> 134217 slices of (HZ * CREDITS_PER_JIFFY) size.
> 50 is user input value
> 100 is XT_HASHLIMIT_SCALE_v2
> gives: 0 as user2creds output
> Setting burst to "1" typically solve the issue ...
> but setting it to "40" does too !
> 
> This is on 32bit arch calling into revision 2 of hashlimit.
> 
> Signed-off-by: Alban Browaeys 
> ---
>  net/netfilter/xt_hashlimit.c | 18 --
>  1 file changed, 8 insertions(+), 10 deletions(-)
> 
> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
> index 10063408141d..df75ad643eef 100644
> --- a/net/netfilter/xt_hashlimit.c
> +++ b/net/netfilter/xt_hashlimit.c
> @@ -463,21 +463,19 @@ static u32 xt_hashlimit_len_to_chunks(u32 len)
>  /* Precision saver. */
>  static u64 user2credits(u64 user, int revision)
>  {
> + /* Avoid overflow: divide the constant operands first */
>   if (revision == 1) {
> - /* If multiplying would overflow... */
> - if (user > 0x / (HZ*CREDITS_PER_JIFFY_v1))
> - /* Divide first. */
> - return div64_u64(user, XT_HASHLIMIT_SCALE)
> - * HZ * CREDITS_PER_JIFFY_v1;
> + return div64_u64(user, div64_u64(XT_HASHLIMIT_SCALE,
> + HZ * CREDITS_PER_JIFFY_v1));
>  
> - return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
> + return user * div64_u64(HZ * CREDITS_PER_JIFFY_v1,
>XT_HASHLIMIT_SCALE);

I see two return statements here, the one coming later is
shadowed, this can't be right.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


ANNOUNCE: Netdev 2.1 update Feb 06

2017-02-06 Thread Jamal Hadi Salim


A few announcements:
- We expect to open up registration and announce hotel and location
this week.

- We are pleased to announce the first netdev 2.1 sponsor!
Many thanks to Mellanox who has been a strong supporter of the netdev
community. Mellanox is first to cross the netdev2.1 sponsor line with a
silver.

The Call for Proposals is still open, submit early
to avoid the hazards of last minute traffic. Refer to:
http://netdevconf.org/2.1/submit-proposal.html

cheers,
jamal
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iptables V2 2/2] xshared: using the blocking file lock request when we wait indefinitely

2017-02-06 Thread Liping Zhang
From: Liping Zhang 

When using "-w" to avoid concurrent instances, we try to do flock() every
one second until it success. But one second maybe too long in some
situations, and it's hard to select a suitable interval time. So when
using "iptables -w" to wait indefinitely, it's better to block until
it become success.

Now do some performance tests. First, flush all the iptables rules in
filter table, and run "iptables -w -S" endlessly:
  # iptables -F
  # iptables -X
  # while : ; do
  iptables -w -S >&- &
  done

Second, after adding and deleting the iptables rules 100 times, measure
the time cost:
  # time for i in $(seq 100); do
  iptables -w -A INPUT
  iptables -w -D INPUT
  done

Before this patch:
  real  1m15.962s
  user  0m0.224s
  sys   0m1.475s

Apply this patch:
  real  0m1.830s
  user  0m0.168s
  sys   0m1.130s

Signed-off-by: Liping Zhang 
---
 V2: use flock() instead of fcntl(). It is more clean.
 (This patch is based on http://patchwork.ozlabs.org/patch/724242/)

 iptables/xshared.c | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/iptables/xshared.c b/iptables/xshared.c
index 055acf2..f0a5ddd 100644
--- a/iptables/xshared.c
+++ b/iptables/xshared.c
@@ -1,4 +1,5 @@
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -258,27 +259,29 @@ bool xtables_lock(int wait, struct timeval *wait_interval)
if (fd < 0)
return true;
 
+   if (wait == -1) {
+   if (flock(fd, LOCK_EX) == 0)
+   return true;
+
+   fprintf(stderr, "Can't lock %s: %s\n", XT_LOCK_NAME,
+   strerror(errno));
+   return false;
+   }
+
while (1) {
if (flock(fd, LOCK_EX | LOCK_NB) == 0)
return true;
-   else if (wait >= 0 && timercmp(_left, wait_interval, <))
+   else if (timercmp(_left, wait_interval, <))
return false;
 
if (++i % 10 == 0) {
-   if (wait != -1)
-   fprintf(stderr, "Another app is currently 
holding the xtables lock; "
-   "still %lds %ldus time ahead to have a 
chance to grab the lock...\n",
-   time_left.tv_sec, time_left.tv_usec);
-   else
-   fprintf(stderr, "Another app is currently 
holding the xtables lock; "
-   "waiting for it to exit...\n");
+   fprintf(stderr, "Another app is currently holding the 
xtables lock; "
+   "still %lds %ldus time ahead to have a chance 
to grab the lock...\n",
+   time_left.tv_sec, time_left.tv_usec);
}
 
wait_time = *wait_interval;
select(0, NULL, NULL, NULL, _time);
-   if (wait == -1)
-   continue;
-
timersub(_left, wait_interval, _left);
}
 }
-- 
2.5.5


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 2/2] netfilter: ctnetlink: Fix regression in CTA_HELP processing

2017-02-06 Thread Pablo Neira Ayuso
On Thu, Jan 26, 2017 at 02:49:44PM -0800, Kevin Cernekee wrote:
> Prior to Linux 4.4, it was usually harmless to send a CTA_HELP attribute
> containing the name of the current helper.  That is no longer the case:
> as of Linux 4.4, if ctnetlink_change_helper() returns an error from
> the ct->master check, processing of the request will fail, skipping the
> NFQA_EXP attribute (if present).
> 
> This patch changes the behavior to improve compatibility with user
> programs that expect the kernel interface to work the way it did prior
> to Linux 4.4.  If a user program specifies CTA_HELP but the argument
> matches the current conntrack helper name, ignore it instead of generating
> an error.

Also applied, thanks Kevin.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 1/2] netfilter: ctnetlink: Fix regression in CTA_STATUS processing

2017-02-06 Thread Pablo Neira Ayuso
On Thu, Jan 26, 2017 at 02:49:43PM -0800, Kevin Cernekee wrote:
> The libnetfilter_conntrack userland library always sets IPS_CONFIRMED
> when building a CTA_STATUS attribute.  If this toggles the bit from
> 0->1, the parser will return an error.  On Linux 4.4+ this will cause any
> NFQA_EXP attribute in the packet to be ignored.  This breaks conntrackd's
> userland helpers because they operate on unconfirmed connections.
> 
> Instead of returning -EBUSY if the user program asks to modify an
> unchangeable bit, simply ignore the change.
> 
> Also, fix the logic so that user programs are allowed to clear
> the bits that they are allowed to change.

Applied, thanks Kevin.

I have manually fixed here this compilation warning, btw:

net/netfilter/nf_conntrack_netlink.c:1449:1: warning:
‘ctnetlink_update_status’ defined but not used [-Wunused-function] 
ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
 ^
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 24/27] netfilter: guarantee 8 byte minalign for template addresses

2017-02-06 Thread David Laight
From: Pablo Neira Ayuso
> Sent: 03 February 2017 12:26
> The next change will merge skb->nfct pointer and skb->nfctinfo
> status bits into single skb->_nfct (unsigned long) area.
> 
> For this to work nf_conn addresses must always be aligned at least on
> an 8 byte boundary since we will need the lower 3bits to store nfctinfo.

Don't do it then.
You are using a bit somewhere else to allow you to use a 'hack'
that stores 3 status bits into the bottom of an address where there
are only 2 'available' bits.
That sounds like it just adds code everywhere.
I suspect you'll regret doing it later on as well.

David

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html