RE: [patch iproute2 v2] tc: add -bs option for batch mode

Chris Mi Fri, 22 Dec 2017 01:28:28 -0800

> -----Original Message-----
> From: David Ahern [mailto:dsah...@gmail.com]
> Sent: Friday, December 22, 2017 6:04 AM
> To: Chris Mi <chr...@mellanox.com>; netdev@vger.kernel.org
> Cc: gerlitz...@gmail.com
> Subject: Re: [patch iproute2 v2] tc: add -bs option for batch mode
> 
> On 12/20/17 2:26 AM, Chris Mi wrote:
> > Currently in tc batch mode, only one command is read from the batch
> > file and sent to kernel to process. With this patch, we can accumulate
> > several commands before sending to kernel. The batch size is specified
> > using option -bs or -batchsize.
> >
> > To accumulate the commands in tc, we allocate an array of struct iovec.
> > If batchsize is bigger than 1 and we haven't accumulated enough
> > commands, rtnl_talk() will return without actually sending the message.
> > One exception is that there is no more command in the batch file.
> >
> > But please note that kernel still processes the requests one by one.
> > To process the requests in parallel in kernel is another effort.
> > The time we're saving in this patch is the user mode and kernel mode
> > context switch. So this patch works on top of the current kernel.
> >
> > Using the following script in kernel, we can generate 1,000,000 rules.
> >         tools/testing/selftests/tc-testing/tdc_batch.py
> >
> > Without this patch, 'tc -b $file' exection time is:
> >
> > real    0m14.916s
> > user    0m6.808s
> > sys     0m8.046s
> >
> > With this patch, 'tc -b $file -bs 10' exection time is:
> >
> > real    0m12.286s
> > user    0m5.903s
> > sys     0m6.312s
> >
> > The insertion rate is improved more than 10%.
> >
> > Signed-off-by: Chris Mi <chr...@mellanox.com>
> > ---
> >  include/libnetlink.h |  6 ++++
> >  include/utils.h      |  4 +++
> >  lib/libnetlink.c     | 25 ++++++++++-----
> >  lib/utils.c          | 20 ++++++++++++
> >  man/man8/tc.8        |  5 +++
> >  tc/m_action.c        | 62 +++++++++++++++++++++++++++---------
> >  tc/tc.c              | 24 ++++++++++++--
> >  tc/tc_common.h       |  3 ++
> >  tc/tc_filter.c       | 88 ++++++++++++++++++++++++++++++++++++---------
> -------
> >  9 files changed, 186 insertions(+), 51 deletions(-)
> >
> > diff --git a/include/libnetlink.h b/include/libnetlink.h index
> > a4d83b9e..775f830b 100644
> > --- a/include/libnetlink.h
> > +++ b/include/libnetlink.h
> > @@ -13,6 +13,8 @@
> >  #include <linux/netconf.h>
> >  #include <arpa/inet.h>
> >
> > +#define MSG_IOV_MAX 256
> > +
> >  struct rtnl_handle {
> >     int                     fd;
> >     struct sockaddr_nl      local;
> > @@ -93,6 +95,10 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
> >                     void *arg, __u16 nc_flags);
> >  #define rtnl_dump_filter(rth, filter, arg) \
> >     rtnl_dump_filter_nc(rth, filter, arg, 0)
> > +
> > +extern int msg_iov_index;
> > +extern int batch_size;
> > +
> >  int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> >           struct nlmsghdr **answer)
> >     __attribute__((warn_unused_result));
> > diff --git a/include/utils.h b/include/utils.h index
> > d3895d56..113a8c31 100644
> > --- a/include/utils.h
> > +++ b/include/utils.h
> > @@ -235,6 +235,10 @@ void print_nlmsg_timestamp(FILE *fp, const struct
> > nlmsghdr *n);
> >
> >  extern int cmdlineno;
> >  ssize_t getcmdline(char **line, size_t *len, FILE *in);
> > +
> > +extern int cmdlinetotal;
> > +void setcmdlinetotal(const char *name);
> > +
> >  int makeargs(char *line, char *argv[], int maxargs);  int
> > inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6);
> >
> > diff --git a/lib/libnetlink.c b/lib/libnetlink.c index
> > 00e6ce0c..7ff32d64 100644
> > --- a/lib/libnetlink.c
> > +++ b/lib/libnetlink.c
> > @@ -24,6 +24,7 @@
> >  #include <sys/uio.h>
> >
> >  #include "libnetlink.h"
> > +#include "utils.h"
> >
> >  #ifndef SOL_NETLINK
> >  #define SOL_NETLINK 270
> > @@ -581,6 +582,10 @@ static void rtnl_talk_error(struct nlmsghdr *h,
> struct nlmsgerr *err,
> >             strerror(-err->error));
> >  }
> >
> > +static struct iovec msg_iov[MSG_IOV_MAX]; int msg_iov_index; int
> > +batch_size = 1;
> > +
> >  static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
> >                    struct nlmsghdr **answer,
> >                    bool show_rtnl_err, nl_ext_ack_fn_t errfn) @@ -589,23
> > +594,29 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr
> *n,
> >     unsigned int seq;
> >     struct nlmsghdr *h;
> >     struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
> > -   struct iovec iov = {
> > -           .iov_base = n,
> > -           .iov_len = n->nlmsg_len
> > -   };
> > +   struct iovec *iov = &msg_iov[msg_iov_index];
> > +   char *buf;
> > +
> > +   iov->iov_base = n;
> > +   iov->iov_len = n->nlmsg_len;
> > +
> >     struct msghdr msg = {
> >             .msg_name = &nladdr,
> >             .msg_namelen = sizeof(nladdr),
> > -           .msg_iov = &iov,
> > -           .msg_iovlen = 1,
> > +           .msg_iov = msg_iov,
> > +           .msg_iovlen = ++msg_iov_index,
> >     };
> > -   char *buf;
> >
> >     n->nlmsg_seq = seq = ++rtnl->seq;
> >
> >     if (answer == NULL)
> >             n->nlmsg_flags |= NLM_F_ACK;
> >
> > +   msg_iov_index %= batch_size;
> > +   if (msg_iov_index != 0 && batch_size > 1 && cmdlineno !=
> cmdlinetotal &&
> > +       (n->nlmsg_type == RTM_NEWTFILTER || n->nlmsg_type ==
> RTM_NEWACTION))
> > +           return 3;
> > +
> >     status = sendmsg(rtnl->fd, &msg, 0);
> >     if (status < 0) {
> >             perror("Cannot talk to rtnetlink");
> 
> I have a fair idea of why you did it this way, but relying on global 
> variables and
> magic return codes is not a great solution.
> 
> Why not refactor rtnl_talk -- move the sendmsg piece to a helper that takes
> an iov or a msg as input arg. Then have the tc code piece together the iov and
> call rtnl_talk. If batch_size == 1 it calls rtnl_talk; > 1 it puts together 
> the iov
> and hands it off.
Thanks for your suggestion. I think it is better to add a new API, like 
rtnl_talk_msg.
It will take a struct msghdr * as input arg instead of a struct nlmsghdr *.
The caller should prepare for the msghdr, like tc_filter_modify.
After this changes, libnetlink needn't to know anything about the batching.


rtnl_talk will send a single iov like before. Existing users has no impact at 
all.
rtnl_talk_msg will send multiple iovs.

RE: [patch iproute2 v2] tc: add -bs option for batch mode

Reply via email to