On Tue, Feb 23, 2021 at 4:34 PM Gaëtan Rivet <gr...@u256.net> wrote:
>
>
>
> On Tue, Feb 23, 2021, at 22:55, William Tu wrote:
> > Hi Gaetan,
> >
> > Thanks for the patch, looks very useful.
> > I haven't tested it yet.
> > Minor question/comments inline.
>
> Hi William, thanks for taking a look!
>
> [...]
> > >
> > > +/* Timeouts: all the possible timeout states passed to 
> > > update_expiration()
> > > + * are listed here. The name will be prefix by CT_TM_ and the value is in
> > > + * milliseconds */
> > > +#define CT_TIMEOUTS \
> > > +    CT_TIMEOUT(TCP_FIRST_PACKET) \
> > > +    CT_TIMEOUT(TCP_OPENING) \
> > > +    CT_TIMEOUT(TCP_ESTABLISHED) \
> > > +    CT_TIMEOUT(TCP_CLOSING) \
> > > +    CT_TIMEOUT(TCP_FIN_WAIT) \
> > > +    CT_TIMEOUT(TCP_CLOSED) \
> > > +    CT_TIMEOUT(OTHER_FIRST) \
> > > +    CT_TIMEOUT(OTHER_MULTIPLE) \
> > > +    CT_TIMEOUT(OTHER_BIDIR) \
> > > +    CT_TIMEOUT(ICMP_FIRST) \
> > > +    CT_TIMEOUT(ICMP_REPLY)
> > > +
> > > +enum ct_timeout {
> > > +#define CT_TIMEOUT(NAME) CT_TM_##NAME,
> > > +    CT_TIMEOUTS
> > > +#undef CT_TIMEOUT
> > > +    N_CT_TM
> > > +};
> > > +
> > any reason for moving this macro to here?
> >
>
> I embed the enum ct_timeout within the conn expiration node, so I must have 
> it defined first.
> I did not want to put too much code between 'ct_conn_type' below and the 
> 'conn' struct itself,
> which is why I put 'ct_timeout' just above.
>
> Otherwise, this could be avoided by linking the rculist pointer directly 
> instead of storing its index.
> But I thought it would be unwise as it would more tightly couple the list 
> type to the expiration node, and it means taking a full pointer vs. storing 
> an enum.
>

I see, make sense, thanks!

> > >  enum OVS_PACKED_ENUM ct_conn_type {
> > >      CT_CONN_TYPE_DEFAULT,
> > >      CT_CONN_TYPE_UN_NAT,
> > >  };
> > >
> > > +struct conn_expire {
> > > +    /* Set once when initializing the expiration node. */
> > > +    struct conntrack *ct;
> > > +    /* Timeout state of the connection.
> > > +     * It follows the connection state updates.
> > > +     */
> > > +    enum ct_timeout tm;
> > > +    /* Insert and remove the expiration node only once per RCU syncs.
> > > +     * If multiple threads update the connection, its expiration should
> > > +     * be removed only once and added only once to timeout lists.
> > > +     */
> > > +    atomic_flag insert_once;
> > > +    atomic_flag remove_once;
> > > +    struct rculist node;
> > > +};
> > > +
> > >  struct conn {
> > >      /* Immutable data. */
> > >      struct conn_key key;
> > >      struct conn_key rev_key;
> > >      struct conn_key parent_key; /* Only used for orig_tuple support. */
> > > -    struct ovs_list exp_node;
> > >      struct cmap_node cm_node;
> > >      struct nat_action_info_t *nat_info;
> > >      char *alg;
> > > @@ -104,6 +143,7 @@ struct conn {
> > >
> > >      /* Mutable data. */
> > >      struct ovs_mutex lock; /* Guards all mutable fields. */
> > > +    struct conn_expire exp;
> > >      ovs_u128 label;
> > >      long long expiration;
> > >      uint32_t mark;
> > > @@ -132,33 +172,10 @@ enum ct_update_res {
> > >      CT_UPDATE_VALID_NEW,
> > >  };
> > >
> > > -/* Timeouts: all the possible timeout states passed to 
> > > update_expiration()
> > > - * are listed here. The name will be prefix by CT_TM_ and the value is in
> > > - * milliseconds */
> > > -#define CT_TIMEOUTS \
> > > -    CT_TIMEOUT(TCP_FIRST_PACKET) \
> > > -    CT_TIMEOUT(TCP_OPENING) \
> > > -    CT_TIMEOUT(TCP_ESTABLISHED) \
> > > -    CT_TIMEOUT(TCP_CLOSING) \
> > > -    CT_TIMEOUT(TCP_FIN_WAIT) \
> > > -    CT_TIMEOUT(TCP_CLOSED) \
> > > -    CT_TIMEOUT(OTHER_FIRST) \
> > > -    CT_TIMEOUT(OTHER_MULTIPLE) \
> > > -    CT_TIMEOUT(OTHER_BIDIR) \
> > > -    CT_TIMEOUT(ICMP_FIRST) \
> > > -    CT_TIMEOUT(ICMP_REPLY)
> > > -
> > > -enum ct_timeout {
> > > -#define CT_TIMEOUT(NAME) CT_TM_##NAME,
> > > -    CT_TIMEOUTS
> > > -#undef CT_TIMEOUT
> > > -    N_CT_TM
> > > -};
> > > -
> > >  struct conntrack {
> > >      struct ovs_mutex ct_lock; /* Protects 2 following fields. */
> > >      struct cmap conns OVS_GUARDED;
> > > -    struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
> > > +    struct rculist exp_lists[N_CT_TM] OVS_GUARDED;
> > >      struct hmap zone_limits OVS_GUARDED;
> > >      struct hmap timeout_policies OVS_GUARDED;
> > >      uint32_t hash_basis; /* Salt for hashing a connection key. */
> > > @@ -204,4 +221,13 @@ struct ct_l4_proto {
> > >                                 struct ct_dpif_protoinfo *);
> > >  };
> > >
> > > +static inline void
> > > +conn_expire_remove(struct conn_expire *exp)
> > > +{
> > > +    if (!atomic_flag_test_and_set(&exp->remove_once)
> > > +        && rculist_next(&exp->node)) {
> > > +        rculist_remove(&exp->node);
> > > +    }
> > > +}
> > > +
> > >  #endif /* conntrack-private.h */
> > > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
> > > index a586d3a8d..30ba4bda8 100644
> > > --- a/lib/conntrack-tp.c
> > > +++ b/lib/conntrack-tp.c
> > > @@ -230,6 +230,50 @@ tm_to_ct_dpif_tp(enum ct_timeout tm)
> > >      return CT_DPIF_TP_ATTR_MAX;
> > >  }
> > >
> > > +static void
> > > +conn_expire_init(struct conn *conn, struct conntrack *ct)
> > > +{
> > > +    struct conn_expire *exp = &conn->exp;
> > > +
> > > +    if (exp->ct != NULL) {
> > > +        return;
> > > +    }
> > > +
> > > +    exp->ct = ct;
> > > +    atomic_flag_clear(&exp->insert_once);
> > > +    atomic_flag_clear(&exp->remove_once);
> > > +    /* The expiration is initially unscheduled, flag it as 'removed'. */
> > > +    atomic_flag_test_and_set(&exp->remove_once);
> > > +}
> > > +
> > > +static void
> > > +conn_expire_insert(struct conn *conn)
> > > +{
> > > +    struct conn_expire *exp = &conn->exp;
> > > +
> > > +    ovs_mutex_lock(&exp->ct->ct_lock);
> > > +    ovs_mutex_lock(&conn->lock);
> > > +
> > > +    rculist_push_back(&exp->ct->exp_lists[exp->tm], &exp->node);
> > > +    atomic_flag_clear(&exp->insert_once);
> > > +    atomic_flag_clear(&exp->remove_once);
> > > +
> > > +    ovs_mutex_unlock(&conn->lock);
> > > +    ovs_mutex_unlock(&exp->ct->ct_lock);
> > > +}
> > > +
> > > +static void
> > > +conn_schedule_expiration(struct conntrack *ct, struct conn *conn,
> > > +                         enum ct_timeout tm, long long now, uint32_t 
> > > tp_value)
> > > +{
> > > +    conn_expire_init(conn, ct);
> > > +    conn->expiration = now + tp_value * 1000;
> > > +    conn->exp.tm = tm;
> > > +    if (!atomic_flag_test_and_set(&conn->exp.insert_once)) {
> >
> > Does this mean we only insert the oldest one, not the latest one?
> > So pkt1 arrives at t1, pkt2 arrives later at t2, then we update using t1
> >
>
> The expiration node is flagged for insertion at pkt1, which means registering 
> the insertion callback using RCU.
> Then pkt2 arrives and will update 'conn->exp.tm'. If the RCU did not yet 
> execute its callbacks, next time it does the ct_timeout list will correspond 
> to the latest packet received. If it did execute already, then the 
> 'insert_once' flag was cleared and the insertion callback is re-scheduled.
>
> This avoids repeatedly scheduling insertions through the RCU. Connections 
> could potentially be updated several times between RCU sync, for potentially 
> many connections: it would mean making the cbset in RCU reallocate many times 
> and grow unnecessarily.

I see, looks good to me, thanks.
William
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to