On Tue, Feb 23, 2021 at 4:34 PM Gaëtan Rivet <gr...@u256.net> wrote: > > > > On Tue, Feb 23, 2021, at 22:55, William Tu wrote: > > Hi Gaetan, > > > > Thanks for the patch, looks very useful. > > I haven't tested it yet. > > Minor question/comments inline. > > Hi William, thanks for taking a look! > > [...] > > > > > > +/* Timeouts: all the possible timeout states passed to > > > update_expiration() > > > + * are listed here. The name will be prefix by CT_TM_ and the value is in > > > + * milliseconds */ > > > +#define CT_TIMEOUTS \ > > > + CT_TIMEOUT(TCP_FIRST_PACKET) \ > > > + CT_TIMEOUT(TCP_OPENING) \ > > > + CT_TIMEOUT(TCP_ESTABLISHED) \ > > > + CT_TIMEOUT(TCP_CLOSING) \ > > > + CT_TIMEOUT(TCP_FIN_WAIT) \ > > > + CT_TIMEOUT(TCP_CLOSED) \ > > > + CT_TIMEOUT(OTHER_FIRST) \ > > > + CT_TIMEOUT(OTHER_MULTIPLE) \ > > > + CT_TIMEOUT(OTHER_BIDIR) \ > > > + CT_TIMEOUT(ICMP_FIRST) \ > > > + CT_TIMEOUT(ICMP_REPLY) > > > + > > > +enum ct_timeout { > > > +#define CT_TIMEOUT(NAME) CT_TM_##NAME, > > > + CT_TIMEOUTS > > > +#undef CT_TIMEOUT > > > + N_CT_TM > > > +}; > > > + > > any reason for moving this macro to here? > > > > I embed the enum ct_timeout within the conn expiration node, so I must have > it defined first. > I did not want to put too much code between 'ct_conn_type' below and the > 'conn' struct itself, > which is why I put 'ct_timeout' just above. > > Otherwise, this could be avoided by linking the rculist pointer directly > instead of storing its index. > But I thought it would be unwise as it would more tightly couple the list > type to the expiration node, and it means taking a full pointer vs. storing > an enum. >
I see, make sense, thanks! > > > enum OVS_PACKED_ENUM ct_conn_type { > > > CT_CONN_TYPE_DEFAULT, > > > CT_CONN_TYPE_UN_NAT, > > > }; > > > > > > +struct conn_expire { > > > + /* Set once when initializing the expiration node. */ > > > + struct conntrack *ct; > > > + /* Timeout state of the connection. > > > + * It follows the connection state updates. > > > + */ > > > + enum ct_timeout tm; > > > + /* Insert and remove the expiration node only once per RCU syncs. > > > + * If multiple threads update the connection, its expiration should > > > + * be removed only once and added only once to timeout lists. > > > + */ > > > + atomic_flag insert_once; > > > + atomic_flag remove_once; > > > + struct rculist node; > > > +}; > > > + > > > struct conn { > > > /* Immutable data. */ > > > struct conn_key key; > > > struct conn_key rev_key; > > > struct conn_key parent_key; /* Only used for orig_tuple support. */ > > > - struct ovs_list exp_node; > > > struct cmap_node cm_node; > > > struct nat_action_info_t *nat_info; > > > char *alg; > > > @@ -104,6 +143,7 @@ struct conn { > > > > > > /* Mutable data. */ > > > struct ovs_mutex lock; /* Guards all mutable fields. */ > > > + struct conn_expire exp; > > > ovs_u128 label; > > > long long expiration; > > > uint32_t mark; > > > @@ -132,33 +172,10 @@ enum ct_update_res { > > > CT_UPDATE_VALID_NEW, > > > }; > > > > > > -/* Timeouts: all the possible timeout states passed to > > > update_expiration() > > > - * are listed here. The name will be prefix by CT_TM_ and the value is in > > > - * milliseconds */ > > > -#define CT_TIMEOUTS \ > > > - CT_TIMEOUT(TCP_FIRST_PACKET) \ > > > - CT_TIMEOUT(TCP_OPENING) \ > > > - CT_TIMEOUT(TCP_ESTABLISHED) \ > > > - CT_TIMEOUT(TCP_CLOSING) \ > > > - CT_TIMEOUT(TCP_FIN_WAIT) \ > > > - CT_TIMEOUT(TCP_CLOSED) \ > > > - CT_TIMEOUT(OTHER_FIRST) \ > > > - CT_TIMEOUT(OTHER_MULTIPLE) \ > > > - CT_TIMEOUT(OTHER_BIDIR) \ > > > - CT_TIMEOUT(ICMP_FIRST) \ > > > - CT_TIMEOUT(ICMP_REPLY) > > > - > > > -enum ct_timeout { > > > -#define CT_TIMEOUT(NAME) CT_TM_##NAME, > > > - CT_TIMEOUTS > > > -#undef CT_TIMEOUT > > > - N_CT_TM > > > -}; > > > - > > > struct conntrack { > > > struct ovs_mutex ct_lock; /* Protects 2 following fields. */ > > > struct cmap conns OVS_GUARDED; > > > - struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; > > > + struct rculist exp_lists[N_CT_TM] OVS_GUARDED; > > > struct hmap zone_limits OVS_GUARDED; > > > struct hmap timeout_policies OVS_GUARDED; > > > uint32_t hash_basis; /* Salt for hashing a connection key. */ > > > @@ -204,4 +221,13 @@ struct ct_l4_proto { > > > struct ct_dpif_protoinfo *); > > > }; > > > > > > +static inline void > > > +conn_expire_remove(struct conn_expire *exp) > > > +{ > > > + if (!atomic_flag_test_and_set(&exp->remove_once) > > > + && rculist_next(&exp->node)) { > > > + rculist_remove(&exp->node); > > > + } > > > +} > > > + > > > #endif /* conntrack-private.h */ > > > diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c > > > index a586d3a8d..30ba4bda8 100644 > > > --- a/lib/conntrack-tp.c > > > +++ b/lib/conntrack-tp.c > > > @@ -230,6 +230,50 @@ tm_to_ct_dpif_tp(enum ct_timeout tm) > > > return CT_DPIF_TP_ATTR_MAX; > > > } > > > > > > +static void > > > +conn_expire_init(struct conn *conn, struct conntrack *ct) > > > +{ > > > + struct conn_expire *exp = &conn->exp; > > > + > > > + if (exp->ct != NULL) { > > > + return; > > > + } > > > + > > > + exp->ct = ct; > > > + atomic_flag_clear(&exp->insert_once); > > > + atomic_flag_clear(&exp->remove_once); > > > + /* The expiration is initially unscheduled, flag it as 'removed'. */ > > > + atomic_flag_test_and_set(&exp->remove_once); > > > +} > > > + > > > +static void > > > +conn_expire_insert(struct conn *conn) > > > +{ > > > + struct conn_expire *exp = &conn->exp; > > > + > > > + ovs_mutex_lock(&exp->ct->ct_lock); > > > + ovs_mutex_lock(&conn->lock); > > > + > > > + rculist_push_back(&exp->ct->exp_lists[exp->tm], &exp->node); > > > + atomic_flag_clear(&exp->insert_once); > > > + atomic_flag_clear(&exp->remove_once); > > > + > > > + ovs_mutex_unlock(&conn->lock); > > > + ovs_mutex_unlock(&exp->ct->ct_lock); > > > +} > > > + > > > +static void > > > +conn_schedule_expiration(struct conntrack *ct, struct conn *conn, > > > + enum ct_timeout tm, long long now, uint32_t > > > tp_value) > > > +{ > > > + conn_expire_init(conn, ct); > > > + conn->expiration = now + tp_value * 1000; > > > + conn->exp.tm = tm; > > > + if (!atomic_flag_test_and_set(&conn->exp.insert_once)) { > > > > Does this mean we only insert the oldest one, not the latest one? > > So pkt1 arrives at t1, pkt2 arrives later at t2, then we update using t1 > > > > The expiration node is flagged for insertion at pkt1, which means registering > the insertion callback using RCU. > Then pkt2 arrives and will update 'conn->exp.tm'. If the RCU did not yet > execute its callbacks, next time it does the ct_timeout list will correspond > to the latest packet received. If it did execute already, then the > 'insert_once' flag was cleared and the insertion callback is re-scheduled. > > This avoids repeatedly scheduling insertions through the RCU. Connections > could potentially be updated several times between RCU sync, for potentially > many connections: it would mean making the cbset in RCU reallocate many times > and grow unnecessarily. I see, looks good to me, thanks. William _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev