On Mon, Aug 19, 2019 at 7:41 PM Darrell Ball <dlu...@gmail.com> wrote:

>
>
> On Mon, Aug 19, 2019 at 12:42 PM Darrell Ball <dlu...@gmail.com> wrote:
>
>>
>>
>> On Mon, Aug 19, 2019 at 10:52 AM Yi-Hung Wei <yihung....@gmail.com>
>> wrote:
>>
>>> On Fri, Aug 16, 2019 at 5:07 PM Darrell Ball <dlu...@gmail.com> wrote:
>>> >
>>> > Thanks for the patch
>>> >
>>> > Pls let me know if this incremental works for you.
>>> > Main change is logging fix for timeout policy deletion.
>>> >
>>> > Darrell
>>> >
>>> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>> > index 1d4ee60..00d957b 100644
>>> > --- a/lib/dpif-netlink.c
>>> > +++ b/lib/dpif-netlink.c
>>> > @@ -2822,11 +2822,10 @@ dpif_netlink_ct_dump_done(struct dpif *dpif
>>> OVS_UNUSED,
>>> >                            struct ct_dpif_dump_state *dump_)
>>> >  {
>>> >      struct dpif_netlink_ct_dump_state *dump;
>>> > -    int err;
>>> >
>>> >      INIT_CONTAINER(dump, dump_, up);
>>> >
>>> > -    err = nl_ct_dump_done(dump->nl_ct_dump);
>>> > +    int err = nl_ct_dump_done(dump->nl_ct_dump);
>>> >      free(dump);
>>> >      return err;
>>> >  }
>>> > @@ -3335,7 +3334,8 @@ dpif_netlink_ct_del_timeout_policy(struct dpif
>>> *dpif OVS_UNUSED,
>>> >              err = 0;
>>> >          }
>>> >          if (err) {
>>> > -            VLOG_WARN_RL(&error_rl, "failed to delete timeout policy
>>> %s (%s)",
>>> > +            static struct vlog_rate_limit rl =
>>> VLOG_RATE_LIMIT_INIT(1, 1);
>>>
>>> Thanks for the diff.  It looks good in general.
>>>
>>> I agree on the main concern of the proposed diff which is the original
>>> rate limit in dpif-netlink (VLOG_RATE_LIMIT_INIT(9999, 5)) may log too
>>> much duplicated information.  However, since we may delete more than
>>> one one timeout policy in a minute, so lowering the rate limit to
>>> VLOG_RATE_LIMIT_INIT(1, 1) may miss some useful information.   I would
>>> use somewhere in the between (VLOG_RATE_LIMIT_INIT(5, 5)) in the next
>>> version.
>>>
>>
>> TBH, I am not sure we care lots about this information. I was even
>> debating changing it debug level.
>> We have 4 billion datapath timeout profile IDs, so it is unlikely we will
>> run out.
>> Eventually, they will get cleaned up by the retry thingy.
>>
>> Also, I am not sure what action we will take by seeing these logs anyhow.
>>
>> Spamming the log is more of a concern.
>>
>
> After more testing, I noticed there are a couple other aspects we might
> have overlooked.
>
> 1/ We don't have a log for a successful deletion attempt.
>
> 2/ When we try to delete 1 of the 6 associated Netfilter timeout policies
> and it fails, we don't try to delete
>  the remaining ones and bail out of the deletion loop early.
>
> Keeping the INFO level change (from WARN), addressing '1' and '2' above
> and also folding in
> your idea to keep the overall log generation rate a little higher, I ended
> up with:
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 1d4ee60..3926cfd 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -2822,11 +2822,10 @@ dpif_netlink_ct_dump_done(struct dpif *dpif
> OVS_UNUSED,
>                            struct ct_dpif_dump_state *dump_)
>  {
>      struct dpif_netlink_ct_dump_state *dump;
> -    int err;
>
>      INIT_CONTAINER(dump, dump_, up);
>
> -    err = nl_ct_dump_done(dump->nl_ct_dump);
> +    int err = nl_ct_dump_done(dump->nl_ct_dump);
>      free(dump);
>      return err;
>  }
> @@ -3334,14 +3333,12 @@ dpif_netlink_ct_del_timeout_policy(struct dpif
> *dpif OVS_UNUSED,
>          if (err == ENOENT) {
>              err = 0;
>          }
> -        if (err) {
> -            VLOG_WARN_RL(&error_rl, "failed to delete timeout policy %s
> (%s)",
> -                         ds_cstr(&nl_tp_name), ovs_strerror(err));
> -            goto out;
> -        }
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
> +        VLOG_INFO_RL(&rl, err ? "failed to delete timeout policy %s (%s)"
> +                              : "deleted timeout policy %s (%s)",
> +                     ds_cstr(&nl_tp_name), ovs_strerror(err));
>      }
>
> -out:
>      ds_destroy(&nl_tp_name);
>      return err;
>  }
> @@ -3392,10 +3389,9 @@ dpif_netlink_ct_timeout_policy_dump_start(struct
> dpif *dpif OVS_UNUSED,
>                                            void **statep)
>  {
>      struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
> -    int err;
>
>      *statep = dump_state = xzalloc(sizeof *dump_state);
> -    err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
> +    int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
>      if (err) {
>          free(dump_state);
>          return err;
>

> Running the system test (from Patch 9) and adding a deletion request for
> the in-use timeout policy yields
>
> > 2019-08-20T01:28:41.004Z|00203|dpif_netlink|INFO|deleted timeout policy
> ovs_tp_0_tcp4 (Success)
> > 2019-08-20T01:28:41.004Z|00204|dpif_netlink|INFO|failed to delete
> timeout policy ovs_tp_0_udp4 (Device or resource busy)
> > 2019-08-20T01:28:41.004Z|00205|dpif_netlink|INFO|failed to delete
> timeout policy ovs_tp_0_icmp4 (Device or resource busy)
> > 2019-08-20T01:28:41.004Z|00206|dpif_netlink|INFO|deleted timeout policy
> ovs_tp_0_tcp6 (Success)
> > 2019-08-20T01:28:41.004Z|00207|dpif_netlink|INFO|deleted timeout policy
> ovs_tp_0_udp6 (Success)
> > 2019-08-20T01:28:41.004Z|00208|dpif_netlink|INFO|deleted timeout policy
> ovs_tp_0_icmpv6 (Success)
>

After fixing a bug in my proposed incremental and adding tracking of an
already removed sub timeout policy:
Pls double check.

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 1d4ee60..cba4432 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2822,11 +2822,10 @@ dpif_netlink_ct_dump_done(struct dpif *dpif
OVS_UNUSED,
                           struct ct_dpif_dump_state *dump_)
 {
     struct dpif_netlink_ct_dump_state *dump;
-    int err;

     INIT_CONTAINER(dump, dump_, up);

-    err = nl_ct_dump_done(dump->nl_ct_dump);
+    int err = nl_ct_dump_done(dump->nl_ct_dump);
     free(dump);
     return err;
 }
@@ -3319,7 +3318,8 @@ out:
 }

 /* Returns 0 if all the sub timeout policies are deleted or
- * not exist in the kernel. */
+ * not exist in the kernel; returns 1 if any sub timeout policy deletion
+ * failed. */
 static int
 dpif_netlink_ct_del_timeout_policy(struct dpif *dpif OVS_UNUSED,
                                    uint32_t tp_id)
@@ -3330,18 +3330,19 @@ dpif_netlink_ct_del_timeout_policy(struct dpif
*dpif OVS_UNUSED,
     for (int i = 0; i < ARRAY_SIZE(tp_protos); ++i) {
         dpif_netlink_format_tp_name(tp_id, tp_protos[i].l3num,
                                     tp_protos[i].l4num, &nl_tp_name);
-        err = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name));
-        if (err == ENOENT) {
-            err = 0;
-        }
-        if (err) {
-            VLOG_WARN_RL(&error_rl, "failed to delete timeout policy %s
(%s)",
-                         ds_cstr(&nl_tp_name), ovs_strerror(err));
-            goto out;
+        int err2 = nl_ct_del_timeout_policy(ds_cstr(&nl_tp_name));
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(6, 6);
+        VLOG_INFO_RL(&rl, err2 == ENOENT
+                     ? "timeout policy already removed %s (%s)"
+                     : !err2 ? "deleted timeout policy %s (%s)"
+                             : "failed to delete timeout policy %s (%s)",
+                     ds_cstr(&nl_tp_name), ovs_strerror(err2));
+        if (err2 == ENOENT) {
+            err2 = 0;
         }
+        err = err || err2;
     }

-out:
     ds_destroy(&nl_tp_name);
     return err;
 }
@@ -3392,10 +3393,9 @@ dpif_netlink_ct_timeout_policy_dump_start(struct
dpif *dpif OVS_UNUSED,
                                           void **statep)
 {
     struct dpif_netlink_ct_timeout_policy_dump_state *dump_state;
-    int err;

     *statep = dump_state = xzalloc(sizeof *dump_state);
-    err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
+    int err = nl_ct_timeout_policy_dump_start(&dump_state->nl_dump_state);
     if (err) {
         free(dump_state);
         return err;
(END)

Running the system test (from Patch 9) and adding a deletion request for
the in-use timeout policy yields

> 2019-08-20T07:39:41.016Z|00211|dpif_netlink|INFO|deleted timeout policy
ovs_tp_0_tcp4 (Success)
> 2019-08-20T07:39:41.016Z|00212|dpif_netlink|INFO|failed to delete timeout
policy ovs_tp_0_udp4 (Device or resource busy)
> 2019-08-20T07:39:41.016Z|00213|dpif_netlink|INFO|failed to delete timeout
policy ovs_tp_0_icmp4 (Device or resource busy)
> 2019-08-20T07:39:41.016Z|00214|dpif_netlink|INFO|deleted timeout policy
ovs_tp_0_tcp6 (Success)
> 2019-08-20T07:39:41.016Z|00215|dpif_netlink|INFO|deleted timeout policy
ovs_tp_0_udp6 (Success)
> 2019-08-20T07:39:41.016Z|00216|dpif_netlink|INFO|deleted timeout policy
ovs_tp_0_icmpv6 (Success)


>
>
>
>
>>
>>
>>>
>>> Thanks,
>>>
>>> -Yi-Hung
>>>
>>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to