Thanks Mike,

On 3/1/22 20:14, Mike Pattrick wrote:
Add additional logging for debug and info level with a focus on code
related to bond members coming up, go down, and changing.

Several existing log messages were modified to handle sub 1kB log
messages with more grace. Instead of reporting 0kB of traffic load
shifting from one member to another, we now suppress these messages at
the INFO level and display exact byte count at the debug level.

Signed-off-by: Mike Pattrick <m...@redhat.com>
---
  lib/lacp.c     | 15 +++++++++++
  ofproto/bond.c | 72 ++++++++++++++++++++++++++++++++++++++++----------
  2 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index 89d711225..fa01cbbfe 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -387,6 +387,12 @@ lacp_process_packet(struct lacp *lacp, const void *member_,
          goto out;
      }
+ if (member->status == LACP_DEFAULTED) {
+        VLOG_INFO("%s: defaulted member %s reestablished connection with LACP 
partner.",
+                 lacp->name,
+                 member->name);

Nit: I think ovs coding conventions suggest you need an extra space in the above two lines.


+    }
+
      member->status = LACP_CURRENT;
      tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
      timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
@@ -465,9 +471,15 @@ lacp_member_register(struct lacp *lacp, void *member_,
          if (!lacp->key_member) {
              lacp->key_member = member;
          }
+
+        VLOG_DBG("LACP member %s added.", s->name);
      }
if (!member->name || strcmp(s->name, member->name)) {
+        if (member->name) {
+            VLOG_DBG("LACP member %s renamed from %s.", s->name, member->name);
+        }
+
          free(member->name);
          member->name = xstrdup(s->name);
      }
@@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const 
void *member_,
          member->count_carrier_changed++;
      }
+ VLOG_DBG("LACP member %s changed state to %s.",
+             member->name, carrier_up ? "up" : "down");
+
  out:
      lacp_unlock();
  }
diff --git a/ofproto/bond.c b/ofproto/bond.c
index cdfdf0b9d..55e335bfa 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -855,6 +855,8 @@ bond_check_admissibility(struct bond *bond, const void 
*member_,
      ovs_rwlock_rdlock(&rwlock);
      member = bond_member_lookup(bond, member_);
      if (!member) {
+        VLOG_DBG_RL(&rl, "bond %s: lookup for member on bond failed",
+                    bond->name);
          goto out;
      }
@@ -1127,8 +1129,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
              if (ds.length) {
                  ds_put_char(&ds, ',');
              }
-            ds_put_format(&ds, " %s %"PRIu64"kB",
-                          member->name, member->tx_bytes / 1024);
+            if (member->tx_bytes > 1024) {
+                ds_put_format(&ds, " %s %"PRIu64"kB",
+                              member->name, member->tx_bytes / 1024);
+            } else {
+                ds_put_format(&ds, " %s %"PRIu64"B",
+                              member->name, member->tx_bytes);
+            }
if (!member->enabled) {
                  ds_put_cstr(&ds, " (disabled)");
@@ -1141,13 +1148,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
                      if (&e->list_node != ovs_list_front(&member->entries)) {
                          ds_put_cstr(&ds, " + ");
                      }
-                    ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
-                                  e - bond->hash, e->tx_bytes / 1024);
+                    if (e->tx_bytes > 1024) {
+                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
+                                      e - bond->hash, e->tx_bytes / 1024);
+                    } else {
+                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"B",
+                                      e - bond->hash, e->tx_bytes);
+                    }
                  }
                  ds_put_cstr(&ds, ")");
              }
          }
-        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
+        if (ds.length) {
+            VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
+        }
          ds_destroy(&ds);
      }
  }
@@ -1161,13 +1175,23 @@ bond_shift_load(struct bond_entry *hash, struct 
bond_member *to)
      struct bond *bond = from->bond;
      uint64_t delta = hash->tx_bytes;
- VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
-              "from %s to %s (now carrying %"PRIu64"kB and "
-              "%"PRIu64"kB load, respectively)",
-              bond->name, delta / 1024, hash - bond->hash,
-              from->name, to->name,
-              (from->tx_bytes - delta) / 1024,
-              (to->tx_bytes + delta) / 1024);
+    if (delta > 1024) {
+        VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
+                  "from %s to %s (now carrying %"PRIu64"kB and "
+                  "%"PRIu64"kB load, respectively)",
+                  bond->name, delta / 1024, hash - bond->hash,
+                  from->name, to->name,
+                  (from->tx_bytes - delta) / 1024,
+                  (to->tx_bytes + delta) / 1024);
+    } else {
+        VLOG_DBG("bond %s: shift %"PRIu64"B of load (with hash %"PRIdPTR") "
+                 "from %s to %s (now carrying %"PRIu64"kB and "
+                 "%"PRIu64"kB load, respectively)",
+                 bond->name, delta, hash - bond->hash,
+                 from->name, to->name,
+                 (from->tx_bytes - delta) / 1024,
+                 (to->tx_bytes + delta) / 1024);
+    }

Both versions should have the same log level. Original was INFO but I'm wondering if it's too verbose for an INFO. IIUC, this could be printed 256 times at a single rebalancing round.

Should we consider summarized version for INFO level?

Maybe one that sums up a migration, something like: "shifted N Kbps from member X (which now has Tx Kbps) to member Y (which now has Ty Kbps)". That should only be printed (N_Members -1) times at most on each rebalancing round. Or one that sums up the entire rebalancing round and prints the new tx_bytes of each member, only printed once per rebalancing round.


      /* Shift load away from 'from' to 'to'. */
      from->tx_bytes -= delta;
@@ -1345,6 +1369,12 @@ bond_rebalance(struct bond *bond)
              insert_bal(&bals, member);
          }
      }
+    if (ovs_list_is_empty(&bals)) {
+        VLOG_INFO("Bond %s cannot be balanced because no members are enabled.",
+                  bond->name);
+        goto done;
+    }
+

If both members get disabled (or before they get added to the bond), this could be printed in INFO level every second. Not sure if that's desirable considering we would already log:

        VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name);

in "bond_choose_active_member()". What do you think?


      log_bals(bond, &bals);
/* Shift load from the most-loaded members to the least-loaded members. */
@@ -1565,6 +1595,9 @@ bond_print_details(struct ds *ds, const struct bond *bond)
              if (be_tx_k) {
                  ds_put_format(ds, "  hash %d: %"PRIu64" kB load\n",
                            hash, be_tx_k);
+            } else if (be->tx_bytes) {
+                ds_put_format(ds, "  hash %d: %"PRIu64" B load\n",
+                          hash, be->tx_bytes);
              }
/* XXX How can we list the MACs assigned to hashes of SLB bonds? */
@@ -1959,7 +1992,7 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
           * unsuccussful. If lacp_fallback_ab is enabled use active-
           * backup mode else drop all traffic. */
          if (!bond->lacp_fallback_ab) {
-            return NULL;
+            goto error;
          }
          balance = BM_AB;
      }
@@ -1971,7 +2004,7 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
      case BM_TCP:
          if (bond->lacp_status != LACP_NEGOTIATED) {
              /* Must have LACP negotiations for TCP balanced bonds. */
-            return NULL;
+            goto error;
          }
          if (wc) {
              flow_mask_hash_fields(flow, wc, 
NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
@@ -1990,6 +2023,17 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
      default:
          OVS_NOT_REACHED();
      }
+error:
+    if (VLOG_IS_DBG_ENABLED()) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        flow_format(&ds, flow, NULL);
+        VLOG_DBG("Cannot choose output member on bond %s for flow %s with LACP 
status %s",
+                 bond->name,
+                 ds_cstr(&ds),
+                 lacp_status_description(bond->lacp_status));
+        ds_destroy(&ds);
+    }
+    return NULL;

Use rate limiting here?

  }
static struct bond_member *

--
Adrián Moreno

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to