On 3/4/22 17:19, Mike Pattrick wrote:
Add additional logging for debug and info level with a focus on code
related to bond members coming up, go down, and changing.

Several existing log messages were modified to handle sub 1kB log
messages with more grace. Instead of reporting 0kB of traffic load
shifting from one member to another, we now suppress these messages at
the INFO level and display exact byte count at the debug level.

Since v2:
  - Normalized log capitalization and periods
  - Summarized info level hash transfers
  - Added log if bond members were significantly deviant

Signed-off-by: Mike Pattrick <m...@redhat.com>
---
  lib/lacp.c     | 20 ++++++++++--
  ofproto/bond.c | 86 ++++++++++++++++++++++++++++++++++++++++----------
  2 files changed, 86 insertions(+), 20 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index 89d711225..5bdb56b6b 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void 
*member_,
              break;
          case SUBTYPE_MARKER:
              member->count_rx_pdus_marker++;
-            VLOG_DBG("%s: received a LACP marker PDU.", lacp->name);
+            VLOG_DBG("%s: received a LACP marker PDU", lacp->name);
              goto out;
          case SUBTYPE_UNUSED:
          default:
              member->count_rx_pdus_bad++;
-            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.",
+            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU",
                           lacp->name);
              goto out;
      }
@@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void 
*member_,
       * trigger re-checking of L1 state. */
      if (!member->carrier_up) {
          VLOG_INFO_RL(&rl, "%s: carrier state is DOWN,"
-                     " dropping received LACP PDU.", member->name);
+                     " dropping received LACP PDU", member->name);
          seq_change(connectivity_seq_get());
          goto out;
      }
+ if (member->status == LACP_DEFAULTED) {
+        VLOG_INFO("%s: defaulted member %s reestablished connection with "
+                  "LACP partner", lacp->name, member->name);
+    }
+
      member->status = LACP_CURRENT;
      tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
      timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
@@ -465,9 +470,15 @@ lacp_member_register(struct lacp *lacp, void *member_,
          if (!lacp->key_member) {
              lacp->key_member = member;
          }
+
+        VLOG_DBG("LACP member %s added", s->name);
      }
if (!member->name || strcmp(s->name, member->name)) {
+        if (member->name) {
+            VLOG_DBG("LACP member %s renamed from %s", s->name, member->name);
+        }
+
          free(member->name);
          member->name = xstrdup(s->name);
      }
@@ -531,6 +542,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, const 
void *member_,
          member->count_carrier_changed++;
      }
+ VLOG_DBG("LACP member %s changed state to %s",
+             member->name, carrier_up ? "up" : "down");
+
  out:
      lacp_unlock();
  }
diff --git a/ofproto/bond.c b/ofproto/bond.c
index cdfdf0b9d..14697a868 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -496,7 +496,7 @@ bond_reconfigure(struct bond *bond, const struct 
bond_settings *s)
          if (s->use_lb_output_action &&
              !ovs_lb_output_action_supported(bond->ofproto)) {
              VLOG_WARN("%s: Datapath does not support 'lb_output' action, "
-                      "disabled.", bond->name);
+                      "disabled", bond->name);
          } else {
              bond->use_lb_output_action = s->use_lb_output_action;
              if (!bond->use_lb_output_action) {
@@ -855,6 +855,8 @@ bond_check_admissibility(struct bond *bond, const void 
*member_,
      ovs_rwlock_rdlock(&rwlock);
      member = bond_member_lookup(bond, member_);
      if (!member) {
+        VLOG_DBG_RL(&rl, "bond %s: lookup for member on bond failed",
+                    bond->name);
          goto out;
      }
@@ -1127,8 +1129,13 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
              if (ds.length) {
                  ds_put_char(&ds, ',');
              }
-            ds_put_format(&ds, " %s %"PRIu64"kB",
-                          member->name, member->tx_bytes / 1024);
+            if (member->tx_bytes > 1024) {
+                ds_put_format(&ds, " %s %"PRIu64" kB",
+                              member->name, member->tx_bytes / 1024);
+            } else {
+                ds_put_format(&ds, " %s %"PRIu64" B",
+                              member->name, member->tx_bytes);
+            }
if (!member->enabled) {
                  ds_put_cstr(&ds, " (disabled)");
@@ -1141,13 +1148,20 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
                      if (&e->list_node != ovs_list_front(&member->entries)) {
                          ds_put_cstr(&ds, " + ");
                      }
-                    ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
-                                  e - bond->hash, e->tx_bytes / 1024);
+                    if (e->tx_bytes > 1024) {
+                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB",
+                                      e - bond->hash, e->tx_bytes / 1024);
+                    } else {
+                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B",
+                                      e - bond->hash, e->tx_bytes);
+                    }
                  }
                  ds_put_cstr(&ds, ")");
              }
          }
-        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
+        if (ds.length) {
+            VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
+        }
          ds_destroy(&ds);
      }
  }
@@ -1161,14 +1175,6 @@ bond_shift_load(struct bond_entry *hash, struct 
bond_member *to)
      struct bond *bond = from->bond;
      uint64_t delta = hash->tx_bytes;
- VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
-              "from %s to %s (now carrying %"PRIu64"kB and "
-              "%"PRIu64"kB load, respectively)",
-              bond->name, delta / 1024, hash - bond->hash,
-              from->name, to->name,
-              (from->tx_bytes - delta) / 1024,
-              (to->tx_bytes + delta) / 1024);
-
      /* Shift load away from 'from' to 'to'. */
      from->tx_bytes -= delta;
      to->tx_bytes += delta;
@@ -1300,6 +1306,8 @@ bond_rebalance(struct bond *bond)
      struct ovs_list bals;
      bool rebalanced = false;
      bool use_recirc;
+    uint64_t delta_start;
+    uint64_t delta_end;
ovs_rwlock_wrlock(&rwlock);
      if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) {
@@ -1345,6 +1353,16 @@ bond_rebalance(struct bond *bond)
              insert_bal(&bals, member);
          }
      }
+
+    if (ovs_list_is_empty(&bals)) {
+        goto done;
+    }
+
+    delta_start = (bond_member_from_bal_node(
+                   ovs_list_front(&bals))->tx_bytes -
+                   bond_member_from_bal_node(
+                   ovs_list_back(&bals))->tx_bytes) / 1024;
+
      log_bals(bond, &bals);
/* Shift load from the most-loaded members to the least-loaded members. */
@@ -1388,9 +1406,27 @@ bond_rebalance(struct bond *bond)
              reinsert_bal(&bals, from);
              reinsert_bal(&bals, to);
          }
+        VLOG_INFO("shifted %"PRIuSIZE" hashes totaling %"PRIu64" kB "
+                  "from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)",
+                  cnt, (overload + to->tx_bytes - from->tx_bytes) >> 1,
+                  from->name, from->tx_bytes, to->name, to->tx_bytes);
+

As convention, should we start all logs with "bond %s: {message}"?

          rebalanced = true;
      }
+ if (rebalanced) {
+        delta_end = (bond_member_from_bal_node(
+                     ovs_list_front(&bals))->tx_bytes -
+                     bond_member_from_bal_node(
+                     ovs_list_back(&bals))->tx_bytes) / 1024;
+

At this point the members that do not have hashes that can be migrated to other members have been removed from "bals". In fact, most likely "ovs_list_is_short(&bals) == 1". This means delta_end will always be == 0 IIUC.


+        if (delta_start > (delta_end * 2)) {
+            /* Bond was significantly imbalanced. */
+            VLOG_INFO("bond %s rebalanced, deviation reduced from %"PRIu64" kB"
+                      "to %"PRIu64" kB", bond->name, delta_start, delta_end);
+        }
+    }
+

Same as above comment regarding convention: s/bond %s/bond %s:/?

      /* Implement exponentially weighted moving average.  A weight of 1/2 
causes
       * historical data to decay to <1% in 7 rebalancing runs.  1,000,000 bytes
       * take 20 rebalancing runs to decay to 0 and get deleted entirely. */
@@ -1565,6 +1601,9 @@ bond_print_details(struct ds *ds, const struct bond *bond)
              if (be_tx_k) {
                  ds_put_format(ds, "  hash %d: %"PRIu64" kB load\n",
                            hash, be_tx_k);
+            } else if (be->tx_bytes) {
+                ds_put_format(ds, "  hash %d: %"PRIu64" B load\n",
+                          hash, be->tx_bytes);
              }
/* XXX How can we list the MACs assigned to hashes of SLB bonds? */
@@ -1952,14 +1991,15 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
  {
      struct bond_entry *e;
      int balance;
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
balance = bond->balance;
      if (bond->lacp_status == LACP_CONFIGURED) {
          /* LACP has been configured on this bond but negotiations were
-         * unsuccussful. If lacp_fallback_ab is enabled use active-
+         * unsuccessful. If lacp_fallback_ab is enabled use active-
           * backup mode else drop all traffic. */
          if (!bond->lacp_fallback_ab) {
-            return NULL;
+            goto error;
          }
          balance = BM_AB;
      }
@@ -1971,7 +2011,7 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
      case BM_TCP:
          if (bond->lacp_status != LACP_NEGOTIATED) {
              /* Must have LACP negotiations for TCP balanced bonds. */
-            return NULL;
+            goto error;
          }
          if (wc) {
              flow_mask_hash_fields(flow, wc, 
NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
@@ -1990,6 +2030,18 @@ choose_output_member(const struct bond *bond, const 
struct flow *flow,
      default:
          OVS_NOT_REACHED();
      }
+error:
+    if (VLOG_IS_DBG_ENABLED()) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        flow_format(&ds, flow, NULL);
+        VLOG_DBG_RL(&rl, "cannot choose output member on bond %s for flow %s "
+                    "with LACP status %s",
+                    bond->name,
+                    ds_cstr(&ds),
+                    lacp_status_description(bond->lacp_status));
+        ds_destroy(&ds);
+    }
+    return NULL;
  }
static struct bond_member *

--
Adrián Moreno

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to