On Wed, Jun 22, 2022 at 4:29 PM Mike Pattrick <m...@redhat.com> wrote:
>
> Add additional logging for debug and info level with a focus on code
> related to bond members coming up, go down, and changing.
>
> Several existing log messages were modified to handle sub 1kB log
> messages with more grace. Instead of reporting 0kB of traffic load
> shifting from one member to another, we now suppress these messages at
> the INFO level and display exact byte count at the debug level.
>
> Signed-off-by: Mike Pattrick <m...@redhat.com>
>
> --
>
> Since v4:
>  - Removed prefixes
>  - Removed unrequired conditional
>
> Since v3:
>  - Removed log if bond members were significantly deviant
>  - Normalized log prefixes, including LACP
>  - Added details to the member enable/disable log
>
> Since v2:
>  - Normalized log capitalization and periods
>  - Summarized info level hash transfers
>  - Added log if bond members were significantly deviant
>
> Signed-off-by: Mike Pattrick <m...@redhat.com>
> ---
>  lib/lacp.c     |  26 +++++++++---
>  ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++----------------
>  2 files changed, 96 insertions(+), 40 deletions(-)
>
> diff --git a/lib/lacp.c b/lib/lacp.c
> index 3252f17eb..bca347129 100644
> --- a/lib/lacp.c
> +++ b/lib/lacp.c
> @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void 
> *member_,
>              break;
>          case SUBTYPE_MARKER:
>              member->count_rx_pdus_marker++;
> -            VLOG_DBG("%s: received a LACP marker PDU.", lacp->name);
> +            VLOG_DBG("%s: received a LACP marker PDU", lacp->name);
>              goto out;
>          case SUBTYPE_UNUSED:
>          default:
>              member->count_rx_pdus_bad++;
> -            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.",
> +            VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU",
>                           lacp->name);
>              goto out;
>      }
> @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void 
> *member_,
>       * trigger re-checking of L1 state. */
>      if (!member->carrier_up) {
>          VLOG_INFO_RL(&rl, "%s: carrier state is DOWN,"
> -                     " dropping received LACP PDU.", member->name);
> +                     " dropping received LACP PDU", member->name);
>          seq_change(connectivity_seq_get());
>          goto out;
>      }
>
> +    if (member->status == LACP_DEFAULTED) {
> +        VLOG_INFO("%s: defaulted member %s reestablished connection with"
> +                  " LACP partner", lacp->name, member->name);
> +    }
> +
>      member->status = LACP_CURRENT;
>      tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
>      timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
> @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_,
>          if (!lacp->key_member) {
>              lacp->key_member = member;
>          }
> +
> +        VLOG_DBG("%s: member %s added", lacp->name, s->name);
>      }
>
>      if (!member->name || strcmp(s->name, member->name)) {
> +        if (member->name) {
> +            VLOG_DBG("%s: member %s renamed from %s", lacp->name,
> +                     s->name, member->name);
> +        }
> +
>          free(member->name);
>          member->name = xstrdup(s->name);
>      }
> @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, 
> const void *member_,
>          member->count_carrier_changed++;
>      }
>
> +    VLOG_DBG("%s: member %s changed state to %s", lacp->name,
> +             member->name, carrier_up ? "up" : "down");
> +
>  out:
>      lacp_unlock();
>  }
> @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) 
> OVS_REQUIRES(mutex)
>          /* XXX: In the future allow users to configure the expected system 
> ID.
>           * For now just special case loopback. */
>          if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) {
> -            VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is "
> -                         "connected to its own bond", member->name);
> +            VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. "
> +                         "Interface is connected to its own bond",
> +                         lacp->name, member->name);
>              continue;
>          }
>
> diff --git a/ofproto/bond.c b/ofproto/bond.c
> index 845f69e21..13702950d 100644
> --- a/ofproto/bond.c
> +++ b/ofproto/bond.c
> @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond)
>                  char *err_s = match_to_string(&pr_op->match, NULL,
>                                                RECIRC_RULE_PRIORITY);
>
> -                VLOG_ERR("failed to add post recirculation flow %s", err_s);
> +                VLOG_ERR("%s: failed to add post recirculation flow %s",
> +                         bond->name, err_s);
>                  free(err_s);
>              }
>              break;
> @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond)
>                  char *err_s = match_to_string(&pr_op->match, NULL,
>                                                RECIRC_RULE_PRIORITY);
>
> -                VLOG_ERR("failed to remove post recirculation flow %s", 
> err_s);
> +                VLOG_ERR("%s: failed to remove post recirculation flow "
> +                         "%s", bond->name, err_s);
>                  free(err_s);
>              }
>
> @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct 
> bond_settings *s)
>          if (s->use_lb_output_action &&
>              !ovs_lb_output_action_supported(bond->ofproto)) {
>              VLOG_WARN("%s: Datapath does not support 'lb_output' action, "
> -                      "disabled.", bond->name);
> +                      "disabled", bond->name);
>          } else {
>              bond->use_lb_output_action = s->use_lb_output_action;
>              if (!bond->use_lb_output_action) {
> @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void 
> *member_,
>      ovs_rwlock_rdlock(&rwlock);
>      member = bond_member_lookup(bond, member_);
>      if (!member) {
> +        VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed",
> +                    bond->name);

Is there a valid case where this would not be a bug?


>          goto out;
>      }
>
> @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void 
> *member_,
>           * main thread to run LACP state machine and enable the member. */
>          verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : 
> BV_DROP;
>          if (!member->enabled && member->may_enable) {
> -            VLOG_DBG_RL(&rl, "bond %s: member %s: "
> +            VLOG_DBG_RL(&rl, "%s: member %s: "
>                          "main thread has not yet enabled member",
>                          bond->name, bond->active_member->name);
>          }
> @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void 
> *member_,
>          /* Drop all packets which arrive on backup members.  This is similar 
> to
>           * how Linux bonding handles active-backup bonds. */
>          if (bond->active_member != member) {
> -            VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond 
> received "
> +            VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received "
>                          "packet on backup member destined for " ETH_ADDR_FMT,
>                          bond->name, member->name, ETH_ADDR_ARGS(eth_dst));
>              goto out;
> @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void 
> *member_,
>      OVS_NOT_REACHED();
>  out:
>      if (member && (verdict != BV_ACCEPT)) {
> -        VLOG_DBG_RL(&rl, "bond %s: member %s: "
> +        VLOG_DBG_RL(&rl, "%s: member %s: "
>                      "admissibility verdict is to drop pkt%s, "
>                      "active member: %s, may_enable: %s, enabled: %s, "
>                      "LACP status: %s",
> @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list 
> *bals)
>              if (ds.length) {
>                  ds_put_char(&ds, ',');

Nit.

The separator between members in this log message ends up being ", ".
So here, we should push an additonal space...

>              }
> -            ds_put_format(&ds, " %s %"PRIu64"kB",
> -                          member->name, member->tx_bytes / 1024);
> +            if (member->tx_bytes > 1024) {
> +                ds_put_format(&ds, " %s %"PRIu64" kB",

...and then there is no need for this extra space in front of the member name.

Otherwise, we get an extra space for the first member:
2022-07-29T14:14:52.015Z|00147|bond|DBG|dpdkbond0:  dpdk0 0 B
[...]
2022-07-29T14:15:02.479Z|00150|bond|DBG|dpdkbond0:  dpdk1 0 B, dpdk0 0 B


> +                              member->name, member->tx_bytes / 1024);
> +            } else {
> +                ds_put_format(&ds, " %s %"PRIu64" B",
> +                              member->name, member->tx_bytes);
> +            }
>
>              if (!member->enabled) {
>                  ds_put_cstr(&ds, " (disabled)");
> @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list 
> *bals)
>                      if (&e->list_node != ovs_list_front(&member->entries)) {
>                          ds_put_cstr(&ds, " + ");
>                      }
> -                    ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
> -                                  e - bond->hash, e->tx_bytes / 1024);
> +                    if (e->tx_bytes > 1024) {
> +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB",
> +                                      e - bond->hash, e->tx_bytes / 1024);
> +                    } else {
> +                        ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B",
> +                                      e - bond->hash, e->tx_bytes);
> +                    }
>                  }
>                  ds_put_cstr(&ds, ")");
>              }
>          }
> -        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
> +        if (ds.length) {
> +            VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds));
> +        }
>          ds_destroy(&ds);
>      }
>  }
> @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct 
> bond_member *to)
>      struct bond *bond = from->bond;
>      uint64_t delta = hash->tx_bytes;
>
> -    VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") "
> -              "from %s to %s (now carrying %"PRIu64"kB and "
> -              "%"PRIu64"kB load, respectively)",
> -              bond->name, delta / 1024, hash - bond->hash,
> -              from->name, to->name,
> -              (from->tx_bytes - delta) / 1024,
> -              (to->tx_bytes + delta) / 1024);
> -
>      /* Shift load away from 'from' to 'to'. */
>      from->tx_bytes -= delta;
>      to->tx_bytes += delta;
> @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond)
>              insert_bal(&bals, member);
>          }
>      }
> +

One empty line is added, it seems unrelated.

>      log_bals(bond, &bals);
>
>      /* Shift load from the most-loaded members to the least-loaded members. 
> */
> @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond)
>          uint64_t overload;
>
>          overload = from->tx_bytes - to->tx_bytes;
> +

Idem.

>          if (overload < to->tx_bytes >> 5 || overload < 100000) {
>              /* The extra load on 'from' (and all less-loaded members), 
> compared
>               * to that of 'to' (the least-loaded member), is less than ~3%, 
> or
> @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond)
>              reinsert_bal(&bals, from);
>              reinsert_bal(&bals, to);
>          }
> +        VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64
> +                  " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)",
> +                  bond->name, cnt,
> +                  (overload + to->tx_bytes - from->tx_bytes) >> 1,
> +                  from->name, from->tx_bytes, to->name, to->tx_bytes);
> +
>          rebalanced = true;
>      }
>
> @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond 
> *bond)
>              if (be_tx_k) {
>                  ds_put_format(ds, "  hash %d: %"PRIu64" kB load\n",
>                            hash, be_tx_k);
> +            } else if (be->tx_bytes) {
> +                ds_put_format(ds, "  hash %d: %"PRIu64" B load\n",
> +                          hash, be->tx_bytes);
>              }
>
>              /* XXX How can we list the MACs assigned to hashes of SLB bonds? 
> */
> @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn 
> *conn,
>      if (bond->active_member != member) {
>          bond->bond_revalidate = true;
>          bond->active_member = member;
> -        VLOG_INFO("bond %s: active member is now %s",
> +        VLOG_INFO("%s: active member is now %s",
>                    bond->name, member->name);
>          bond->send_learning_packets = true;
>          unixctl_command_reply(conn, "done");
> @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool 
> enable)
>          }
>          ovs_mutex_unlock(&member->bond->mutex);
>
> -        VLOG_INFO("member %s: %s", member->name,
> +        VLOG_INFO("%s: member %s %s", bond->name, member->name,
>                    member->enabled ? "enabled" : "disabled");
>      }
>  }
> @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member)
>  {
>      struct bond *bond = member->bond;
>      bool up;
> +    bool carrier = netdev_get_carrier(member->netdev);
>
> -    up = netdev_get_carrier(member->netdev) && member->may_enable;
> +    up = carrier && member->may_enable;
>      if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) {
> +        char * msg;
>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> -        VLOG_INFO_RL(&rl, "member %s: link state %s",
> -                     member->name, up ? "up" : "down");
> +        if (!carrier) {
> +            msg = "carrier state";
> +        } else {
> +            msg = "link state";
> +        }
> +        VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name,
> +                     member->name, msg, up ? "up" : "down");
>          if (up == member->enabled) {
>              member->delay_expires = LLONG_MAX;
> -            VLOG_INFO_RL(&rl, "member %s: will not be %s",
> -                         member->name, up ? "disabled" : "enabled");
> +            VLOG_INFO_RL(&rl, "%s: member %s will not be %s",
> +                         bond->name, member->name,
> +                         up ? "disabled" : "enabled");
>          } else {
>              int delay = up ? bond->updelay : bond->downdelay;
>              member->delay_expires = time_msec() + delay;
>              if (delay) {
> -                VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s "
> -                             "for %d ms",
> -                             member->name,
> +                VLOG_INFO_RL(&rl, "%s: member %s will be %s"
> +                             " if it stays %s for %d ms",
> +                             bond->name, member->name,
>                               up ? "enabled" : "disabled",
>                               up ? "up" : "down",
>                               delay);
> @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const 
> struct flow *flow,
>  {
>      struct bond_entry *e;
>      int balance;
> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>
>      balance = bond->balance;
>      if (bond->lacp_status == LACP_CONFIGURED) {
>          /* LACP has been configured on this bond but negotiations were
> -         * unsuccussful. If lacp_fallback_ab is enabled use active-
> +         * unsuccessful. If lacp_fallback_ab is enabled use active-
>           * backup mode else drop all traffic. */
>          if (!bond->lacp_fallback_ab) {
> -            return NULL;
> +            goto error;
>          }
>          balance = BM_AB;
>      }
> @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const 
> struct flow *flow,
>      case BM_TCP:
>          if (bond->lacp_status != LACP_NEGOTIATED) {
>              /* Must have LACP negotiations for TCP balanced bonds. */
> -            return NULL;
> +            goto error;
>          }
>          if (wc) {
>              flow_mask_hash_fields(flow, wc, 
> NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP);
> @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const 
> struct flow *flow,
>      default:
>          OVS_NOT_REACHED();
>      }
> +error:
> +    if (VLOG_IS_DBG_ENABLED()) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +        flow_format(&ds, flow, NULL);
> +        VLOG_DBG_RL(&rl, "%s: cannot choose output member "
> +                    "for flow %s with LACP status %s",
> +                    bond->name,
> +                    ds_cstr(&ds),
> +                    lacp_status_description(bond->lacp_status));
> +        ds_destroy(&ds);
> +    }
> +    return NULL;
>  }
>
>  static struct bond_member *
> @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond)
>      bond->active_member = bond_choose_member(bond);
>      if (bond->active_member) {
>          if (bond->active_member->enabled) {
> -            VLOG_INFO_RL(&rl, "bond %s: active member is now %s",
> +            VLOG_INFO_RL(&rl, "%s: active member is now %s",
>                           bond->name, bond->active_member->name);
>          } else {
> -            VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping "
> +            VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping "
>                           "remaining %lld ms updelay (since no member was "
>                           "enabled)", bond->name, bond->active_member->name,
>                           bond->active_member->delay_expires - time_msec());
> @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond)
>          }
>      } else if (old_active_member) {
>          bond_active_member_changed(bond);
> -        VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name);
> +        VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name);
>      }
>  }
>

Reviewed-by: David Marchand <david.march...@redhat.com>


-- 
David Marchand

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to