On Wed, Jun 22, 2022 at 4:29 PM Mike Pattrick <m...@redhat.com> wrote: > > Add additional logging for debug and info level with a focus on code > related to bond members coming up, go down, and changing. > > Several existing log messages were modified to handle sub 1kB log > messages with more grace. Instead of reporting 0kB of traffic load > shifting from one member to another, we now suppress these messages at > the INFO level and display exact byte count at the debug level. > > Signed-off-by: Mike Pattrick <m...@redhat.com> > > -- > > Since v4: > - Removed prefixes > - Removed unrequired conditional > > Since v3: > - Removed log if bond members were significantly deviant > - Normalized log prefixes, including LACP > - Added details to the member enable/disable log > > Since v2: > - Normalized log capitalization and periods > - Summarized info level hash transfers > - Added log if bond members were significantly deviant > > Signed-off-by: Mike Pattrick <m...@redhat.com> > --- > lib/lacp.c | 26 +++++++++--- > ofproto/bond.c | 110 +++++++++++++++++++++++++++++++++---------------- > 2 files changed, 96 insertions(+), 40 deletions(-) > > diff --git a/lib/lacp.c b/lib/lacp.c > index 3252f17eb..bca347129 100644 > --- a/lib/lacp.c > +++ b/lib/lacp.c > @@ -367,12 +367,12 @@ lacp_process_packet(struct lacp *lacp, const void > *member_, > break; > case SUBTYPE_MARKER: > member->count_rx_pdus_marker++; > - VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); > + VLOG_DBG("%s: received a LACP marker PDU", lacp->name); > goto out; > case SUBTYPE_UNUSED: > default: > member->count_rx_pdus_bad++; > - VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", > + VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU", > lacp->name); > goto out; > } > @@ -382,11 +382,16 @@ lacp_process_packet(struct lacp *lacp, const void > *member_, > * trigger re-checking of L1 state. */ > if (!member->carrier_up) { > VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," > - " dropping received LACP PDU.", member->name); > + " dropping received LACP PDU", member->name); > seq_change(connectivity_seq_get()); > goto out; > } > > + if (member->status == LACP_DEFAULTED) { > + VLOG_INFO("%s: defaulted member %s reestablished connection with" > + " LACP partner", lacp->name, member->name); > + } > + > member->status = LACP_CURRENT; > tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; > timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); > @@ -465,9 +470,16 @@ lacp_member_register(struct lacp *lacp, void *member_, > if (!lacp->key_member) { > lacp->key_member = member; > } > + > + VLOG_DBG("%s: member %s added", lacp->name, s->name); > } > > if (!member->name || strcmp(s->name, member->name)) { > + if (member->name) { > + VLOG_DBG("%s: member %s renamed from %s", lacp->name, > + s->name, member->name); > + } > + > free(member->name); > member->name = xstrdup(s->name); > } > @@ -531,6 +543,9 @@ lacp_member_carrier_changed(const struct lacp *lacp, > const void *member_, > member->count_carrier_changed++; > } > > + VLOG_DBG("%s: member %s changed state to %s", lacp->name, > + member->name, carrier_up ? "up" : "down"); > + > out: > lacp_unlock(); > } > @@ -710,8 +725,9 @@ lacp_update_attached(struct lacp *lacp) > OVS_REQUIRES(mutex) > /* XXX: In the future allow users to configure the expected system > ID. > * For now just special case loopback. */ > if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { > - VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " > - "connected to its own bond", member->name); > + VLOG_WARN_RL(&rl, "%s: member %s: Loopback detected. " > + "Interface is connected to its own bond", > + lacp->name, member->name); > continue; > } > > diff --git a/ofproto/bond.c b/ofproto/bond.c > index 845f69e21..13702950d 100644 > --- a/ofproto/bond.c > +++ b/ofproto/bond.c > @@ -386,7 +386,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to add post recirculation flow %s", err_s); > + VLOG_ERR("%s: failed to add post recirculation flow %s", > + bond->name, err_s); > free(err_s); > } > break; > @@ -399,7 +400,8 @@ update_recirc_rules__(struct bond *bond) > char *err_s = match_to_string(&pr_op->match, NULL, > RECIRC_RULE_PRIORITY); > > - VLOG_ERR("failed to remove post recirculation flow %s", > err_s); > + VLOG_ERR("%s: failed to remove post recirculation flow " > + "%s", bond->name, err_s); > free(err_s); > } > > @@ -496,7 +498,7 @@ bond_reconfigure(struct bond *bond, const struct > bond_settings *s) > if (s->use_lb_output_action && > !ovs_lb_output_action_supported(bond->ofproto)) { > VLOG_WARN("%s: Datapath does not support 'lb_output' action, " > - "disabled.", bond->name); > + "disabled", bond->name); > } else { > bond->use_lb_output_action = s->use_lb_output_action; > if (!bond->use_lb_output_action) { > @@ -855,6 +857,8 @@ bond_check_admissibility(struct bond *bond, const void > *member_, > ovs_rwlock_rdlock(&rwlock); > member = bond_member_lookup(bond, member_); > if (!member) { > + VLOG_DBG_RL(&rl, "%s: lookup for member on bond failed", > + bond->name);
Is there a valid case where this would not be a bug? > goto out; > } > > @@ -874,7 +878,7 @@ bond_check_admissibility(struct bond *bond, const void > *member_, > * main thread to run LACP state machine and enable the member. */ > verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : > BV_DROP; > if (!member->enabled && member->may_enable) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "main thread has not yet enabled member", > bond->name, bond->active_member->name); > } > @@ -913,7 +917,7 @@ bond_check_admissibility(struct bond *bond, const void > *member_, > /* Drop all packets which arrive on backup members. This is similar > to > * how Linux bonding handles active-backup bonds. */ > if (bond->active_member != member) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: active-backup bond > received " > + VLOG_DBG_RL(&rl, "%s: member %s: active-backup bond received " > "packet on backup member destined for " ETH_ADDR_FMT, > bond->name, member->name, ETH_ADDR_ARGS(eth_dst)); > goto out; > @@ -935,7 +939,7 @@ bond_check_admissibility(struct bond *bond, const void > *member_, > OVS_NOT_REACHED(); > out: > if (member && (verdict != BV_ACCEPT)) { > - VLOG_DBG_RL(&rl, "bond %s: member %s: " > + VLOG_DBG_RL(&rl, "%s: member %s: " > "admissibility verdict is to drop pkt%s, " > "active member: %s, may_enable: %s, enabled: %s, " > "LACP status: %s", > @@ -1127,8 +1131,13 @@ log_bals(struct bond *bond, const struct ovs_list > *bals) > if (ds.length) { > ds_put_char(&ds, ','); Nit. The separator between members in this log message ends up being ", ". So here, we should push an additonal space... > } > - ds_put_format(&ds, " %s %"PRIu64"kB", > - member->name, member->tx_bytes / 1024); > + if (member->tx_bytes > 1024) { > + ds_put_format(&ds, " %s %"PRIu64" kB", ...and then there is no need for this extra space in front of the member name. Otherwise, we get an extra space for the first member: 2022-07-29T14:14:52.015Z|00147|bond|DBG|dpdkbond0: dpdk0 0 B [...] 2022-07-29T14:15:02.479Z|00150|bond|DBG|dpdkbond0: dpdk1 0 B, dpdk0 0 B > + member->name, member->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, " %s %"PRIu64" B", > + member->name, member->tx_bytes); > + } > > if (!member->enabled) { > ds_put_cstr(&ds, " (disabled)"); > @@ -1141,13 +1150,20 @@ log_bals(struct bond *bond, const struct ovs_list > *bals) > if (&e->list_node != ovs_list_front(&member->entries)) { > ds_put_cstr(&ds, " + "); > } > - ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", > - e - bond->hash, e->tx_bytes / 1024); > + if (e->tx_bytes > 1024) { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" kB", > + e - bond->hash, e->tx_bytes / 1024); > + } else { > + ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64" B", > + e - bond->hash, e->tx_bytes); > + } > } > ds_put_cstr(&ds, ")"); > } > } > - VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds)); > + if (ds.length) { > + VLOG_DBG("%s: %s", bond->name, ds_cstr(&ds)); > + } > ds_destroy(&ds); > } > } > @@ -1161,14 +1177,6 @@ bond_shift_load(struct bond_entry *hash, struct > bond_member *to) > struct bond *bond = from->bond; > uint64_t delta = hash->tx_bytes; > > - VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %"PRIdPTR") " > - "from %s to %s (now carrying %"PRIu64"kB and " > - "%"PRIu64"kB load, respectively)", > - bond->name, delta / 1024, hash - bond->hash, > - from->name, to->name, > - (from->tx_bytes - delta) / 1024, > - (to->tx_bytes + delta) / 1024); > - > /* Shift load away from 'from' to 'to'. */ > from->tx_bytes -= delta; > to->tx_bytes += delta; > @@ -1345,6 +1353,7 @@ bond_rebalance(struct bond *bond) > insert_bal(&bals, member); > } > } > + One empty line is added, it seems unrelated. > log_bals(bond, &bals); > > /* Shift load from the most-loaded members to the least-loaded members. > */ > @@ -1356,6 +1365,7 @@ bond_rebalance(struct bond *bond) > uint64_t overload; > > overload = from->tx_bytes - to->tx_bytes; > + Idem. > if (overload < to->tx_bytes >> 5 || overload < 100000) { > /* The extra load on 'from' (and all less-loaded members), > compared > * to that of 'to' (the least-loaded member), is less than ~3%, > or > @@ -1388,6 +1398,12 @@ bond_rebalance(struct bond *bond) > reinsert_bal(&bals, from); > reinsert_bal(&bals, to); > } > + VLOG_INFO("%s: shifted %"PRIuSIZE" hashes totaling %"PRIu64 > + " kB from %s (%"PRIu64" kB) to %s (%"PRIu64" kB)", > + bond->name, cnt, > + (overload + to->tx_bytes - from->tx_bytes) >> 1, > + from->name, from->tx_bytes, to->name, to->tx_bytes); > + > rebalanced = true; > } > > @@ -1565,6 +1581,9 @@ bond_print_details(struct ds *ds, const struct bond > *bond) > if (be_tx_k) { > ds_put_format(ds, " hash %d: %"PRIu64" kB load\n", > hash, be_tx_k); > + } else if (be->tx_bytes) { > + ds_put_format(ds, " hash %d: %"PRIu64" B load\n", > + hash, be->tx_bytes); > } > > /* XXX How can we list the MACs assigned to hashes of SLB bonds? > */ > @@ -1691,7 +1710,7 @@ bond_unixctl_set_active_member(struct unixctl_conn > *conn, > if (bond->active_member != member) { > bond->bond_revalidate = true; > bond->active_member = member; > - VLOG_INFO("bond %s: active member is now %s", > + VLOG_INFO("%s: active member is now %s", > bond->name, member->name); > bond->send_learning_packets = true; > unixctl_command_reply(conn, "done"); > @@ -1869,7 +1888,7 @@ bond_enable_member(struct bond_member *member, bool > enable) > } > ovs_mutex_unlock(&member->bond->mutex); > > - VLOG_INFO("member %s: %s", member->name, > + VLOG_INFO("%s: member %s %s", bond->name, member->name, > member->enabled ? "enabled" : "disabled"); > } > } > @@ -1879,23 +1898,31 @@ bond_link_status_update(struct bond_member *member) > { > struct bond *bond = member->bond; > bool up; > + bool carrier = netdev_get_carrier(member->netdev); > > - up = netdev_get_carrier(member->netdev) && member->may_enable; > + up = carrier && member->may_enable; > if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { > + char * msg; > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > - VLOG_INFO_RL(&rl, "member %s: link state %s", > - member->name, up ? "up" : "down"); > + if (!carrier) { > + msg = "carrier state"; > + } else { > + msg = "link state"; > + } > + VLOG_INFO_RL(&rl, "%s: member %s %s %s", bond->name, > + member->name, msg, up ? "up" : "down"); > if (up == member->enabled) { > member->delay_expires = LLONG_MAX; > - VLOG_INFO_RL(&rl, "member %s: will not be %s", > - member->name, up ? "disabled" : "enabled"); > + VLOG_INFO_RL(&rl, "%s: member %s will not be %s", > + bond->name, member->name, > + up ? "disabled" : "enabled"); > } else { > int delay = up ? bond->updelay : bond->downdelay; > member->delay_expires = time_msec() + delay; > if (delay) { > - VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " > - "for %d ms", > - member->name, > + VLOG_INFO_RL(&rl, "%s: member %s will be %s" > + " if it stays %s for %d ms", > + bond->name, member->name, > up ? "enabled" : "disabled", > up ? "up" : "down", > delay); > @@ -1952,14 +1979,15 @@ choose_output_member(const struct bond *bond, const > struct flow *flow, > { > struct bond_entry *e; > int balance; > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > balance = bond->balance; > if (bond->lacp_status == LACP_CONFIGURED) { > /* LACP has been configured on this bond but negotiations were > - * unsuccussful. If lacp_fallback_ab is enabled use active- > + * unsuccessful. If lacp_fallback_ab is enabled use active- > * backup mode else drop all traffic. */ > if (!bond->lacp_fallback_ab) { > - return NULL; > + goto error; > } > balance = BM_AB; > } > @@ -1971,7 +1999,7 @@ choose_output_member(const struct bond *bond, const > struct flow *flow, > case BM_TCP: > if (bond->lacp_status != LACP_NEGOTIATED) { > /* Must have LACP negotiations for TCP balanced bonds. */ > - return NULL; > + goto error; > } > if (wc) { > flow_mask_hash_fields(flow, wc, > NX_HASH_FIELDS_SYMMETRIC_L3L4_UDP); > @@ -1990,6 +2018,18 @@ choose_output_member(const struct bond *bond, const > struct flow *flow, > default: > OVS_NOT_REACHED(); > } > +error: > + if (VLOG_IS_DBG_ENABLED()) { > + struct ds ds = DS_EMPTY_INITIALIZER; > + flow_format(&ds, flow, NULL); > + VLOG_DBG_RL(&rl, "%s: cannot choose output member " > + "for flow %s with LACP status %s", > + bond->name, > + ds_cstr(&ds), > + lacp_status_description(bond->lacp_status)); > + ds_destroy(&ds); > + } > + return NULL; > } > > static struct bond_member * > @@ -2039,10 +2079,10 @@ bond_choose_active_member(struct bond *bond) > bond->active_member = bond_choose_member(bond); > if (bond->active_member) { > if (bond->active_member->enabled) { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s", > + VLOG_INFO_RL(&rl, "%s: active member is now %s", > bond->name, bond->active_member->name); > } else { > - VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " > + VLOG_INFO_RL(&rl, "%s: active member is now %s, skipping " > "remaining %lld ms updelay (since no member was " > "enabled)", bond->name, bond->active_member->name, > bond->active_member->delay_expires - time_msec()); > @@ -2056,7 +2096,7 @@ bond_choose_active_member(struct bond *bond) > } > } else if (old_active_member) { > bond_active_member_changed(bond); > - VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); > + VLOG_INFO_RL(&rl, "%s: all members disabled", bond->name); > } > } > Reviewed-by: David Marchand <david.march...@redhat.com> -- David Marchand _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev