On Wed, Feb 20, 2019 at 04:04:13PM +0100, Stefan Sperling wrote: > iwm(4) has been using a firmware-based approach to retrying failed > Tx attempts with successively lower data rates. This is suboptimal > for our kernel's Tx rate selection algorithms, which are called > MiRA (for 11n) and AMRR (for 11a/b/g). > > I have observed a mismatch between the Tx rate reported by ifconfig iwm0 > and the Rx rate reported at the receiving end, and this diff fixes > that issue for me. > > Because iwm firmware retries on lower rates, our own rate selection > is fooled into believing that high rates succeed when they in fact > don't succeed. This causes more Tx retries than necessary, which > results in lower throughput than we could achieve otherwise. > > With the diff below we instead ask the firmware to always keep retrying > at a constant Tx rate. This provides more accurate feedback to MiRa and > AMRR, which are then able to select an initial Tx rate which is more > likely to succeed. > > However, there is another reason why we were using firmware-based retries. > In 11n mode, net80211 only knows about rates which are based on OFDM. > On 2GHz channels, the older CCK modulation provides a larger range and can > thus make the difference between a bad network connection and no network > connection at all. We were relying on the firmware to retry with CCK if > OFDM frames failed so I had to implement a similar strategy in the driver. > This is achived by falling back to CCK rates and running AMRR instead of > MiRA if the lowest OFDM rate is failing, until AMRR decides to start using > OFDM again, and then switch back to OFDM + MiRA. Getting the driver to switch > back to OFDM once it is using CCK wasn't straightforward. I eventually found > out that feeding only actual Tx failures to AMRR makes this work well. > > I had to remove 11n support from AMRR which interfered with this fallback. > This is overdue anyway since all drivers are using MiRa in 11n mode now. > > When testing this, please compare throughput before and after this diff. > I run tcpbench -s on a host behind the AP and start a tcpbench client on > my iwm laptop. With this diff, and an 11ac AP on channel 149, tcpbench > measures up to 26 Mpbs in my environment, with an average rate above 20 Mbps. > > Also pay attention to changes in latency. To test this, on my iwm laptop > I run ping and rain(6) through an SSH connection to a machine behind the AP. > > Tested on a 7265 device only so far. > > iwn(4) has the same problem and should also be fixed once this change > has made it into the tree. > > diff 3cfc9cae129c023be655a6d31902cddd094fdec4 /usr/src > blob - 45e4446d79d2da0f847170e34b9a01e25b71ea62 > file + sys/dev/pci/if_iwm.c > --- sys/dev/pci/if_iwm.c > +++ sys/dev/pci/if_iwm.c > @@ -217,6 +217,7 @@ const struct iwm_rate { > #define IWM_RIDX_MAX (nitems(iwm_rates)-1) > #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM) > #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM) > +#define IWM_RVAL_IS_OFDM(_i_) ((_i_) >= 12 && (_i_) != 22) > > /* Convert an MCS index into an iwm_rates[] index. */ > const int iwm_mcs2ridx[] = { > @@ -368,6 +369,7 @@ void iwm_rx_rx_phy_cmd(struct iwm_softc *, struct > iwm_ > int iwm_get_noise(const struct iwm_statistics_rx_non_phy *); > void iwm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *, > struct iwm_rx_data *); > +void iwm_enable_ht_cck_fallback(struct iwm_softc *, struct iwm_node *); > void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *, > struct iwm_node *); > void iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *, > @@ -447,7 +449,6 @@ int iwm_run(struct iwm_softc *); > int iwm_run_stop(struct iwm_softc *); > struct ieee80211_node *iwm_node_alloc(struct ieee80211com *); > void iwm_calib_timeout(void *); > -void iwm_setrates(struct iwm_node *); > int iwm_media_change(struct ifnet *); > void iwm_newstate_task(void *); > int iwm_newstate(struct ieee80211com *, enum ieee80211_state, int); > @@ -3574,6 +3575,37 @@ iwm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_pac > } > > void > +iwm_enable_ht_cck_fallback(struct iwm_softc *sc, struct iwm_node *in) > +{ > + struct ieee80211com *ic = &sc->sc_ic; > + struct ieee80211_node *ni = &in->in_ni; > + struct ieee80211_rateset *rs = &ni->ni_rates; > + uint8_t rval = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL); > + uint8_t min_rval = ieee80211_min_basic_rate(ic); > + int i; > + > + /* Are CCK frames forbidden in our BSS? */ > + if (IWM_RVAL_IS_OFDM(min_rval)) > + return; > + > + in->ht_force_cck = 1; > + > + ieee80211_mira_cancel_timeouts(&in->in_mn); > + ieee80211_mira_node_init(&in->in_mn); > + ieee80211_amrr_node_init(&sc->sc_amrr, &in->in_amn); > + > + /* Choose initial CCK Tx rate. */ > + ni->ni_txrate = 0; > + for (i = 0; i < rs->rs_nrates; i++) { > + rval = (rs->rs_rates[i] & IEEE80211_RATE_VAL); > + if (rval == min_rval) { > + ni->ni_txrate = i; > + break; > + } > + } > +} > + > +void > iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt, > struct iwm_node *in) > { > @@ -3590,12 +3622,18 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_ > status != IWM_TX_STATUS_DIRECT_DONE); > > /* Update rate control statistics. */ > - if ((ni->ni_flags & IEEE80211_NODE_HT) == 0) { > + if ((ni->ni_flags & IEEE80211_NODE_HT) == 0 || in->ht_force_cck) { > in->in_amn.amn_txcnt++; > - if (tx_resp->failure_frame > 0) > + if (in->ht_force_cck) { > + /* > + * We want to move back to OFDM quickly if possible. > + * Only show actual Tx failures to AMRR, not retries. > + */ > + if (txfail) > + in->in_amn.amn_retrycnt++; > + } else if (tx_resp->failure_frame > 0) > in->in_amn.amn_retrycnt++; > } else if (ic->ic_fixed_mcs == -1) { > - int omcs = ni->ni_txmcs; > in->in_mn.frames += tx_resp->frame_count; > in->in_mn.ampdu_size = le16toh(tx_resp->byte_cnt); > in->in_mn.agglen = tx_resp->frame_count; > @@ -3603,14 +3641,16 @@ iwm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_ > in->in_mn.retries += tx_resp->failure_frame; > if (txfail) > in->in_mn.txfail += tx_resp->frame_count; > - if (ic->ic_state == IEEE80211_S_RUN) > + if (ic->ic_state == IEEE80211_S_RUN && !in->ht_force_cck) { > + int otxmcs = ni->ni_txmcs; > + > ieee80211_mira_choose(&in->in_mn, ic, &in->in_ni); > - /* > - * If MiRA has chosen a new TX rate we must update > - * the firwmare's LQ rate table from process context. > - */ > - if (omcs != ni->ni_txmcs) > - iwm_add_task(sc, systq, &sc->setrates_task); > + > + /* Fall back to CCK rates if MCS 0 is failing. */ > + if (txfail && IEEE80211_IS_CHAN_2GHZ(ni->ni_chan) && > + otxmcs == 0 && ni->ni_txmcs == 0) > + iwm_enable_ht_cck_fallback(sc, in); > + } > } > > if (txfail) > @@ -4112,41 +4152,32 @@ iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node > { > struct ieee80211com *ic = &sc->sc_ic; > struct ieee80211_node *ni = &in->in_ni; > + struct ieee80211_rateset *rs = &ni->ni_rates; > const struct iwm_rate *rinfo; > int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; > - int ridx, rate_flags, i; > - int nrates = ni->ni_rates.rs_nrates; > + int min_ridx = iwm_rval2ridx(ieee80211_min_basic_rate(ic)); > + int ridx, rate_flags; > > tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT; > - tx->data_retry_limit = IWM_DEFAULT_TX_RETRY; > + tx->data_retry_limit = IWM_LOW_RETRY_LIMIT; > > if (IEEE80211_IS_MULTICAST(wh->i_addr1) || > type != IEEE80211_FC0_TYPE_DATA) { > /* for non-data, use the lowest supported rate */ > - ridx = iwm_rval2ridx(ieee80211_min_basic_rate(ic)); > + ridx = min_ridx; > tx->data_retry_limit = IWM_MGMT_DFAULT_RETRY_LIMIT; > } else if (ic->ic_fixed_mcs != -1) { > ridx = sc->sc_fixed_ridx; > } else if (ic->ic_fixed_rate != -1) { > ridx = sc->sc_fixed_ridx; > + } else if ((ni->ni_flags & IEEE80211_NODE_HT) && !in->ht_force_cck) { > + ridx = iwm_mcs2ridx[ni->ni_txmcs]; > } else { > - /* for data frames, use RS table */ > - tx->initial_rate_index = 0; > - tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE); > - if (ni->ni_flags & IEEE80211_NODE_HT) { > - ridx = iwm_mcs2ridx[ni->ni_txmcs]; > - return &iwm_rates[ridx]; > - } > - ridx = (IEEE80211_IS_CHAN_5GHZ(ni->ni_chan)) ? > - IWM_RIDX_OFDM : IWM_RIDX_CCK; > - for (i = 0; i < nrates; i++) { > - if (iwm_rates[i].rate == (ni->ni_txrate & > - IEEE80211_RATE_VAL)) { > - ridx = i; > - break; > - } > - } > - return &iwm_rates[ridx]; > + uint8_t rval; > + rval = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL); > + ridx = iwm_rval2ridx(rval); > + if (ridx < min_ridx) > + ridx = min_ridx; > } > > rinfo = &iwm_rates[ridx]; > @@ -4228,7 +4259,7 @@ iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ie > if ((ni->ni_flags & IEEE80211_NODE_HT) && > !IEEE80211_IS_MULTICAST(wh->i_addr1) && > type == IEEE80211_FC0_TYPE_DATA && > - rinfo->plcp == IWM_RATE_INVM_PLCP) { > + rinfo->ht_plcp != IWM_RATE_HT_SISO_MCS_INV_PLCP) { > tap->wt_rate = (0x80 | rinfo->ht_plcp); > } else > tap->wt_rate = rinfo->rate; > @@ -5172,6 +5203,8 @@ iwm_rval2ridx(int rval) > int ridx; > > for (ridx = 0; ridx < nitems(iwm_rates); ridx++) { > + if (iwm_rates[ridx].plcp == IWM_RATE_INVM_PLCP) > + continue; > if (rval == iwm_rates[ridx].rate) > break; > } > @@ -5837,7 +5870,6 @@ iwm_run(struct iwm_softc *sc) > /* Start at lowest available bit-rate, AMRR will raise. */ > in->in_ni.ni_txrate = 0; > in->in_ni.ni_txmcs = 0; > - iwm_setrates(in); > > timeout_add_msec(&sc->sc_calib_to, 500); > iwm_led_enable(sc); > @@ -5900,159 +5932,27 @@ iwm_calib_timeout(void *arg) > struct ieee80211com *ic = &sc->sc_ic; > struct iwm_node *in = (void *)ic->ic_bss; > struct ieee80211_node *ni = &in->in_ni; > - int s, otxrate; > + int s; > > s = splnet(); > if ((ic->ic_fixed_rate == -1 || ic->ic_fixed_mcs == -1) && > - ((ni->ni_flags & IEEE80211_NODE_HT) == 0) && > + ((ni->ni_flags & IEEE80211_NODE_HT) == 0 || in->ht_force_cck) && > ic->ic_opmode == IEEE80211_M_STA && ic->ic_bss) { > - otxrate = ni->ni_txrate; > ieee80211_amrr_choose(&sc->sc_amrr, &in->in_ni, &in->in_amn); > - /* > - * If AMRR has chosen a new TX rate we must update > - * the firwmare's LQ rate table from process context. > - */ > - if (otxrate != ni->ni_txrate) > - iwm_add_task(sc, systq, &sc->setrates_task); > + if (in->ht_force_cck) { > + struct ieee80211_rateset *rs = &ni->ni_rates; > + uint8_t rv; > + rv = (rs->rs_rates[ni->ni_txrate] & IEEE80211_RATE_VAL); > + if (IWM_RVAL_IS_OFDM(rv)) > + in->ht_force_cck = 0; > + } > } > + > splx(s); > > timeout_add_msec(&sc->sc_calib_to, 500); > } > > -void > -iwm_setrates_task(void *arg) > -{ > - struct iwm_softc *sc = arg; > - struct ieee80211com *ic = &sc->sc_ic; > - struct iwm_node *in = (struct iwm_node *)ic->ic_bss; > - int s = splnet(); > - > - if (sc->sc_flags & IWM_FLAG_SHUTDOWN) { > - refcnt_rele_wake(&sc->task_refs); > - splx(s); > - return; > - } > - > - /* Update rates table based on new TX rate determined by AMRR. */ > - iwm_setrates(in); > - refcnt_rele_wake(&sc->task_refs); > - splx(s); > -} > - > -void > -iwm_setrates(struct iwm_node *in) > -{ > - struct ieee80211_node *ni = &in->in_ni; > - struct ieee80211com *ic = ni->ni_ic; > - struct iwm_softc *sc = IC2IFP(ic)->if_softc; > - struct iwm_lq_cmd *lq = &in->in_lq; > - struct ieee80211_rateset *rs = &ni->ni_rates; > - int i, ridx, ridx_min, ridx_max, j, sgi_ok = 0, mimo, tab = 0; > - struct iwm_host_cmd cmd = { > - .id = IWM_LQ_CMD, > - .len = { sizeof(in->in_lq), }, > - }; > - > - memset(lq, 0, sizeof(*lq)); > - lq->sta_id = IWM_STATION_ID; > - > - if (ic->ic_flags & IEEE80211_F_USEPROT) > - lq->flags |= IWM_LQ_FLAG_USE_RTS_MSK; > - > - if ((ni->ni_flags & IEEE80211_NODE_HT) && > - ieee80211_node_supports_ht_sgi20(ni)) { > - ni->ni_flags |= IEEE80211_NODE_HT_SGI20; > - sgi_ok = 1; > - } > - > - /* > - * Fill the LQ rate selection table with legacy and/or HT rates > - * in descending order, i.e. with the node's current TX rate first. > - * In cases where throughput of an HT rate corresponds to a legacy > - * rate it makes no sense to add both. We rely on the fact that > - * iwm_rates is laid out such that equivalent HT/legacy rates share > - * the same IWM_RATE_*_INDEX value. Also, rates not applicable to > - * legacy/HT are assumed to be marked with an 'invalid' PLCP value. > - */ > - j = 0; > - ridx_min = iwm_rval2ridx(ieee80211_min_basic_rate(ic)); > - mimo = iwm_is_mimo_mcs(ni->ni_txmcs); > - ridx_max = (mimo ? IWM_RIDX_MAX : IWM_LAST_HT_SISO_RATE); > - for (ridx = ridx_max; ridx >= ridx_min; ridx--) { > - uint8_t plcp = iwm_rates[ridx].plcp; > - uint8_t ht_plcp = iwm_rates[ridx].ht_plcp; > - > - if (j >= nitems(lq->rs_table)) > - break; > - tab = 0; > - if (ni->ni_flags & IEEE80211_NODE_HT) { > - if (ht_plcp == IWM_RATE_HT_SISO_MCS_INV_PLCP) > - continue; > - /* Do not mix SISO and MIMO HT rates. */ > - if ((mimo && !iwm_is_mimo_ht_plcp(ht_plcp)) || > - (!mimo && iwm_is_mimo_ht_plcp(ht_plcp))) > - continue; > - for (i = ni->ni_txmcs; i >= 0; i--) { > - if (isclr(ni->ni_rxmcs, i)) > - continue; > - if (ridx == iwm_mcs2ridx[i]) { > - tab = ht_plcp; > - tab |= IWM_RATE_MCS_HT_MSK; > - if (sgi_ok) > - tab |= IWM_RATE_MCS_SGI_MSK; > - break; > - } > - } > - } else if (plcp != IWM_RATE_INVM_PLCP) { > - for (i = ni->ni_txrate; i >= 0; i--) { > - if (iwm_rates[ridx].rate == (rs->rs_rates[i] & > - IEEE80211_RATE_VAL)) { > - tab = plcp; > - break; > - } > - } > - } > - > - if (tab == 0) > - continue; > - > - if (iwm_is_mimo_ht_plcp(ht_plcp)) > - tab |= IWM_RATE_MCS_ANT_AB_MSK; > - else > - tab |= IWM_RATE_MCS_ANT_A_MSK; > - > - if (IWM_RIDX_IS_CCK(ridx)) > - tab |= IWM_RATE_MCS_CCK_MSK; > - lq->rs_table[j++] = htole32(tab); > - } > - > - lq->mimo_delim = (mimo ? j : 0); > - > - /* Fill the rest with the lowest possible rate */ > - while (j < nitems(lq->rs_table)) { > - tab = iwm_rates[ridx_min].plcp; > - if (IWM_RIDX_IS_CCK(ridx_min)) > - tab |= IWM_RATE_MCS_CCK_MSK; > - tab |= IWM_RATE_MCS_ANT_A_MSK; > - lq->rs_table[j++] = htole32(tab); > - } > - > - lq->single_stream_ant_msk = IWM_ANT_A; > - lq->dual_stream_ant_msk = IWM_ANT_AB; > - > - lq->agg_time_limit = htole16(4000); /* 4ms */ > - lq->agg_disable_start_th = 3; > -#ifdef notyet > - lq->agg_frame_cnt_limit = 0x3f; > -#else > - lq->agg_frame_cnt_limit = 1; /* tx agg disabled */ > -#endif > - > - cmd.data[0] = &in->in_lq; > - iwm_send_cmd(sc, &cmd); > -} > - > int > iwm_media_change(struct ifnet *ifp) > { > @@ -6196,7 +6096,6 @@ iwm_newstate(struct ieee80211com *ic, enum ieee80211_s > if (ic->ic_state == IEEE80211_S_RUN) { > timeout_del(&sc->sc_calib_to); > ieee80211_mira_cancel_timeouts(&in->in_mn); > - iwm_del_task(sc, systq, &sc->setrates_task); > iwm_del_task(sc, systq, &sc->ba_task); > iwm_del_task(sc, systq, &sc->htprot_task); > } > @@ -6705,7 +6604,6 @@ iwm_stop(struct ifnet *ifp) > /* Cancel scheduled tasks and let any stale tasks finish up. */ > task_del(systq, &sc->init_task); > iwm_del_task(sc, sc->sc_nswq, &sc->newstate_task); > - iwm_del_task(sc, systq, &sc->setrates_task); > iwm_del_task(sc, systq, &sc->ba_task); > iwm_del_task(sc, systq, &sc->htprot_task); > KASSERT(sc->task_refs.refs >= 1); > @@ -7895,7 +7793,6 @@ iwm_attach(struct device *parent, struct device *self, > timeout_set(&sc->sc_led_blink_to, iwm_led_blink_timeout, sc); > task_set(&sc->init_task, iwm_init_task, sc); > task_set(&sc->newstate_task, iwm_newstate_task, sc); > - task_set(&sc->setrates_task, iwm_setrates_task, sc); > task_set(&sc->ba_task, iwm_ba_task, sc); > task_set(&sc->htprot_task, iwm_htprot_task, sc); > > blob - 7e35820c7ceb997683c3c0c0e1da1112290c38cd > file + sys/dev/pci/if_iwmvar.h > --- sys/dev/pci/if_iwmvar.h > +++ sys/dev/pci/if_iwmvar.h > @@ -509,9 +509,12 @@ struct iwm_node { > uint16_t in_id; > uint16_t in_color; > > - struct iwm_lq_cmd in_lq; > struct ieee80211_amrr_node in_amn; > struct ieee80211_mira_node in_mn; > + > + /* Set in 11n mode if we don't receive ACKs for OFDM frames. */ > + int ht_force_cck; > + > }; > #define IWM_STATION_ID 0 > #define IWM_AUX_STA_ID 1 > blob - 2daf18ca6aed42bfded73d204690736ca6a4ae5e > file + sys/net80211/ieee80211_amrr.c > --- sys/net80211/ieee80211_amrr.c > +++ sys/net80211/ieee80211_amrr.c > @@ -41,45 +41,28 @@ > #define reset_cnt(amn) \ > do { (amn)->amn_txcnt = (amn)->amn_retrycnt = 0; } while (0) > > -/* > - * XXX In HT mode we only support MCS 0-7, for now. > - * Beyond MCS 7, incrementing the MCS index does not imply a > - * higher data rate, so this simple implementation will need > - * to be enhanced. > - */ > - > static inline int > is_min_rate(struct ieee80211_node *ni) > { > - if (ni->ni_flags & IEEE80211_NODE_HT) > - return (ni->ni_txmcs == 0); > return (ni->ni_txrate == 0); > } > > static inline int > is_max_rate(struct ieee80211_node *ni) > { > - if (ni->ni_flags & IEEE80211_NODE_HT) > - return (ni->ni_txmcs == 7); /* XXX up to MCS 7 only */ > return (ni->ni_txrate == ni->ni_rates.rs_nrates - 1); > } > > static inline void > increase_rate(struct ieee80211_node *ni) > { > - if (ni->ni_flags & IEEE80211_NODE_HT) > - ni->ni_txmcs++; > - else > - ni->ni_txrate++; > + ni->ni_txrate++; > } > > static inline void > decrease_rate(struct ieee80211_node *ni) > { > - if (ni->ni_flags & IEEE80211_NODE_HT) > - ni->ni_txmcs--; > - else > - ni->ni_txrate--; > + ni->ni_txrate--; > } > > void > @@ -110,7 +93,6 @@ ieee80211_amrr_choose(struct ieee80211_amrr *amrr, str > amn->amn_success = 0; > increase_rate(ni); > DPRINTF(("increase rate=%d,#tx=%d,#retries=%d\n", > - (ni->ni_flags & IEEE80211_NODE_HT) ? ni->ni_txmcs : > RV(ni->ni_rates.rs_rates[ni->ni_txrate]), > amn->amn_txcnt, amn->amn_retrycnt)); > need_change = 1; > @@ -132,7 +114,6 @@ ieee80211_amrr_choose(struct ieee80211_amrr *amrr, str > } > decrease_rate(ni); > DPRINTF(("decrease rate=%d,#tx=%d,#retries=%d\n", > - (ni->ni_flags & IEEE80211_NODE_HT) ? ni->ni_txmcs : > RV(ni->ni_rates.rs_rates[ni->ni_txrate]), > amn->amn_txcnt, amn->amn_retrycnt)); > need_change = 1; > > > > Hi
# hardware iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi iwm0: hw rev 0x230, fw ver 22.361476.0, address b4:6b:fc:f3:e4:13 # local ping before round-trip min/avg/max/std-dev = 2.148/2.695/4.014/0.491 ms # local ping after round-trip min/avg/max/std-dev = 1.888/3.236/9.751/1.356 ms I'll report any changes if I see some.