Hi all, While profiling ath5k_reset, I noticed that ath5k_hw_txpower() takes around 200-300 us due to a lot of calls to ath5k_get_interpolated_value(). I'm not too familiar with the code; is there anything we could cache here so that we wouldn't have to recompute it? 200-300 us is only a small part of reset, though.
Here's a small patch to make some things better for now, please test and let me know if there are any issues: There's some low-hanging fruit: whenever we interpolate a line in a loop and the endpoints don't change, we can compute the slope only once and save a bunch of divides. This reduces runtime for ath5k_create_power_curve() from 30us to 1us and cuts ath5k_hw_txpower() to 100us. More gains could be had by using suitable shifts for SLOPE_SCALE, but that will change rounding results so leave it be for now. Signed-off-by: Bob Copeland <m...@bobcopeland.com> --- drivers/net/wireless/ath/ath5k/phy.c | 55 ++++++++++++++++++++++++---------- 1 files changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c index e2a5606..41e36a3 100644 --- a/drivers/net/wireless/ath/ath5k/phy.c +++ b/drivers/net/wireless/ath/ath5k/phy.c @@ -1871,14 +1871,30 @@ ath5k_hw_set_antenna_mode(struct ath5k_hw *ah, u8 ant_mode) * Helper functions */ +#define SLOPE_SCALE 100 +/* + * Compute dy/dx, returning result pre-scaled by SLOPE_SCALE + */ +static s32 ath5k_compute_slope(s16 x0, s16 y0, s16 x1, s16 y1) +{ + s32 dy = y1 - y0; + return (dy * SLOPE_SCALE) / (x1 - x0); +} + /* * Do linear interpolation between two given (x, y) points */ static s16 +ath5k_interpolate(s16 target, s16 x_left, s16 y_left, s32 slope) +{ + return y_left + (slope * (target - x_left)) / SLOPE_SCALE; +} + +static s16 ath5k_get_interpolated_value(s16 target, s16 x_left, s16 x_right, s16 y_left, s16 y_right) { - s16 ratio, result; + s32 ratio; /* Avoid divide by zero and skip interpolation * if we have the same point */ @@ -1891,12 +1907,8 @@ ath5k_get_interpolated_value(s16 target, s16 x_left, s16 x_right, * always 1 instead of 1.25, 1.75 etc). We scale up by 100 * to have some accuracy both for 0.5 and 0.25 steps. 
*/ - ratio = ((100 * y_right - 100 * y_left)/(x_right - x_left)); - - /* Now scale down to be in range */ - result = y_left + (ratio * (target - x_left) / 100); - - return result; + ratio = ath5k_compute_slope(x_left, y_left, x_right, y_right); + return ath5k_interpolate(target, x_left, y_left, ratio); } /* @@ -1914,6 +1926,7 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR, s8 tmp; s16 min_pwrL, min_pwrR; s16 pwr_i; + u32 slope; /* Some vendors write the same pcdac value twice !!! */ if (stepL[0] == stepL[1] || stepR[0] == stepR[1]) @@ -1922,12 +1935,14 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR, if (pwrL[0] == pwrL[1]) min_pwrL = pwrL[0]; else { + slope = ath5k_compute_slope(pwrL[0], stepL[0], + pwrL[1], stepL[1]); + pwr_i = pwrL[0]; do { pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrL[0], pwrL[1], - stepL[0], stepL[1]); + tmp = (s8) ath5k_interpolate(pwr_i, pwrL[0], + stepL[0], slope); } while (tmp > 1); min_pwrL = pwr_i; @@ -1936,12 +1951,14 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR, if (pwrR[0] == pwrR[1]) min_pwrR = pwrR[0]; else { + slope = ath5k_compute_slope(pwrR[0], stepR[0], + pwrR[1], stepR[1]); + pwr_i = pwrR[0]; do { pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrR[0], pwrR[1], - stepR[0], stepR[1]); + tmp = (s8) ath5k_interpolate(pwr_i, pwrR[0], + stepR[0], slope); } while (tmp > 1); min_pwrR = pwr_i; @@ -1971,6 +1988,7 @@ ath5k_create_power_curve(s16 pmin, s16 pmax, { u8 idx[2] = { 0, 1 }; s16 pwr_i = 2*pmin; + u32 slope; int i; if (num_points < 2) @@ -1988,6 +2006,9 @@ ath5k_create_power_curve(s16 pmin, s16 pmax, /* Find surrounding turning points (TPs) * and interpolate between them */ + slope = ath5k_compute_slope(pwr[idx[0]], vpd[idx[0]], + pwr[idx[1]], vpd[idx[1]]); + for (i = 0; (i <= (u16) (pmax - pmin)) && (i < AR5K_EEPROM_POWER_TABLE_SIZE); i++) { @@ -1997,11 +2018,13 @@ ath5k_create_power_curve(s16 pmin, s16 pmax, if ((pwr_i > pwr[idx[1]]) && (idx[1] < 
num_points - 1)) { idx[0]++; idx[1]++; + + slope = ath5k_compute_slope(pwr[idx[0]], vpd[idx[0]], + pwr[idx[1]], vpd[idx[1]]); } - vpd_table[i] = (u8) ath5k_get_interpolated_value(pwr_i, - pwr[idx[0]], pwr[idx[1]], - vpd[idx[0]], vpd[idx[1]]); + vpd_table[i] = (u8) ath5k_interpolate(pwr_i, + pwr[idx[0]], vpd[idx[0]], slope); /* Increase by 0.5dB * (0.25 dB units) */ -- 1.6.2.5 -- Bob Copeland %% www.bobcopeland.com _______________________________________________ ath5k-devel mailing list ath5k-devel@lists.ath5k.org https://lists.ath5k.org/mailman/listinfo/ath5k-devel