Hi all,

While profiling ath5k_reset, I noticed that ath5k_hw_txpower() takes
around 200-300 us due to a lot of calls to ath5k_get_interpolated_value().
I'm not too familiar with the code; is there anything we could cache
here so that we wouldn't have to recompute it?  200-300 us is only a
small part of reset, though.

Here's a small patch to make some things better for now.  Please test
it and let me know if there are any issues:

There's some low-hanging fruit: whenever we interpolate a line in
a loop and the endpoints don't change, we can compute the slope only
once and save a bunch of divides.  This reduces runtime for
ath5k_create_power_curve() from 30us to 1us and cuts ath5k_hw_txpower()
to 100us.  More gains could be had by using suitable shifts for
SLOPE_SCALE, but that will change rounding results so leave it be for
now.

Signed-off-by: Bob Copeland <m...@bobcopeland.com>
---
 drivers/net/wireless/ath/ath5k/phy.c |   55 ++++++++++++++++++++++++----------
 1 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/ath/ath5k/phy.c 
b/drivers/net/wireless/ath/ath5k/phy.c
index e2a5606..41e36a3 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -1871,14 +1871,30 @@ ath5k_hw_set_antenna_mode(struct ath5k_hw *ah, u8 
ant_mode)
  * Helper functions
  */
 
+#define SLOPE_SCALE 100
+/*
+ * Compute dy/dx, returning result pre-scaled by SLOPE_SCALE
+ */
+static s32 ath5k_compute_slope(s16 x0, s16 y0, s16 x1, s16 y1)
+{
+       s32 dy = y1 - y0;
+       return (dy * SLOPE_SCALE) / (x1 - x0);
+}
+
 /*
  * Do linear interpolation between two given (x, y) points
  */
 static s16
+ath5k_interpolate(s16 target, s16 x_left, s16 y_left, s32 slope)
+{
+       return y_left + (slope * (target - x_left)) / SLOPE_SCALE;
+}
+
+static s16
 ath5k_get_interpolated_value(s16 target, s16 x_left, s16 x_right,
                                        s16 y_left, s16 y_right)
 {
-       s16 ratio, result;
+       s32 ratio;
 
        /* Avoid divide by zero and skip interpolation
         * if we have the same point */
@@ -1891,12 +1907,8 @@ ath5k_get_interpolated_value(s16 target, s16 x_left, s16 
x_right,
         * always 1 instead of 1.25, 1.75 etc). We scale up by 100
         * to have some accuracy both for 0.5 and 0.25 steps.
         */
-       ratio = ((100 * y_right - 100 * y_left)/(x_right - x_left));
-
-       /* Now scale down to be in range */
-       result = y_left + (ratio * (target - x_left) / 100);
-
-       return result;
+       ratio = ath5k_compute_slope(x_left, y_left, x_right, y_right);
+       return ath5k_interpolate(target, x_left, y_left, ratio);
 }
 
 /*
@@ -1914,6 +1926,7 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 
*stepR,
        s8 tmp;
        s16 min_pwrL, min_pwrR;
        s16 pwr_i;
+       u32 slope;
 
        /* Some vendors write the same pcdac value twice !!! */
        if (stepL[0] == stepL[1] || stepR[0] == stepR[1])
@@ -1922,12 +1935,14 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 
*stepR,
        if (pwrL[0] == pwrL[1])
                min_pwrL = pwrL[0];
        else {
+               slope = ath5k_compute_slope(pwrL[0], stepL[0],
+                       pwrL[1], stepL[1]);
+
                pwr_i = pwrL[0];
                do {
                        pwr_i--;
-                       tmp = (s8) ath5k_get_interpolated_value(pwr_i,
-                                                       pwrL[0], pwrL[1],
-                                                       stepL[0], stepL[1]);
+                       tmp = (s8) ath5k_interpolate(pwr_i, pwrL[0],
+                               stepL[0], slope);
                } while (tmp > 1);
 
                min_pwrL = pwr_i;
@@ -1936,12 +1951,14 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 
*stepR,
        if (pwrR[0] == pwrR[1])
                min_pwrR = pwrR[0];
        else {
+               slope = ath5k_compute_slope(pwrR[0], stepR[0],
+                       pwrR[1], stepR[1]);
+
                pwr_i = pwrR[0];
                do {
                        pwr_i--;
-                       tmp = (s8) ath5k_get_interpolated_value(pwr_i,
-                                                       pwrR[0], pwrR[1],
-                                                       stepR[0], stepR[1]);
+                       tmp = (s8) ath5k_interpolate(pwr_i, pwrR[0],
+                               stepR[0], slope);
                } while (tmp > 1);
 
                min_pwrR = pwr_i;
@@ -1971,6 +1988,7 @@ ath5k_create_power_curve(s16 pmin, s16 pmax,
 {
        u8 idx[2] = { 0, 1 };
        s16 pwr_i = 2*pmin;
+       u32 slope;
        int i;
 
        if (num_points < 2)
@@ -1988,6 +2006,9 @@ ath5k_create_power_curve(s16 pmin, s16 pmax,
 
        /* Find surrounding turning points (TPs)
         * and interpolate between them */
+       slope = ath5k_compute_slope(pwr[idx[0]], vpd[idx[0]],
+                                   pwr[idx[1]], vpd[idx[1]]);
+
        for (i = 0; (i <= (u16) (pmax - pmin)) &&
        (i < AR5K_EEPROM_POWER_TABLE_SIZE); i++) {
 
@@ -1997,11 +2018,13 @@ ath5k_create_power_curve(s16 pmin, s16 pmax,
                if ((pwr_i > pwr[idx[1]]) && (idx[1] < num_points - 1)) {
                        idx[0]++;
                        idx[1]++;
+
+                       slope = ath5k_compute_slope(pwr[idx[0]], vpd[idx[0]],
+                               pwr[idx[1]], vpd[idx[1]]);
                }
 
-               vpd_table[i] = (u8) ath5k_get_interpolated_value(pwr_i,
-                                               pwr[idx[0]], pwr[idx[1]],
-                                               vpd[idx[0]], vpd[idx[1]]);
+               vpd_table[i] = (u8) ath5k_interpolate(pwr_i,
+                       pwr[idx[0]], vpd[idx[0]], slope);
 
                /* Increase by 0.5dB
                 * (0.25 dB units) */
-- 
1.6.2.5

-- 
Bob Copeland %% www.bobcopeland.com

_______________________________________________
ath5k-devel mailing list
ath5k-devel@lists.ath5k.org
https://lists.ath5k.org/mailman/listinfo/ath5k-devel

Reply via email to