The current implementation reads the LPM table entry structures through a
uint16_t pointer. Such type punning violates the C strict-aliasing rules,
which can defeat compiler optimizations or even lead to miscompiled code.
This patch adds static inline helpers that perform the conversion through
a union instead, a form of type punning that GCC and clang document as
well-defined.

Signed-off-by: Aaron Conole <aconole@redhat.com>
---
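Notes: a minimal standalone sketch of the before/after conversion, using a
hypothetical simplified entry layout in place of the real
struct rte_lpm_tbl24_entry (see rte_lpm.h for the actual fields):

    #include <stdint.h>

    /* Hypothetical 2-byte entry standing in for rte_lpm_tbl24_entry. */
    struct tbl_entry {
            uint8_t next_hop;        /* next hop or tbl8 group index */
            uint8_t valid     : 1;   /* validation flag */
            uint8_t ext_entry : 1;   /* tbl8 extension flag */
            uint8_t depth     : 6;   /* rule depth */
    };

    /* Before: dereferencing a struct through a uint16_t pointer breaks
     * the strict-aliasing rule (C11 6.5p7); at -O2 the optimizer may
     * reorder or drop accesses it assumes cannot alias. */
    static inline uint16_t
    entry_to_uint16_cast(const struct tbl_entry *e)
    {
            return *(const uint16_t *)e;
    }

    /* After: copying through a union makes the reinterpretation
     * explicit; GCC and clang document this as well-defined. */
    static inline uint16_t
    entry_to_uint16_union(const struct tbl_entry *e)
    {
            union {
                    uint16_t         i;
                    struct tbl_entry s;
            } u;

            u.s = *e;
            return u.i;
    }

An equivalent alternative would be memcpy() into a uint16_t, which
compilers also reduce to a plain 16-bit load.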
 lib/librte_lpm/rte_lpm.h | 53 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 12 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index c299ce2..eae6ff1 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -157,6 +157,33 @@ struct rte_lpm {
 };

 /**
+ * Convert tbl_entry structs to their uint16_t representation.
+ */
+static inline uint16_t
+rte_lpm_tbl24_entry_to_uint16(const struct rte_lpm_tbl24_entry *entry)
+{
+       union {
+               uint16_t                   i;
+               struct rte_lpm_tbl24_entry s;
+       } tbl_entry_u;
+
+       tbl_entry_u.s = *entry;
+       return tbl_entry_u.i;
+}
+
+static inline uint16_t
+rte_lpm_tbl8_entry_to_uint16(const struct rte_lpm_tbl8_entry *entry)
+{
+       union {
+               uint16_t                  i;
+               struct rte_lpm_tbl8_entry s;
+       } tbl_entry_u;
+
+       tbl_entry_u.s = *entry;
+       return tbl_entry_u.i;
+}
+
+/**
  * Create an LPM object.
  *
  * @param name
@@ -286,7 +313,7 @@ rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
        RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);

        /* Copy tbl24 entry */
-       tbl_entry = *(const uint16_t *)&lpm->tbl24[tbl24_index];
+       tbl_entry = rte_lpm_tbl24_entry_to_uint16(&lpm->tbl24[tbl24_index]);

        /* Copy tbl8 entry (only if needed) */
        if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
@@ -295,7 +322,7 @@ rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
                unsigned tbl8_index = (uint8_t)ip +
                                ((uint8_t)tbl_entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES);

-               tbl_entry = *(const uint16_t *)&lpm->tbl8[tbl8_index];
+               tbl_entry = rte_lpm_tbl8_entry_to_uint16(&lpm->tbl8[tbl8_index]);
        }

        *next_hop = (uint8_t)tbl_entry;
@@ -342,7 +369,8 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,

        for (i = 0; i < n; i++) {
                /* Simply copy tbl24 entry to output */
-               next_hops[i] = *(const uint16_t *)&lpm->tbl24[tbl24_indexes[i]];
+               next_hops[i] = rte_lpm_tbl24_entry_to_uint16(
+                       &lpm->tbl24[tbl24_indexes[i]]);

                /* Overwrite output with tbl8 entry if needed */
                if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
@@ -352,7 +380,8 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
                                        ((uint8_t)next_hops[i] *
                                         RTE_LPM_TBL8_GROUP_NUM_ENTRIES);

-                       next_hops[i] = *(const uint16_t *)&lpm->tbl8[tbl8_index];
+                       next_hops[i] = rte_lpm_tbl8_entry_to_uint16(
+                                       &lpm->tbl8[tbl8_index]);
                }
        }
        return 0;
@@ -419,13 +448,13 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
        idx = _mm_cvtsi128_si64(i24);
        i24 = _mm_srli_si128(i24, sizeof(uint64_t));

-       tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-       tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+       tbl[0] = rte_lpm_tbl24_entry_to_uint16(&lpm->tbl24[(uint32_t)idx]);
+       tbl[1] = rte_lpm_tbl24_entry_to_uint16(&lpm->tbl24[idx >> 32]);

        idx = _mm_cvtsi128_si64(i24);

-       tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-       tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+       tbl[2] = rte_lpm_tbl24_entry_to_uint16(&lpm->tbl24[(uint32_t)idx]);
+       tbl[3] = rte_lpm_tbl24_entry_to_uint16(&lpm->tbl24[idx >> 32]);

        /* get 4 indexes for tbl8[]. */
        i8.x = _mm_and_si128(ip, mask8);
@@ -446,25 +475,25 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
                        RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
                i8.u32[0] = i8.u32[0] +
                        (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-               tbl[0] = *(const uint16_t *)&lpm->tbl8[i8.u32[0]];
+               tbl[0] = rte_lpm_tbl8_entry_to_uint16(&lpm->tbl8[i8.u32[0]]);
        }
        if (unlikely((pt >> 16 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
                        RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
                i8.u32[1] = i8.u32[1] +
                        (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-               tbl[1] = *(const uint16_t *)&lpm->tbl8[i8.u32[1]];
+               tbl[1] = rte_lpm_tbl8_entry_to_uint16(&lpm->tbl8[i8.u32[1]]);
        }
        if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
                        RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
                i8.u32[2] = i8.u32[2] +
                        (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-               tbl[2] = *(const uint16_t *)&lpm->tbl8[i8.u32[2]];
+               tbl[2] = rte_lpm_tbl8_entry_to_uint16(&lpm->tbl8[i8.u32[2]]);
        }
        if (unlikely((pt >> 48 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
                        RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
                i8.u32[3] = i8.u32[3] +
                        (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-               tbl[3] = *(const uint16_t *)&lpm->tbl8[i8.u32[3]];
+               tbl[3] = rte_lpm_tbl8_entry_to_uint16(&lpm->tbl8[i8.u32[3]]);
        }

        hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[0] : defv;
-- 
2.5.0
