As reported in bug https://bugs.linaro.org/show_bug.cgi?id=2816, packet alloc/free performance for non-reference packets appears to have degraded significantly after the introduction of packet reference support. Add fast paths for freeing non-reference packets to optimize this.
Signed-off-by: Bill Fischofer <bill.fischo...@linaro.org> --- Changes for v3: - Fix memory leak detected by odp_bench_packet. Changes for v2: - Reworked odp_packet_free_multi() to streamline processing for reference and non-reference paths. platform/linux-generic/include/odp_pool_internal.h | 22 ++++- platform/linux-generic/odp_packet.c | 97 ++++++++++++++++++++-- platform/linux-generic/odp_pool.c | 62 ++++---------- 3 files changed, 126 insertions(+), 55 deletions(-) diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h index b0805ac..8fdc688 100644 --- a/platform/linux-generic/include/odp_pool_internal.h +++ b/platform/linux-generic/include/odp_pool_internal.h @@ -121,9 +121,29 @@ static inline odp_buffer_hdr_t *buf_hdl_to_hdr(odp_buffer_t buf) return pool_buf_hdl_to_hdr(pool, buf); } +static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx) +{ + return _odp_cast_scalar(odp_pool_t, pool_idx); +} + +static inline uint32_t pool_id_from_buf(odp_buffer_t buf) +{ + odp_buffer_bits_t handle; + + handle.handle = buf; + return handle.pool_id; +} + int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[], odp_buffer_hdr_t *buf_hdr[], int num); -void buffer_free_multi(const odp_buffer_t buf[], int num_free); +void buffer_free_to_pool(uint32_t pool_id, const odp_buffer_t buf[], int num); + +static inline void buffer_free_multi(const odp_buffer_t buf[], int num_total) +{ + uint32_t pool_id = pool_id_from_buf(buf[0]); + + buffer_free_to_pool(pool_id, buf, num_total); +} #ifdef __cplusplus } diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c index 170965a..a0e65cc 100644 --- a/platform/linux-generic/odp_packet.c +++ b/platform/linux-generic/odp_packet.c @@ -30,7 +30,7 @@ static inline odp_packet_t packet_handle(odp_packet_hdr_t *pkt_hdr) static inline odp_buffer_t buffer_handle(odp_packet_hdr_t *pkt_hdr) { - return pkt_hdr->buf_hdr.handle.handle; + return 
odp_hdr_to_buf(&pkt_hdr->buf_hdr); } static inline uint32_t packet_ref_inc(odp_packet_hdr_t *pkt_hdr) @@ -599,18 +599,69 @@ int odp_packet_alloc_multi(odp_pool_t pool_hdl, uint32_t len, return num; } -static inline void packet_free(odp_packet_hdr_t *pkt_hdr) +static inline odp_packet_hdr_t *packet_free_to_list(odp_packet_hdr_t *pkt_hdr, + odp_buffer_t buf[], + int nbufs, + int *nfree) { odp_packet_hdr_t *ref_hdr; uint32_t ref_count; + int num_seg, i; do { + ref_count = packet_ref_count(pkt_hdr) - 1; + num_seg = pkt_hdr->buf_hdr.segcount; ref_hdr = pkt_hdr->ref_hdr; + + if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) && + ref_count == 0)) { + if (*nfree >= nbufs) + break; + + buf[(*nfree)++] = buffer_handle(pkt_hdr); + } else { + if (*nfree + num_seg >= nbufs) + break; + + for (i = 0; i < num_seg; i++) { + odp_packet_hdr_t *hdr = + pkt_hdr->buf_hdr.seg[i].hdr; + + if (packet_ref_dec(hdr) == 1) + buf[(*nfree)++] = buffer_handle(hdr); + } + + if (ref_count == 1) + pkt_hdr->unshared_len = pkt_hdr->frame_len; + } + + pkt_hdr = ref_hdr; + } while (pkt_hdr); + + return pkt_hdr; +} + +static inline void packet_free(odp_packet_hdr_t *pkt_hdr) +{ + odp_packet_hdr_t *ref_hdr; + uint32_t ref_count; + int num_seg; + + do { ref_count = packet_ref_count(pkt_hdr) - 1; - free_bufs(pkt_hdr, 0, pkt_hdr->buf_hdr.segcount); + num_seg = pkt_hdr->buf_hdr.segcount; + ref_hdr = pkt_hdr->ref_hdr; - if (ref_count == 1) - pkt_hdr->unshared_len = pkt_hdr->frame_len; + if (odp_likely((CONFIG_PACKET_MAX_SEGS == 1 || num_seg == 1) && + ref_count == 0)) { + buffer_free_multi((odp_buffer_t *) + &pkt_hdr->buf_hdr.handle.handle, 1); + } else { + free_bufs(pkt_hdr, 0, num_seg); + + if (ref_count == 1) + pkt_hdr->unshared_len = pkt_hdr->frame_len; + } pkt_hdr = ref_hdr; } while (pkt_hdr); @@ -623,10 +674,40 @@ void odp_packet_free(odp_packet_t pkt) void odp_packet_free_multi(const odp_packet_t pkt[], int num) { - int i; + int nbufs = num * CONFIG_PACKET_MAX_SEGS * 2; + odp_buffer_t 
buf[nbufs]; + int nfree = 0; + int i = 0; + odp_packet_hdr_t *pkt_hdr = odp_packet_hdr(pkt[0]); + uint32_t pool_id = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle); + uint32_t next_pool; + + while (1) { + pkt_hdr = packet_free_to_list(pkt_hdr, buf, nbufs, &nfree); - for (i = 0; i < num; i++) - packet_free(odp_packet_hdr(pkt[i])); + if (pkt_hdr) { + buffer_free_to_pool(pool_id, buf, nfree); + nfree = 0; + continue; + } + + if (++i == num) + break; + + pkt_hdr = odp_packet_hdr(pkt[i]); + next_pool = pool_id_from_buf(pkt_hdr->buf_hdr.handle.handle); + + if (odp_unlikely(next_pool != pool_id)) { + if (nfree > 0) { + buffer_free_to_pool(pool_id, buf, nfree); + nfree = 0; + } + pool_id = next_pool; + } + } + + if (nfree > 0) + buffer_free_to_pool(pool_id, buf, nfree); } int odp_packet_reset(odp_packet_t pkt, uint32_t len) diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c index 090a55f..2648068 100644 --- a/platform/linux-generic/odp_pool.c +++ b/platform/linux-generic/odp_pool.c @@ -47,19 +47,6 @@ typedef struct pool_local_t { pool_table_t *pool_tbl; static __thread pool_local_t local; -static inline odp_pool_t pool_index_to_handle(uint32_t pool_idx) -{ - return _odp_cast_scalar(odp_pool_t, pool_idx); -} - -static inline uint32_t pool_id_from_buf(odp_buffer_t buf) -{ - odp_buffer_bits_t handle; - - handle.handle = buf; - return handle.pool_id; -} - int odp_pool_init_global(void) { uint32_t i; @@ -671,8 +658,8 @@ int buffer_alloc_multi(pool_t *pool, odp_buffer_t buf[], return num_ch + num_deq; } -static inline void buffer_free_to_pool(uint32_t pool_id, - const odp_buffer_t buf[], int num) +void buffer_free_to_pool(uint32_t pool_id, + const odp_buffer_t buf[], int num) { pool_t *pool; int i; @@ -732,36 +719,6 @@ static inline void buffer_free_to_pool(uint32_t pool_id, cache->num = cache_num + num; } -void buffer_free_multi(const odp_buffer_t buf[], int num_total) -{ - uint32_t pool_id; - int num; - int i; - int first = 0; - - while (1) { 
- num = 1; - i = 1; - pool_id = pool_id_from_buf(buf[first]); - - /* 'num' buffers are from the same pool */ - if (num_total > 1) { - for (i = first; i < num_total; i++) - if (pool_id != pool_id_from_buf(buf[i])) - break; - - num = i - first; - } - - buffer_free_to_pool(pool_id, &buf[first], num); - - if (i == num_total) - return; - - first = i; - } -} - odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl) { odp_buffer_t buf; @@ -793,7 +750,20 @@ void odp_buffer_free(odp_buffer_t buf) void odp_buffer_free_multi(const odp_buffer_t buf[], int num) { - buffer_free_multi(buf, num); + uint32_t next_pool, pool_id = pool_id_from_buf(buf[0]); + int i, first = 0; + + for (i = 1; i < num; i++) { + next_pool = pool_id_from_buf(buf[i]); + + if (odp_unlikely(next_pool != pool_id)) { + buffer_free_to_pool(pool_id, &buf[first], i - first); + first = i; + pool_id = next_pool; + } + } + + buffer_free_to_pool(pool_id, &buf[first], num - first); } int odp_pool_capability(odp_pool_capability_t *capa) -- 2.9.3