Signed-off-by: Josh Stone <[email protected]> --- On Fedora 21, this appears to be slightly faster, although the difference is close to noise level. Mark, can you check whether this helps with the performance regression on your el7 system?
--- libdw/ChangeLog | 6 ++++++ libdw/memory-access.h | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/libdw/ChangeLog b/libdw/ChangeLog index 2aa878bd8ad7..17b4db980278 100644 --- a/libdw/ChangeLog +++ b/libdw/ChangeLog @@ -1,3 +1,9 @@ +2014-12-15 Josh Stone <[email protected]> + + * memory-access.h (__libdw_max_len_leb128): New. + (__libdw_get_uleb128): Use __libdw_max_len_leb128. + (__libdw_get_sleb128): Likewise. + 2014-12-15 Mark Wielaard <[email protected]> * dwarf_getpubnames.c (get_offsets): Make sure whole unit fall inside diff --git a/libdw/memory-access.h b/libdw/memory-access.h index 8226d00e9b33..99c827af22bd 100644 --- a/libdw/memory-access.h +++ b/libdw/memory-access.h @@ -39,6 +39,14 @@ #define len_leb128(var) ((8 * sizeof (var) + 6) / 7) +static inline size_t +__libdw_max_len_leb128 (const unsigned char *addr, const unsigned char *end) +{ + const size_t type_len = len_leb128 (uint64_t); + const size_t pointer_len = likely (addr < end) ? end - addr : 0; + return likely (type_len <= pointer_len) ? type_len : pointer_len; +} + #define get_uleb128_step(var, addr, nth) \ do { \ unsigned char __b = *(addr)++; \ @@ -51,10 +59,13 @@ static inline uint64_t __libdw_get_uleb128 (const unsigned char **addrp, const unsigned char *end) { uint64_t acc = 0; + /* Unroll the first step to help the compiler optimize for the common single-byte case. */ get_uleb128_step (acc, *addrp, 0); - for (unsigned int i = 1; i < len_leb128 (acc) && *addrp < end; ++i) + + const size_t max = __libdw_max_len_leb128 (*addrp - 1, end); + for (size_t i = 1; i < max; ++i) get_uleb128_step (acc, *addrp, i); /* Other implementations set VALUE to UINT_MAX in this case. So we better do this as well. */ @@ -82,8 +93,10 @@ static inline int64_t __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end) { int64_t acc = 0; + /* Unrolling 0 like uleb128 didn't prove to benefit optimization. 
*/ - for (unsigned int i = 0; i < len_leb128 (acc) && *addrp < end; ++i) + const size_t max = __libdw_max_len_leb128 (*addrp, end); + for (size_t i = 0; i < max; ++i) get_sleb128_step (acc, *addrp, i); /* Other implementations set VALUE to INT_MAX in this case. So we better do this as well. */ -- 2.1.0
