Re: [PATCH v4 1/3] lib: find_*_bit reimplementation
On Tue, Feb 17 2015, Yury Norov wrote: > The new implementation takes less space in the sources > (see diffstat) and in the object. For me it's 710 vs 453 > bytes of text. It also shows a better performance. > > find_last_bit description fixed due to obvious typo. > > In this patch 2 macros were introduced: {LOW,HIGH}_BITS_MASK, > that are doing almost the same as BITMAP_{FIRST,LAST}_WORD_MASK > in include/linux/bitmap.h. But 'LAST' macro is potentially less > effective, because it issues a conditional branch that can be > omitted. If it is replaced one day by a more effective > implementation, {LOW,HIGH}_BITS_MASK can be removed. > I think it's better to use the existing macros and then improve them instead of duplicating the functionality. I'll submit a patch for that later tonight (that should then make it to 3.21 [or whatever 3.19+2 will be called] together with this series). More on this issue below. > > Signed-off-by: Yury Norov > --- > include/linux/bitops.h | 4 +- > lib/find_last_bit.c| 37 +++ > lib/find_next_bit.c| 269 > ++--- > 3 files changed, 94 insertions(+), 216 deletions(-) > > diff --git a/include/linux/bitops.h b/include/linux/bitops.h > index 5d858e0..297f5bd 100644 > --- a/include/linux/bitops.h > +++ b/include/linux/bitops.h > @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) > /** > * find_last_bit - find the last set bit in a memory region > * @addr: The address to start the search at > - * @size: The maximum size to search > + * @size: The number of bits to search > * > - * Returns the bit number of the first set bit, or size. > + * Returns the bit number of the last set bit, or size. > */ > extern unsigned long find_last_bit(const unsigned long *addr, > unsigned long size); > diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c > index 91ca09f..edbb281 100644 > --- a/lib/find_last_bit.c > +++ b/lib/find_last_bit.c > @@ -4,6 +4,9 @@ > * Written by Rusty Russell > * (Inspired by David Howell's find_next_bit implementation) > * > + * Rewritten by Yury Norov to decrease > + * size and improve performance, 2015. > + * > * This program is free software; you can redistribute it and/or > * modify it under the terms of the GNU General Public License > * as published by the Free Software Foundation; either version > @@ -12,36 +15,26 @@ > > #include > #include > -#include > -#include > +#include > + > +#define LOW_BITS_MASK(nr) (~0UL >> -(nr)) This is technically wrong, and may not even work on architectures that are not as forgiving as x86: Whatever type and value nr has, -(nr) is almost guaranteed not to be a number between 0 and BITS_PER_LONG-1. And even on x86, gcc doesn't generate as good code as it could: 163: 49 c7 c0 ff ff ff ffmov$0x,%r8 16a: 83 e1 3fand$0x3f,%ecx 16d: f7 d9 neg%ecx 16f: 48 c1 ea 06 shr$0x6,%rdx 173: 49 d3 e8shr%cl,%r8 It doesn't realize that pre-masking %ecx with 0x3f is redundant when we then negate it and use it as a shift amount. So a better definition of the macro is #define BITMAP_LAST_WORD_MASK(nr) (~0UL >> (-(nr) & (BITS_PER_LONG-1))) and then callers shouldn't do the modulo. On x86, gcc knows that the & is redundant. I use & instead of % so that nr may also have signed type (otherwise we're again in UB land, since -(nr) % BITS_PER_LONG is then, by the broken C standard, a negative number). > #include > #include > -#include > -#include > +#include > > -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) > +#define HIGH_BITS_MASK(nr) (~0UL << (nr)) > + > +#if !defined(find_next_bit) || !defined(find_next_zero_bit) > > -#ifndef find_next_bit > /* > - * Find the next set bit in a memory region. > + * This is a common helper function for find_next_bit and > + * find_next_zero_bit. The difference is the "invert" argument, which > + * is XORed with each fetched word before searching it for one bits. > */ > -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, > - unsigned long offset) > +static unsigned long _find_next_bit(const unsigned long *addr, > + unsigned long nbits, unsigned long start, unsigned long invert) > { > - const unsigned long *p = addr + BITOP_WORD(offset); > - unsigned long result = offset & ~(BITS_PER_LONG-1); > unsigned long tmp; > > - if (offset >= size) > - return size; > - size -= result; > - offset %= BITS_PER_LONG; > - if (offset) { > - tmp = *(p++); > - tmp &= (~0UL << offset); > - if (size < BITS_PER_LONG) > - goto found_first; > - if (tmp) > - goto found_middle; > - size -= BITS_PER_LONG; > - result += BITS_PER_LONG; >
Re: [PATCH v4 1/3] lib: find_*_bit reimplementation
On Tue, Feb 17 2015, Yury Norov yury.no...@gmail.com wrote: The new implementation takes less space in the sources (see diffstat) and in the object. For me it's 710 vs 453 bytes of text. It also shows a better performance. find_last_bit description fixed due to obvious typo. In this patch 2 macros were introduced: {LOW,HIGH}_BITS_MASK, that are doing almost the same as BITMAP_{FIRST,LAST}_WORD_MASK in include/linux/bitmap.h. But 'LAST' macro is potentially less effective, because it issues a conditional branch that can be omitted. If it is replaced one day by a more effective implementation, {LOW,HIGH}_BITS_MASK can be removed. I think it's better to use the existing macros and then improve them instead of duplicating the functionality. I'll submit a patch for that later tonight (that should then make it to 3.21 [or whatever 3.19+2 will be called] together with this series). More on this issue below. Signed-off-by: Yury Norov yury.no...@gmail.com --- include/linux/bitops.h | 4 +- lib/find_last_bit.c| 37 +++ lib/find_next_bit.c| 269 ++--- 3 files changed, 94 insertions(+), 216 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5d858e0..297f5bd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The number of bits to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09f..edbb281 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell ru...@rustcorp.com.au * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov yury.no...@gmail.com to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -12,36 +15,26 @@ #include linux/bitops.h #include linux/export.h -#include asm/types.h -#include asm/byteorder.h +#include linux/kernel.h + +#define LOW_BITS_MASK(nr) (~0UL -(nr)) This is technically wrong, and may not even work on architectures that are not as forgiving as x86: Whatever type and value nr has, -(nr) is almost guaranteed not to be a number between 0 and BITS_PER_LONG-1. And even on x86, gcc doesn't generate as good code as it could: 163: 49 c7 c0 ff ff ff ffmov$0x,%r8 16a: 83 e1 3fand$0x3f,%ecx 16d: f7 d9 neg%ecx 16f: 48 c1 ea 06 shr$0x6,%rdx 173: 49 d3 e8shr%cl,%r8 It doesn't realize that pre-masking %ecx with 0x3f is redundant when we then negate it and use it as a shift amount. So a better definition of the macro is #define BITMAP_LAST_WORD_MASK(nr) (~0UL (-(nr) (BITS_PER_LONG-1))) and then callers shouldn't do the modulo. On x86, gcc knows that the is redundant. I use instead of % so that nr may also have signed type (otherwise we're again in UB land, since -(nr) % BITS_PER_LONG is then, by the broken C standard, a negative number). #include linux/bitops.h #include linux/export.h -#include asm/types.h -#include asm/byteorder.h +#include linux/kernel.h -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define HIGH_BITS_MASK(nr) (~0UL (nr)) + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) -#ifndef find_next_bit /* - * Find the next set bit in a memory region. + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the invert argument, which + * is XORed with each fetched word before searching it for one bits. */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset ~(BITS_PER_LONG-1); unsigned long tmp; - if (offset = size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp = (~0UL offset); - if (size BITS_PER_LONG) - goto found_first; - if (tmp) -
[PATCH v4 1/3] lib: find_*_bit reimplementation
The new implementation takes less space in the sources (see diffstat) and in the object. For me it's 710 vs 453 bytes of text. It also shows a better performance. find_last_bit description fixed due to obvious typo. In this patch 2 macros were introduced: {LOW,HIGH}_BITS_MASK, that are doing almost the same as BITMAP_{FIRST,LAST}_WORD_MASK in include/linux/bitmap.h. But 'LAST' macro is potentially less effective, because it issues a conditional branch that can be omitted. If it is replaced one day by a more effective implementation, {LOW,HIGH}_BITS_MASK can be removed. Signed-off-by: Yury Norov --- include/linux/bitops.h | 4 +- lib/find_last_bit.c| 37 +++ lib/find_next_bit.c| 269 ++--- 3 files changed, 94 insertions(+), 216 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5d858e0..297f5bd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The number of bits to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09f..edbb281 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -12,36 +15,26 @@ #include #include -#include -#include +#include + +#define LOW_BITS_MASK(nr) (~0UL >> -(nr)) #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { - unsigned long words; - unsigned long tmp; + if (size) { + unsigned long val = LOW_BITS_MASK(size % BITS_PER_LONG); + unsigned long idx = (size-1) / BITS_PER_LONG; - /* Start at final word. */ - words = size / BITS_PER_LONG; + do { + val &= addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); - /* Partial final word? */ - if (size & (BITS_PER_LONG-1)) { - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG -- (size & (BITS_PER_LONG-1); - if (tmp) - goto found; + val = ~0ul; + } while (idx--); } - - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } - } - - /* Not found */ return size; } EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 0cbfc0b..1f5f108 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -3,6 +3,9 @@ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowe...@redhat.com) * + * Rewritten by Yury Norov to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -11,98 +14,60 @@ #include #include -#include -#include +#include -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define HIGH_BITS_MASK(nr) (~0UL << (nr)) + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) -#ifndef find_next_bit /* - * Find the next set bit in a memory region. + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the "invert" argument, which + * is XORed with each fetched word before searching it for one bits. */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL
[PATCH v4 1/3] lib: find_*_bit reimplementation
The new implementation takes less space in the sources (see diffstat) and in the object. For me it's 710 vs 453 bytes of text. It also shows a better performance. find_last_bit description fixed due to obvious typo. In this patch 2 macros were introduced: {LOW,HIGH}_BITS_MASK, that are doing almost the same as BITMAP_{FIRST,LAST}_WORD_MASK in include/linux/bitmap.h. But 'LAST' macro is potentially less effective, because it issues a conditional branch that can be omitted. If it is replaced one day by a more effective implementation, {LOW,HIGH}_BITS_MASK can be removed. Signed-off-by: Yury Norov yury.no...@gmail.com --- include/linux/bitops.h | 4 +- lib/find_last_bit.c| 37 +++ lib/find_next_bit.c| 269 ++--- 3 files changed, 94 insertions(+), 216 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5d858e0..297f5bd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -218,9 +218,9 @@ static inline unsigned long __ffs64(u64 word) /** * find_last_bit - find the last set bit in a memory region * @addr: The address to start the search at - * @size: The maximum size to search + * @size: The number of bits to search * - * Returns the bit number of the first set bit, or size. + * Returns the bit number of the last set bit, or size. */ extern unsigned long find_last_bit(const unsigned long *addr, unsigned long size); diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09f..edbb281 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell ru...@rustcorp.com.au * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov yury.no...@gmail.com to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -12,36 +15,26 @@ #include linux/bitops.h #include linux/export.h -#include asm/types.h -#include asm/byteorder.h +#include linux/kernel.h + +#define LOW_BITS_MASK(nr) (~0UL -(nr)) #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { - unsigned long words; - unsigned long tmp; + if (size) { + unsigned long val = LOW_BITS_MASK(size % BITS_PER_LONG); + unsigned long idx = (size-1) / BITS_PER_LONG; - /* Start at final word. */ - words = size / BITS_PER_LONG; + do { + val = addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); - /* Partial final word? */ - if (size (BITS_PER_LONG-1)) { - tmp = (addr[words] (~0UL (BITS_PER_LONG -- (size (BITS_PER_LONG-1); - if (tmp) - goto found; + val = ~0ul; + } while (idx--); } - - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } - } - - /* Not found */ return size; } EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 0cbfc0b..1f5f108 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -3,6 +3,9 @@ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowe...@redhat.com) * + * Rewritten by Yury Norov yury.no...@gmail.com to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -11,98 +14,60 @@ #include linux/bitops.h #include linux/export.h -#include asm/types.h -#include asm/byteorder.h +#include linux/kernel.h -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define HIGH_BITS_MASK(nr) (~0UL (nr)) + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) -#ifndef find_next_bit /* - * Find the next set bit in a memory region. + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the invert argument, which + * is XORed with each fetched word before searching it for one bits. */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset ~(BITS_PER_LONG-1); unsigned long