On 29 April 2014 17:53, <stef...@apache.org> wrote: > Author: stefan2 > Date: Tue Apr 29 13:53:06 2014 > New Revision: 1590982 > > URL: http://svn.apache.org/r1590982 > Log: > Make the svn_bit_array__* code use less memory when accessed sparsely. > This also reduces the initialization overhead when bits are set at > high indexes. > > The basic idea is to replace the one-dimensional array with a two- > dimensional one where the second dimension ("blocks") gets lazily > allocated - allowing for a sparse layout. > Hi Stefan,
See my small suggestions inline. [...] > > Modified: subversion/trunk/subversion/libsvn_subr/bit_array.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/bit_array.c?rev=1590982&r1=1590981&r2=1590982&view=diff > ============================================================================== > --- subversion/trunk/subversion/libsvn_subr/bit_array.c (original) > +++ subversion/trunk/subversion/libsvn_subr/bit_array.c Tue Apr 29 13:53:06 > 2014 [...] > @@ -71,15 +101,17 @@ svn_bit_array__set(svn_bit_array__t *arr > apr_size_t idx, > svn_boolean_t value) > { > + unsigned char *block; > + > /* If IDX is outside the allocated range, we _may_ have to grow it. > * > * Be sure to use division instead of multiplication as we need to cover > * the full value range of APR_SIZE_T for the bit indexes. > */ > - if (idx / 8 >= array->data_size) > + if (idx / BLOCK_SIZE_BITS >= array->block_count) May be block_idx local variable will make code more readable. > { > - apr_size_t new_size; > - unsigned char *new_data; > + apr_size_t new_count; > + unsigned char **new_blocks; > > /* Unallocated indexes are implicitly 0, so no actual allocation > * required in that case. > @@ -87,34 +119,57 @@ svn_bit_array__set(svn_bit_array__t *arr > if (!value) > return; > > - /* Grow data buffer to cover IDX. > - * Clear the new buffer to guarantee our array[idx]==0 default. > + /* Grow block list to cover IDX. > + * Clear the new entries to guarantee our array[idx]==0 default. > */ > - new_size = select_data_size(idx); > - new_data = apr_pcalloc(array->pool, new_size); > - memcpy(new_data, array->data, array->data_size); > - array->data = new_data; > - array->data_size = new_size; > + new_count = select_data_size(idx); > + new_blocks = apr_pcalloc(array->pool, new_count * sizeof(*new_blocks)); > + memcpy(new_blocks, array->blocks, new_count * sizeof(*new_blocks)); It looks like a bug here. It think it should be. [[[ memcpy(new_blocks, array->blocks, array->block_count * sizeof(*new_blocks)); ]]] Otherwise you're reading unallocated memory. > + array->blocks = new_blocks; > + array->block_count = new_count; > } > > - /* IDX is covered by ARRAY->DATA now. */ > + /* IDX is covered by ARRAY->BLOCKS now. */ > + > + /* Get the block that contains IDX. Auto-allocate it if missing. */ > + block = array->blocks[idx / BLOCK_SIZE_BITS]; > + if (block == NULL) > + { > + /* Unallocated indexes are implicitly 0, so no actual allocation > + * required in that case. > + */ > + if (!value) > + return; > + > + /* Allocate the previously missing block and clear it for our > + * array[idx] == 0 default. */ > + block = apr_pcalloc(array->pool, BLOCK_SIZE); > + array->blocks[idx / BLOCK_SIZE_BITS] = block; > + } > > /* Set / reset one bit. Be sure to use unsigned shifts. */ > if (value) > - array->data[idx / 8] |= (unsigned char)(1u << (idx % 8)); > + block[(idx % BLOCK_SIZE_BITS) / 8] |= (unsigned char)(1u << (idx % 8)); It looks this code have assumption that BLOCK_SIZE_BITS is multiplication of '8'. I mean that strictly speaking right expression should be "1u << ((idx % BLOCK_SIZE_BITS) % 8)" or I didn't understand something? It also may be worth to add inline function /macro 'bit_mask(int bit)' > else > - array->data[idx / 8] &= (unsigned char)(255u - (1u << (idx % 8))); > + block[(idx % BLOCK_SIZE_BITS) / 8] &= (unsigned char)(255u - (1u << (idx > % 8))); What do you think about using binary negate for right expression i.e.: '~(unsigned char)(1u << (idx % 8));'? > } > > svn_boolean_t > svn_bit_array__get(svn_bit_array__t *array, > apr_size_t idx) > { > - /* Indexes outside the allocated range are implictly 0. */ > - if (idx / 8 >= array->data_size) > + unsigned char *block; > + > + /* Indexes outside the allocated range are implicitly 0. */ > + if (idx / BLOCK_SIZE_BITS >= array->block_count) > + return 0; The same suggestion for block_idx local and may be block_offset variable. > + > + /* Same if the respective block has not been allocated. */ > + block = array->blocks[idx / BLOCK_SIZE_BITS]; > + if (block == NULL) > return 0; > > /* Extract one bit (get the byte, shift bit to LSB, extract it). */ > - return (array->data[idx / 8] >> (idx % 8)) & 1; > + return (block[(idx % BLOCK_SIZE_BITS) / 8] >> (idx % 8)) & 1; > } > > > Modified: subversion/trunk/subversion/tests/libsvn_subr/bit-array-test.c > URL: > http://svn.apache.org/viewvc/subversion/trunk/subversion/tests/libsvn_subr/bit-array-test.c?rev=1590982&r1=1590981&r2=1590982&view=diff > ============================================================================== > --- subversion/trunk/subversion/tests/libsvn_subr/bit-array-test.c (original) > +++ subversion/trunk/subversion/tests/libsvn_subr/bit-array-test.c Tue Apr 29 > 13:53:06 2014 > @@ -44,8 +44,8 @@ test_zero_defaults(apr_pool_t *pool) > svn_bit_array__t *array = svn_bit_array__create(0, pool); > > /* Test (default) allocation boundaries */ > - SVN_TEST_ASSERT(svn_bit_array__get(array, 127) == 0); > - SVN_TEST_ASSERT(svn_bit_array__get(array, 128) == 0); > + SVN_TEST_ASSERT(svn_bit_array__get(array, 0x7ffff) == 0); > + SVN_TEST_ASSERT(svn_bit_array__get(array, 0x80000) == 0); > > /* Test address boundaries */ > SVN_TEST_ASSERT(svn_bit_array__get(array, 0) == 0); > @@ -58,7 +58,7 @@ static svn_error_t * > test_get_set(apr_pool_t *pool) > { > svn_bit_array__t *array = svn_bit_array__create(0, pool); > - apr_size_t i = 0, min = 0, max = 1025; > + apr_size_t i, min = 0x7ff00, max = 0x7ff00 + 1025; > > /* All values default to 0. */ > for (i = min; i < max; ++i) > > -- Ivan Zhakov CTO | VisualSVN | http://www.visualsvn.com