Bram Moolenaar wrote: > > Instead of doubling each time, which is going to be big chunks quickly, > another way would be to first allocate one block at the start size of > about 1000 bytes, then set the growsize to 10000. So we grow at the > same speed as before. Then no extra field or function is needed, it's > local change in the regexp code. > > Something like: > if (regstack.ga_data == NULL) > { > ga_init2(®stack, 1, 1000); > ga_grow(®stack, 1); > } > regstack.ga_growsize = 10000; > > I do wonder if the memory needs to be cleared when re-using the > allocated memory. Can you check that? >
I'd like to get back to this topic - after I use my patch for half a year without a problem. The patch is attached. It addresses your comments fully. Please take a look. Regarding cleaning the stacks - from what I can tell from the code, it's not needed. regmatch() clears the stack heads in the beginning by resetting the len of arrays, and then it uses only the part of the stack it fills. In fact, I also considered moving the stack allocation management directly to regmatch() since the stacks are actually used only there. But it's more cosmetic change and shouldn't affect the functionality in any way (though we may get 2 reallocations per regexec_both call in case of big chunks). So does the patch look like a good one to you? Or will I just live with it here locally? :) --~--~---------~--~----~------------~-------~--~----~ You received this message from the "vim_dev" maillist. For more information, visit http://www.vim.org/maillist.php -~----------~----~----~----~------~----~------~--~---
Index: regexp.c =================================================================== --- regexp.c (revision 579) +++ regexp.c (working copy) @@ -378,14 +378,6 @@ static char_u *reg_prev_sub = NULL; -#if defined(EXITFREE) || defined(PROTO) - void -free_regexp_stuff() -{ - vim_free(reg_prev_sub); -} -#endif - /* * REGEXP_INRANGE contains all characters which are always special in a [] * range after '\'. @@ -3212,6 +3204,17 @@ preceded by regstar_T or regbehind_T. */ static garray_T backpos; /* table with backpos_T for BACK */ +#if defined(EXITFREE) || defined(PROTO) + void +free_regexp_stuff() +{ + ga_clear(®stack); + ga_clear(&backpos); + vim_free(reg_tofree); + vim_free(reg_prev_sub); +} +#endif + /* * Get pointer to the line "lnum", which is relative to "reg_firstlnum". */ @@ -3332,7 +3335,20 @@ return r; } + /* + * Both for regstack and backpos tables we use the following strategy of + * allocation (to reduce malloc/free calls). Initial size is the size of table + * with which it is created. After executing the match we check the size of the + * table and free the memory only if the array has grown more than initial + * capacity. The idea is simple: keep the memory across the calls in case of + * shallow regexps but don't allow to hold a lot of memory after some + * memory-requiring search. + */ +#define REGSTACK_INITIAL 2048 +#define BACKPOS_INITIAL 64 + +/* * Match a regexp against a string ("line" points to the string) or multiple * lines ("line" is NULL, use reg_getline()). */ @@ -3345,16 +3361,28 @@ char_u *s; long retval = 0L; - reg_tofree = NULL; + /* Creating regstack and backpos tables if they are not created yet. + * We allocate *_INITIAL amount of bytes first and then set the grow size + * to much bigger value to make sure we allocate quickly in case of deep + * regular expressions. + */ + if (regstack.ga_data == NULL) + { + /* Use an item size of 1 byte, since we push different things + * onto the regstack. + */ + ga_init2(®stack, 1, REGSTACK_INITIAL); + ga_grow(®stack, REGSTACK_INITIAL); + regstack.ga_growsize = REGSTACK_INITIAL * 8; + } - /* Init the regstack empty. Use an item size of 1 byte, since we push - * different things onto it. Use a large grow size to avoid reallocating - * it too often. */ - ga_init2(®stack, 1, 10000); + if (backpos.ga_data == NULL) + { + ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL); + ga_grow(&backpos, BACKPOS_INITIAL); + backpos.ga_growsize = BACKPOS_INITIAL * 8; + } - /* Init the backpos table empty. */ - ga_init2(&backpos, sizeof(backpos_T), 10); - if (REG_MULTI) { prog = reg_mmatch->regprog; @@ -3524,9 +3552,18 @@ } theend: - vim_free(reg_tofree); + /* + * Check regstack and backpos tables for their current size. If the size + * is higher than initial capacity - free the tables. + */ + if (regstack.ga_maxlen > REGSTACK_INITIAL) + { ga_clear(®stack); + } + if (backpos.ga_maxlen > BACKPOS_INITIAL) + { ga_clear(&backpos); + } return retval; } @@ -3716,8 +3753,8 @@ #define RA_MATCH 4 /* successful match */ #define RA_NOMATCH 5 /* didn't match */ - /* Init the regstack and backpos table empty. They are initialized and - * freed in vim_regexec_both() to reduce malloc()/free() calls. */ + /* Initialize tables regstack and backpos to empty state. They are + * managed in vim_regexec_both() to reduce malloc()/free() calls. */ regstack.ga_len = 0; backpos.ga_len = 0;