Patch 8.0.0190
Summary: finding duplicate tags uses a slow linear search
Problem: Detecting duplicate tags uses a slow linear search.
Solution: Use a much faster hash table solution. (James McCoy, closes #1046)
But don't add hi_keylen, it makes hash tables 50% bigger.
Files: src/tag.c
*** ../vim-8.0.0189/src/tag.c 2016-12-01 21:32:28.678025257 +0100
--- src/tag.c 2017-01-15 16:47:11.816668598 +0100
***************
*** 35,43 ****
} tagptrs_T;
/*
! * The matching tags are first stored in ga_match[]. In which one depends on
! * the priority of the match.
! * At the end, the matches from ga_match[] are concatenated, to make a list
* sorted on priority.
*/
#define MT_ST_CUR 0 /* static match in current file */
--- 35,43 ----
} tagptrs_T;
/*
! * The matching tags are first stored in one of the ht_match[] hash tables.
In
! * which one depends on the priority of the match.
! * At the end, all the matches from ht_match[] are concatenated, to make a
list
* sorted on priority.
*/
#define MT_ST_CUR 0 /* static match in current file */
***************
*** 1341,1352 ****
int is_etag; /* current file is emaces style
*/
#endif
! struct match_found
! {
! int len; /* nr of chars of match[] to be compared */
! char_u match[1]; /* actually longer */
! } *mfp, *mfp2;
! garray_T ga_match[MT_COUNT];
int match_count = 0; /* number of matches
found */
char_u **matches;
int mtt;
--- 1341,1349 ----
int is_etag; /* current file is emaces style
*/
#endif
! char_u *mfp;
! hashtab_T ht_match[MT_COUNT];
! hash_T hash = 0;
int match_count = 0; /* number of matches
found */
char_u **matches;
int mtt;
***************
*** 1402,1417 ****
vimconv.vc_type = CONV_NONE;
#endif
! /*
! * Allocate memory for the buffers that are used
! */
lbuf = alloc(lbuf_size);
tag_fname = alloc(MAXPATHL + 1);
#ifdef FEAT_EMACS_TAGS
ebuf = alloc(LSIZE);
#endif
for (mtt = 0; mtt < MT_COUNT; ++mtt)
! ga_init2(&ga_match[mtt], (int)sizeof(struct match_found *), 100);
/* check for out of memory situation */
if (lbuf == NULL || tag_fname == NULL
--- 1399,1414 ----
vimconv.vc_type = CONV_NONE;
#endif
! /*
! * Allocate memory for the buffers that are used
! */
lbuf = alloc(lbuf_size);
tag_fname = alloc(MAXPATHL + 1);
#ifdef FEAT_EMACS_TAGS
ebuf = alloc(LSIZE);
#endif
for (mtt = 0; mtt < MT_COUNT; ++mtt)
! hash_init(&ht_match[mtt]);
/* check for out of memory situation */
if (lbuf == NULL || tag_fname == NULL
***************
*** 2206,2215 ****
}
/*
! * If a match is found, add it to ga_match[].
*/
if (match)
{
#ifdef FEAT_CSCOPE
if (use_cscope)
{
--- 2203,2214 ----
}
/*
! * If a match is found, add it to ht_match[].
*/
if (match)
{
+ int len = 0;
+
#ifdef FEAT_CSCOPE
if (use_cscope)
{
***************
*** 2262,2440 ****
}
/*
! * Add the found match in ga_match[mtt], avoiding duplicates.
* Store the info we need later, which depends on the kind of
* tags we are dealing with.
*/
! if (ga_grow(&ga_match[mtt], 1) == OK)
{
- int len;
- int heuristic;
-
- if (help_only)
- {
#ifdef FEAT_MULTI_LANG
# define ML_EXTRA 3
#else
# define ML_EXTRA 0
#endif
! /*
! * Append the help-heuristic number after the
! * tagname, for sorting it later.
! */
! *tagp.tagname_end = NUL;
! len = (int)(tagp.tagname_end - tagp.tagname);
! mfp = (struct match_found *)
! alloc((int)sizeof(struct match_found) + len
! + 10 + ML_EXTRA);
! if (mfp != NULL)
! {
! /* "len" includes the language and the NUL, but
! * not the priority. */
! mfp->len = len + ML_EXTRA + 1;
! #define ML_HELP_LEN 6
! p = mfp->match;
! STRCPY(p, tagp.tagname);
#ifdef FEAT_MULTI_LANG
! p[len] = '@';
! STRCPY(p + len + 1, help_lang);
#endif
! heuristic = help_heuristic(tagp.tagname,
! match_re ? matchoff : 0, !match_no_ic);
#ifdef FEAT_MULTI_LANG
! heuristic += help_pri;
#endif
! sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
! heuristic);
! }
! *tagp.tagname_end = TAB;
}
! else if (name_only)
{
! if (get_it_again)
! {
! char_u *temp_end = tagp.command;
! if (*temp_end == '/')
! while (*temp_end && *temp_end != '\r'
! && *temp_end != '\n'
! && *temp_end != '$')
! temp_end++;
! if (tagp.command + 2 < temp_end)
! {
! len = (int)(temp_end - tagp.command - 2);
! mfp = (struct match_found *)alloc(
! (int)sizeof(struct match_found) + len);
! if (mfp != NULL)
! {
! mfp->len = len + 1; /* include the NUL */
! p = mfp->match;
! vim_strncpy(p, tagp.command + 2, len);
! }
! }
! else
! mfp = NULL;
! get_it_again = FALSE;
! }
! else
{
! len = (int)(tagp.tagname_end - tagp.tagname);
! mfp = (struct match_found *)alloc(
! (int)sizeof(struct match_found) + len);
if (mfp != NULL)
! {
! mfp->len = len + 1; /* include the NUL */
! p = mfp->match;
! vim_strncpy(p, tagp.tagname, len);
! }
!
! /* if wanted, re-read line to get long form too */
! if (State & INSERT)
! get_it_again = p_sft;
}
}
else
{
! /* Save the tag in a buffer.
! * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
! * other tag: <mtt><tag_fname><NUL><NUL><lbuf>
! * without Emacs tags: <mtt><tag_fname><NUL><lbuf>
! */
! len = (int)STRLEN(tag_fname)
! + (int)STRLEN(lbuf) + 3;
#ifdef FEAT_EMACS_TAGS
! if (is_etag)
! len += (int)STRLEN(ebuf) + 1;
! else
! ++len;
#endif
! mfp = (struct match_found *)alloc(
! (int)sizeof(struct match_found) + len);
! if (mfp != NULL)
! {
! mfp->len = len;
! p = mfp->match;
! p[0] = mtt;
! STRCPY(p + 1, tag_fname);
#ifdef BACKSLASH_IN_FILENAME
! /* Ignore differences in slashes, avoid adding
! * both path/file and path\file. */
! slash_adjust(p + 1);
#endif
! s = p + 1 + STRLEN(tag_fname) + 1;
#ifdef FEAT_EMACS_TAGS
! if (is_etag)
! {
! STRCPY(s, ebuf);
! s += STRLEN(ebuf) + 1;
! }
! else
! *s++ = NUL;
! #endif
! STRCPY(s, lbuf);
}
}
! if (mfp != NULL)
! {
! /*
! * Don't add identical matches.
! * This can take a lot of time when finding many
! * matches, check for CTRL-C now and then.
! * Add all cscope tags, because they are all listed.
! */
#ifdef FEAT_CSCOPE
! if (use_cscope)
! i = -1;
! else
#endif
! for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; )
! {
! mfp2 = ((struct match_found **)
! (ga_match[mtt].ga_data))[i];
! if (mfp2->len == mfp->len
! && memcmp(mfp2->match, mfp->match,
! (size_t)mfp->len) == 0)
! break;
! fast_breakcheck();
! }
! if (i < 0)
{
! ((struct match_found **)(ga_match[mtt].ga_data))
! [ga_match[mtt].ga_len++] = mfp;
! ++match_count;
}
else
! vim_free(mfp);
}
! }
! else /* Out of memory! Just forget about the rest. */
! {
! retval = OK;
! stop_searching = TRUE;
! break;
}
}
#ifdef FEAT_CSCOPE
--- 2261,2429 ----
}
/*
! * Add the found match in ht_match[mtt].
* Store the info we need later, which depends on the kind of
* tags we are dealing with.
*/
! if (help_only)
{
#ifdef FEAT_MULTI_LANG
# define ML_EXTRA 3
#else
# define ML_EXTRA 0
#endif
! /*
! * Append the help-heuristic number after the tagname, for
! * sorting it later. The heuristic is ignored for
! * detecting duplicates.
! * The format is {tagname}@{lang}NUL{heuristic}NUL
! */
! *tagp.tagname_end = NUL;
! len = (int)(tagp.tagname_end - tagp.tagname);
! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 10 +
ML_EXTRA + 1);
! if (mfp != NULL)
! {
! int heuristic;
!
! p = mfp;
! STRCPY(p, tagp.tagname);
#ifdef FEAT_MULTI_LANG
! p[len] = '@';
! STRCPY(p + len + 1, help_lang);
#endif
! heuristic = help_heuristic(tagp.tagname,
! match_re ? matchoff : 0, !match_no_ic);
#ifdef FEAT_MULTI_LANG
! heuristic += help_pri;
#endif
! sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
! heuristic);
}
! *tagp.tagname_end = TAB;
! }
! else if (name_only)
! {
! if (get_it_again)
{
! char_u *temp_end = tagp.command;
! if (*temp_end == '/')
! while (*temp_end && *temp_end != '\r'
! && *temp_end != '\n'
! && *temp_end != '$')
! temp_end++;
! if (tagp.command + 2 < temp_end)
{
! len = (int)(temp_end - tagp.command - 2);
! mfp = (char_u *)alloc((int)sizeof(char_u) + len +
1);
if (mfp != NULL)
! vim_strncpy(mfp, tagp.command + 2, len);
}
+ else
+ mfp = NULL;
+ get_it_again = FALSE;
}
else
{
! len = (int)(tagp.tagname_end - tagp.tagname);
! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
! if (mfp != NULL)
! vim_strncpy(mfp, tagp.tagname, len);
!
! /* if wanted, re-read line to get long form too */
! if (State & INSERT)
! get_it_again = p_sft;
! }
! }
! else
! {
! #define TAG_SEP 0x01
! size_t tag_fname_len = STRLEN(tag_fname);
#ifdef FEAT_EMACS_TAGS
! size_t ebuf_len = 0;
#endif
!
! /* Save the tag in a buffer.
! * Use 0x01 to separate fields (Can't use NUL, because the
! * hash key is terminated by NUL).
! * Emacs tag: <mtt><tag_fname><0x01><ebuf><0x01><lbuf><NUL>
! * other tag: <mtt><tag_fname><0x01><0x01><lbuf><NUL>
! * without Emacs tags: <mtt><tag_fname><0x01><lbuf><NUL>
! */
! len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3;
! #ifdef FEAT_EMACS_TAGS
! if (is_etag)
! {
! ebuf_len = STRLEN(ebuf);
! len += (int)ebuf_len + 1;
! }
! else
! ++len;
! #endif
! mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
! if (mfp != NULL)
! {
! p = mfp;
! p[0] = mtt;
! STRCPY(p + 1, tag_fname);
#ifdef BACKSLASH_IN_FILENAME
! /* Ignore differences in slashes, avoid adding
! * both path/file and path\file. */
! slash_adjust(p + 1);
#endif
! p[tag_fname_len + 1] = TAG_SEP;
! s = p + 1 + tag_fname_len + 1;
#ifdef FEAT_EMACS_TAGS
! if (is_etag)
! {
! STRCPY(s, ebuf);
! s[ebuf_len] = TAG_SEP;
! s += ebuf_len + 1;
}
+ else
+ *s++ = TAG_SEP;
+ #endif
+ STRCPY(s, lbuf);
}
+ }
! if (mfp != NULL)
! {
! hashitem_T *hi;
!
! /*
! * Don't add identical matches.
! * Add all cscope tags, because they are all listed.
! * "mfp" is used as a hash key, there is a NUL byte to end
! * the part matters for comparing, more bytes may follow
! * after it. E.g. help tags store the priority after the
! * NUL.
! */
#ifdef FEAT_CSCOPE
! if (use_cscope)
! hash++;
! else
#endif
! hash = hash_hash(mfp);
! hi = hash_lookup(&ht_match[mtt], mfp, hash);
! if (HASHITEM_EMPTY(hi))
! {
! if (hash_add_item(&ht_match[mtt], hi, mfp, hash)
! == FAIL)
{
! /* Out of memory! Just forget about the rest. */
! retval = OK;
! stop_searching = TRUE;
! break;
}
else
! ++match_count;
}
! else
! /* duplicate tag, drop it */
! vim_free(mfp);
}
}
#ifdef FEAT_CSCOPE
***************
*** 2532,2538 ****
#endif
/*
! * Move the matches from the ga_match[] arrays into one list of
* matches. When retval == FAIL, free the matches.
*/
if (retval == FAIL)
--- 2521,2527 ----
#endif
/*
! * Move the matches from the ht_match[] arrays into one list of
* matches. When retval == FAIL, free the matches.
*/
if (retval == FAIL)
***************
*** 2546,2567 ****
match_count = 0;
for (mtt = 0; mtt < MT_COUNT; ++mtt)
{
! for (i = 0; i < ga_match[mtt].ga_len; ++i)
{
! mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i];
! if (matches == NULL)
! vim_free(mfp);
! else
{
! /* To avoid allocating memory again we turn the struct
! * match_found into a string. For help the priority was not
! * included in the length. */
! mch_memmove(mfp, mfp->match,
! (size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0)));
! matches[match_count++] = (char_u *)mfp;
}
}
! ga_clear(&ga_match[mtt]);
}
*matchesp = matches;
--- 2535,2563 ----
match_count = 0;
for (mtt = 0; mtt < MT_COUNT; ++mtt)
{
! hashitem_T *hi;
! long_u todo;
!
! todo = (long)ht_match[mtt].ht_used;
! for (hi = ht_match[mtt].ht_array; todo > 0; ++hi)
{
! if (!HASHITEM_EMPTY(hi))
{
! mfp = hi->hi_key;
! if (matches == NULL)
! vim_free(mfp);
! else
! {
! /* now change the TAG_SEP back to NUL */
! for (p = mfp; *p != NUL; ++p)
! if (*p == TAG_SEP)
! *p = NUL;
! matches[match_count++] = (char_u *)mfp;
! }
! todo--;
}
}
! hash_clear(&ht_match[mtt]);
}
*matchesp = matches;
*** ../vim-8.0.0189/src/version.c 2017-01-15 15:22:14.162467173 +0100
--- src/version.c 2017-01-15 15:51:01.027012795 +0100
***************
*** 766,767 ****
--- 766,769 ----
{ /* Add new patch number below this line */
+ /**/
+ 190,
/**/
--
This is an airconditioned room, do not open Windows.
/// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net \\\
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\ an exciting new programming language -- http://www.Zimbu.org ///
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.