[patch] Handle files with mixed LF - CRLF line endings when doing tag search

Lech Lorens Mon, 01 Apr 2013 08:16:47 -0700

People using ctags while working on code which is edited by multiple 
people in various editors will be familiar with this situation: the code 
ends up with a mix of line endings – some of them are Unix-style, some 
of them are DOS-style.


The problem is that when Vim reads such a file with ff=unix, the trailing
carriage return stays part of the text of every DOS-style line. A tag
pattern that is anchored with "$" therefore fails to match whenever the
line it is looking for ends in CRLF. The attached patch handles the problem
in a naïve but surprisingly effective way: if a pattern search fails, Vim
inserts "\r\*" before the last "$" in the pattern and retries the search.

Cheers,
Lech

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to vim_dev+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.

diff --git a/src/tag.c b/src/tag.c
index 34e9c4f..14d5f22 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -3055,7 +3055,7 @@ jumpto_tag(lbuf, forceit, keep_help)
     int		old_KeyTyped = KeyTyped;    /* getting the file may reset it */
 #endif
 
-    pbuf = alloc(LSIZE);
+    pbuf = alloc(LSIZE + 3);
 
     /* parse the match line into the tagp structure */
     if (pbuf == NULL || parse_match(lbuf, &tagp) == FAIL)
@@ -3220,6 +3220,8 @@ jumpto_tag(lbuf, forceit, keep_help)
 	    str = skip_regexp(pbuf + 1, pbuf[0], FALSE, NULL) + 1;
 	if (str > pbuf_end - 1)	/* search command with nothing following */
 	{
+	    int	found;
+
 	    save_p_ws = p_ws;
 	    save_p_ic = p_ic;
 	    save_p_scs = p_scs;
@@ -3234,64 +3236,140 @@ jumpto_tag(lbuf, forceit, keep_help)
 #endif
 	    save_lnum = curwin->w_cursor.lnum;
 	    curwin->w_cursor.lnum = 0;	/* start search before first line */
-	    if (do_search(NULL, pbuf[0], pbuf + 1, (long)1,
-							search_options, NULL))
-		retval = OK;
-	    else
+
+	    enum {
+		TAG_SEARCH_FAILED         = 0,
+		TAG_SEARCH_NORMAL         = 1,
+		TAG_SEARCH_IGNORE_CASE    = 2,
+		TAG_SEARCH_IGNORE_CR      = 3,
+		TAG_SEARCH_IGNORE_CASE_CR = 4,
+		TAG_SEARCH_GUESS          = 5,
+	    };
+
+	    for (found = TAG_SEARCH_NORMAL;
+		    found <= TAG_SEARCH_IGNORE_CASE_CR;
+		    ++found)
 	    {
-		int	found = 1;
-		int	cc;
+		if (do_search(NULL, pbuf[0], pbuf + 1, (long)1,
+			    search_options, NULL))
+		{
+		    retval = OK;
+		    break;
+		}
 
+		switch (found)
+		{
+		    case TAG_SEARCH_NORMAL:
+			/*
+			 * Try again, ignore case now.
+			 */
+			p_ic = TRUE;
+			break;
+		    case TAG_SEARCH_IGNORE_CASE:
+			/*
+			 * Try again but this time try to ignore any trailing
+			 * carriage return characters at the end of the line.
+			 * This might help us handle the case when we have
+			 * mixed line endings in a file.
+			 * Let's append "\r\*" to the regexp but only if it
+			 * ends with one of:
+			 * - $
+			 * - $/
+			 * - $/;
+			 * - $/;"
+			 * We will put \r\* before the $
+			 * Note that if the regexp is not a straightforward one
+			 * (i.e. /^line-contents$/;" )
+			 * we might break it so we only do it if we failed
+			 * matching the original regexp.
+			 */
+			p_ic = FALSE;
+			if (pbuf_end >= pbuf + 1)
+			{
+			    const char *regexp_end = "$/;\"";
+			    int len = pbuf_end - pbuf;
+			    int i;
+			    int j;
+
+			    if (len > 4)
+				len = 4;
+
+			    for (j = len; j > 0; --j)
+			    {
+				if (!STRNCMP(pbuf_end - j, regexp_end, j))
+				{
+				    *(pbuf_end - j)     = '\r';
+				    *(pbuf_end - j + 1) = '\\';
+				    *(pbuf_end - j + 2) = '*';
+
+				    for (i = 0; i < j; ++i)
+					*(pbuf_end - j + 3 + i) = regexp_end[i];
+				    pbuf_end += 3;
+				    break;
+				}
+			    }
+			}
+			*pbuf_end = NUL;
+			break;
+		    case TAG_SEARCH_IGNORE_CR:
+			/*
+			 * yet again, ignoring the case
+			 */
+			p_ic = TRUE;
+			break;
+		}
+	    }
+
+	    if (found == TAG_SEARCH_GUESS)
+	    {
+		int	cc;
+		p_ic = TRUE;
 		/*
-		 * try again, ignore case now
+		 * Failed to find pattern, take a guess: "^func  ("
 		 */
-		p_ic = TRUE;
-		if (!do_search(NULL, pbuf[0], pbuf + 1, (long)1,
-							search_options, NULL))
+		(void)test_for_static(&tagp);
+		cc = *tagp.tagname_end;
+		*tagp.tagname_end = NUL;
+		sprintf((char *)pbuf, "^%s\\s\\*(", tagp.tagname);
+		if (!do_search(NULL, '/', pbuf, (long)1,
+			    search_options, NULL))
 		{
-		    /*
-		     * Failed to find pattern, take a guess: "^func  ("
-		     */
-		    found = 2;
-		    (void)test_for_static(&tagp);
-		    cc = *tagp.tagname_end;
-		    *tagp.tagname_end = NUL;
-		    sprintf((char *)pbuf, "^%s\\s\\*(", tagp.tagname);
+		    /* Guess again: "^char * \<func  (" */
+		    sprintf((char *)pbuf, "^\\[#a-zA-Z_]\\.\\*\\<%s\\s\\*(",
+			    tagp.tagname);
 		    if (!do_search(NULL, '/', pbuf, (long)1,
-							search_options, NULL))
-		    {
-			/* Guess again: "^char * \<func  (" */
-			sprintf((char *)pbuf, "^\\[#a-zA-Z_]\\.\\*\\<%s\\s\\*(",
-								tagp.tagname);
-			if (!do_search(NULL, '/', pbuf, (long)1,
-							search_options, NULL))
-			    found = 0;
-		    }
-		    *tagp.tagname_end = cc;
+				search_options, NULL))
+			found = TAG_SEARCH_FAILED;
 		}
-		if (found == 0)
-		{
-		    EMSG(_("E434: Can't find tag pattern"));
-		    curwin->w_cursor.lnum = save_lnum;
-		}
-		else
+		*tagp.tagname_end = cc;
+	    }
+
+	    if (found == TAG_SEARCH_FAILED)
+	    {
+		EMSG(_("E434: Can't find tag pattern"));
+		curwin->w_cursor.lnum = save_lnum;
+	    }
+	    else
+	    {
+		/*
+		 * Only give a message when we guessed or found the match while
+		 * ignoring case and 'ignorecase' was not set.
+		 */
+		if (found >= TAG_SEARCH_GUESS
+			|| (!save_p_ic &&
+			    (found == TAG_SEARCH_IGNORE_CASE
+			     || found == TAG_SEARCH_IGNORE_CASE_CR)))
 		{
-		    /*
-		     * Only give a message when really guessed, not when 'ic'
-		     * is set and match found while ignoring case.
-		     */
-		    if (found == 2 || !save_p_ic)
+		    MSG(_("E435: Couldn't find tag, just guessing!"));
+		    if (!msg_scrolled && msg_silent == 0)
 		    {
-			MSG(_("E435: Couldn't find tag, just guessing!"));
-			if (!msg_scrolled && msg_silent == 0)
-			{
-			    out_flush();
-			    ui_delay(1000L, TRUE);
-			}
+			out_flush();
+			ui_delay(1000L, TRUE);
 		    }
-		    retval = OK;
 		}
+		retval = OK;
 	    }
+
 	    p_ws = save_p_ws;
 	    p_ic = save_p_ic;
 	    p_scs = save_p_scs;
diff --git a/src/testdir/Makefile b/src/testdir/Makefile
index 43acc45..bcaed91 100644
--- a/src/testdir/Makefile
+++ b/src/testdir/Makefile
@@ -28,7 +28,8 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
 		test74.out test75.out test76.out test77.out test78.out \
 		test79.out test80.out test81.out test82.out test83.out \
 		test84.out test85.out test86.out test87.out test88.out \
-		test89.out test90.out test91.out test92.out test93.out
+		test89.out test90.out test91.out test92.out test93.out \
+		test94.out
 
 SCRIPTS_GUI = test16.out
 
diff --git a/src/testdir/test94.in b/src/testdir/test94.in
new file mode 100644
index 0000000..5cceccb
--- /dev/null
+++ b/src/testdir/test94.in
@@ -0,0 +1,70 @@
+Tests for tags in lines with trailing carriage return characters.
+
+STARTTEST
+:set ffs= ff=unix
+:"write tags to Xtags file
+:/^tags$/+1,/^tags-end$/-1w! Xtags
+:"write file contents to Xcontents.txt file
+:/^file-contents/+1,/^file-contents-end$/-1w! ++ff=unix Xcontents.txt
+
+ggdG
+:call setline('.', 'Results of test 94:')
+:set tags=Xtags
+
+:for tagname in ['foo', 'bar', 'baz', 'quux']
+:    new
+:    exe 'tag ' . tagname
+:    let currline=line('.')
+:    redir => messages
+:    messages
+:    redir END
+:    close
+:    unlet! lmessages
+:    let lmessages=split(messages,'\n')
+:    put =(tagname . ' found at line ' . string(currline))
+:    put ='   total messages number: ' . string(len(lmessages))
+:    put ='   last one starts with: \"' . split(lmessages[-1])[0] . '\"'
+:endfor
+
+:put ='Results of test 94 with case ignored:'
+:set ignorecase
+:for tagname in ['foo', 'bar', 'baz', 'quux']
+:    new
+:    exe 'tag ' . tagname
+:    let currline=line('.')
+:    redir => messages
+:    messages
+:    redir END
+:    close
+:    let lmessages=split(messages,'\n')
+:    put =(tagname . ' found at line ' . string(currline))
+:    put ='   total messages number: ' . string(len(lmessages))
+:    put ='   last one starts with: \"' . split(lmessages[-1])[0] . '\"'
+:endfor
+
+:wq! test.out
+ENDTEST
+
+file-contents
+foo usage
+bar usage
+baz usage
+quux usage
+
+foo definition
+bar definition
+baz definition
+quux definition
+
+foo definition it is not
+bar definition it is not
+baz definition it is not
+quux definition it is not
+file-contents-end
+
+tags
+bar	Xcontents.txt	/bar definition$/;"
+baz	Xcontents.txt	/BAZ definition$
+foo	Xcontents.txt	/foo definition$/;"
+quux	Xcontents.txt	/QUUX definition$
+tags-end
diff --git a/src/testdir/test94.ok b/src/testdir/test94.ok
new file mode 100644
index 0000000..c3ff539
--- /dev/null
+++ b/src/testdir/test94.ok
@@ -0,0 +1,26 @@
+Results of test 94:
+foo found at line 6
+   total messages number: 10
+   last one starts with: ""Xcontents.txt""
+bar found at line 7
+   total messages number: 11
+   last one starts with: ""Xcontents.txt""
+baz found at line 8
+   total messages number: 13
+   last one starts with: "E435:"
+quux found at line 9
+   total messages number: 15
+   last one starts with: "E435:"
+Results of test 94 with case ignored:
+foo found at line 6
+   total messages number: 16
+   last one starts with: ""Xcontents.txt""
+bar found at line 7
+   total messages number: 17
+   last one starts with: ""Xcontents.txt""
+baz found at line 8
+   total messages number: 18
+   last one starts with: ""Xcontents.txt""
+quux found at line 9
+   total messages number: 19
+   last one starts with: ""Xcontents.txt""

[patch] Handle files with mixed LF - CRLF line endings when doing tag search

Reply via email to